diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index 18be4504c9bad222377360a0376b2190736ced3c..33f72ecb941c410f7541f32541a34878ff987b3f 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -10,6 +10,8 @@
         "notskm.clang-tidy",
         "streetsidesoftware.code-spell-checker"
     ],
-    "runArgs": ["--gpus","all"],
-    "image": "git.rz.tu-bs.de:4567/irmb/virtualfluids/ubuntu20_04:1.3"
+    "runArgs": ["--gpus","all",                     // remove the "--gpus","all", arguments (not the whole line) if you have no GPUs available
+                "--hostname=${localEnv:HOSTNAME}"], // HOSTNAME must be set in the environment VS Code is started from; you may need to add "export HOSTNAME=<hostname>" to the bash config file on your host machine.
+
+    "image": "git.rz.tu-bs.de:4567/irmb/virtualfluids/ubuntu20_04:1.4"
 }
diff --git a/.gitignore b/.gitignore
index ae19800aa7dd8a859144426e8280eb3718add848..f87c8efbbd3b3877bd77212d6c2184db2aa409f1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -24,7 +24,7 @@ output/
 logs/
 
 # grid
-grid/
+.grid/
 
 # scripts
 scripts/
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index bab479082aa0927a86eb8224fdcd83031cf792f9..c2cfa45ae5a1be0e5b0a5c6cb918de22221a4215 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -201,46 +201,6 @@ gcc_9_python_bindings_test:
   script:
     - python3 -m unittest discover -s Python -v
 
-###############################################################################
-gcc_9_python_slurm_test:
-  stage: test
-
-  needs: ["gcc_9_python"]
-
-  rules:
-    - if: $PHOENIX_PRIVATE_KEY
-
-  tags:
-    - linux
-    - privileged
-
-  variables:
-    SSH_KEY: "$PHOENIX_PRIVATE_KEY"
-    HOST: "$PHOENIX_HOSTNAME"
-    USER: "$PHOENIX_USER"
-
-  before_script:
-    - 'command -v ssh-agent >/dev/null || ( apt-get update -y && apt-get install openssh-client -y )'
-    - apt-get install -y rsync
-    - mkdir -p ~/.ssh
-    - chmod 700 ~/.ssh
-    - eval $(ssh-agent -s)
-    - echo "$SSH_KEY" | tr -d '\r' | ssh-add -
-    - echo $SSH_KEY >> ansible/private_key
-    - ssh-keyscan -t rsa $HOST >> ~/.ssh/known_hosts
-    - ssh $USER@$HOST "rm -rf output-*"
-    - ssh $USER@$HOST "rm -f *.out"
-    - pip3 install git+git://github.com/SvenMarcus/ssh-slurm-runner
-
-  script:
-    - singularity build PoiseuilleTestContainer.sif Python/SlurmTests/poiseuille/PoiseuilleTestContainer.def
-    - scp PoiseuilleTestContainer.sif $USER@$HOST:PoiseuilleTestContainer.sif
-    - scp Python/SlurmTests/poiseuille/slurm.job $USER@$HOST:slurm.job
-    - python3 -m ssh_slurm_runner slurm.job --host $HOST --user $USER --keyfile ansible/private_key
-    - ssh $USER@$HOST "rm -rf output-*"
-    - ssh $USER@$HOST "rm -f *.out"
-    - ssh $USER@$HOST "rm PoiseuilleTestContainer.sif"
-    - ssh $USER@$HOST "rm slurm.job"
 
 ###############################################################################
 ##                            Benchmark                                      ##
@@ -264,7 +224,13 @@ nvidia_test:
 gpu_numerical_tests:
   stage: benchmark
 
-  when: manual
+  rules:
+    - if: $CI_PIPELINE_SOURCE == "schedule"
+      when: always
+    - if: $CI_PIPELINE_SOURCE == "merge_request_event"
+      when: never
+    - when: manual
+      allow_failure: true
 
   needs: []
 
@@ -290,7 +256,8 @@ gpu_numerical_tests:
 
   script:
     - cd $CI_PROJECT_DIR
-    - ./build/bin/NumericalTests $CI_PROJECT_DIR/apps/gpu/tests/NumericalTests/configK15_nu10tm2.txt 2>&1 | tee -a numerical_tests_gpu_results.txt
+    # - ./build/bin/NumericalTests $CI_PROJECT_DIR/apps/gpu/tests/NumericalTests/configK15_nu10tm2.txt 2>&1 | tee -a numerical_tests_gpu_results.txt
+    - ./build/bin/NumericalTests $CI_PROJECT_DIR/apps/gpu/tests/NumericalTests/configK17chim_nu10tm3.txt 2>&1 | tee -a numerical_tests_gpu_results.txt
 
   cache:
     key: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG"
diff --git a/CMake/VirtualFluidsMacros.cmake b/CMake/VirtualFluidsMacros.cmake
index fbe0d9404e6c923bfb866e7d8dea7844b51ae17a..63503f5f14221bb8cec7670dbdda6aa92497d327 100644
--- a/CMake/VirtualFluidsMacros.cmake
+++ b/CMake/VirtualFluidsMacros.cmake
@@ -315,4 +315,14 @@ endfunction()
 #################################################################################
 function(groupTarget targetName folderName)
     set_property( TARGET  ${targetName}  PROPERTY  FOLDER  ${folderName} )
-endfunction(groupTarget)
\ No newline at end of file
+endfunction(groupTarget)
+
+
+#################################################################################
+## vf_load_user_apps(): add every app directory listed in USER_APPS (appended to in the machine config file) to the build
+#################################################################################
+function(vf_load_user_apps)
+    foreach(app IN LISTS USER_APPS)  # takes no arguments; an unset or empty USER_APPS simply yields zero iterations
+      add_subdirectory(${app})       # a relative path is resolved against the current source directory of the caller
+    endforeach()
+endfunction()
\ No newline at end of file
diff --git a/CMake/cmake_config_files/AMATERASU.config.cmake b/CMake/cmake_config_files/AMATERASU.config.cmake
deleted file mode 100644
index e65b197cba1a5d70007144c09cd80c85c33a0696..0000000000000000000000000000000000000000
--- a/CMake/cmake_config_files/AMATERASU.config.cmake
+++ /dev/null
@@ -1,14 +0,0 @@
-#################################################################################
-# VirtualFluids MACHINE FILE
-# Responsible: Martin Geier
-# OS:          Windows 10
-#################################################################################
-
-
-
-#################################################################################
-#  METIS  
-#################################################################################
-#SET(METIS_INCLUDEDIR "C:/Users/geier/Documents/metis-5.1.0/include")
-#SET(METIS_DEBUG_LIBRARY "C:/Users/geier/Documents/metis-5.1.0/build/libmetis/Debug/metis.lib")
-#SET(METIS_RELEASE_LIBRARY "C:/Users/geier/Documents/metis-5.1.0/build/libmetis/Release/metis.lib")
diff --git a/CMake/cmake_config_files/ARAGORN.config.cmake b/CMake/cmake_config_files/ARAGORN.config.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..d713f02d971024f29d3fb0fd30cfce7585d9dc55
--- /dev/null
+++ b/CMake/cmake_config_files/ARAGORN.config.cmake
@@ -0,0 +1,19 @@
+#################################################################################
+# VirtualFluids MACHINE FILE
+# Responsible: Anna Wellmann
+# OS:          Windows 11
+#################################################################################
+
+set(CMAKE_CUDA_ARCHITECTURES 86)     # Nvidia GeForce RTX 3060
+
+# numerical tests location of the grids
+# SET(PATH_NUMERICAL_TESTS "E:/temp/numericalTests/")
+# list(APPEND VF_COMPILER_DEFINITION "PATH_NUMERICAL_TESTS=${PATH_NUMERICAL_TESTS}")
+
+# add individual apps here
+set(GPU_APP "apps/gpu/LBM/")
+list(APPEND USER_APPS 
+    "${GPU_APP}DrivenCavityMultiGPU"
+    "${GPU_APP}SphereScaling"
+    # "${GPU_APP}MusselOyster"
+    )
diff --git a/CMake/cmake_config_files/ARAGORNUBUNTU.config.cmake b/CMake/cmake_config_files/ARAGORNUBUNTU.config.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..c8432efe045c386174a9e2a04988ed51ed794bf3
--- /dev/null
+++ b/CMake/cmake_config_files/ARAGORNUBUNTU.config.cmake
@@ -0,0 +1,14 @@
+#################################################################################
+# VirtualFluids MACHINE FILE
+# Responsible: Anna Wellmann
+# OS:          Ubuntu 20.04 (Docker container)
+#################################################################################
+
+set(CMAKE_CUDA_ARCHITECTURES 86)     # Nvidia GeForce RTX 3060
+
+set(GPU_APP "apps/gpu/LBM/")
+list(APPEND USER_APPS 
+    "${GPU_APP}DrivenCavityMultiGPU"
+    "${GPU_APP}SphereScaling"
+    # "${GPU_APP}MusselOyster"
+    )
\ No newline at end of file
diff --git a/CMake/cmake_config_files/BAUMBART.config.cmake b/CMake/cmake_config_files/BAUMBART.config.cmake
deleted file mode 100644
index ba0412c88fb9b1982c73c1256ae17eec2901d54c..0000000000000000000000000000000000000000
--- a/CMake/cmake_config_files/BAUMBART.config.cmake
+++ /dev/null
@@ -1,18 +0,0 @@
-#################################################################################
-# VirtualFluids MACHINE FILE
-# Responsible: Martin Schoenherr
-# OS:          Windows 10
-#################################################################################
-#Don't change:
-SET(METIS_ROOT ${CMAKE_SOURCE_DIR}/3rdParty/metis/metis-5.1.0 CACHE PATH "METIS ROOT") 
-SET(GMOCK_ROOT ${CMAKE_SOURCE_DIR}/3rdParty/googletest CACHE PATH "GMOCK ROOT")
-SET(JSONCPP_ROOT ${CMAKE_SOURCE_DIR}/3rdParty/jsoncpp CACHE PATH "JSONCPP ROOT") 
-SET(FFTW_ROOT ${CMAKE_SOURCE_DIR}/3rdParty/fftw/fftw-3.3.7 CACHE PATH "JSONCPP ROOT")
-
-
-#SET TO CORRECT PATH:
-SET(BOOST_ROOT  "C:\\Program Files\\boost\\boost_1_63_0"  CACHE PATH "BOOST_ROOT")
-SET(BOOST_LIBRARYDIR  "C:\\Program Files\\boost\\boost_1_63_0\\stage\\x64\\lib" CACHE PATH "BOOST_LIBRARYDIR")
-
-SET(VTK_DIR "E:/libraries/VTK-8.2.0/build" CACHE PATH "VTK directory override" FORCE)
-
diff --git a/CMake/cmake_config_files/BILBO.config.cmake b/CMake/cmake_config_files/BILBO.config.cmake
deleted file mode 100644
index 093fe3200837a3916e8a14b16042732c2b6db218..0000000000000000000000000000000000000000
--- a/CMake/cmake_config_files/BILBO.config.cmake
+++ /dev/null
@@ -1,5 +0,0 @@
-#################################################################################
-# VirtualFluids MACHINE FILE
-# Responsible: Soeren Peters
-# OS:          MacOS X
-#################################################################################
diff --git a/CMake/cmake_config_files/BLOGIN1.config.cmake b/CMake/cmake_config_files/BLOGIN1.config.cmake
deleted file mode 100644
index 9400e45f964dd3e0331b178e8b766c4c7fa7c08f..0000000000000000000000000000000000000000
--- a/CMake/cmake_config_files/BLOGIN1.config.cmake
+++ /dev/null
@@ -1 +0,0 @@
-INCLUDE("CMake/cmake_config_files/LISE.config.cmake")
\ No newline at end of file
diff --git a/CMake/cmake_config_files/BLOGIN2.config.cmake b/CMake/cmake_config_files/BLOGIN2.config.cmake
deleted file mode 100644
index 9400e45f964dd3e0331b178e8b766c4c7fa7c08f..0000000000000000000000000000000000000000
--- a/CMake/cmake_config_files/BLOGIN2.config.cmake
+++ /dev/null
@@ -1 +0,0 @@
-INCLUDE("CMake/cmake_config_files/LISE.config.cmake")
\ No newline at end of file
diff --git a/CMake/cmake_config_files/BOMBADIL.config.cmake b/CMake/cmake_config_files/BOMBADIL.config.cmake
deleted file mode 100644
index c0c5cf2f08b2cc925be248441c99a336160fd1bd..0000000000000000000000000000000000000000
--- a/CMake/cmake_config_files/BOMBADIL.config.cmake
+++ /dev/null
@@ -1,86 +0,0 @@
-#################################################################################
-# VirtualFluids MACHINE FILE
-# Responsible: Konstantin Kutscher
-# OS:          Windows 10
-#################################################################################
-
-#################################################################################
-# MPI
-#################################################################################
-#SET(MPI_DIR  "C:/Program Files (x86)/mpich2")
-#SET(MPI_DIR  "C:/Program Files/mpich2")
-#SET(USE_MPI_CXX_SYNTAX OFF)
-#SET(MPI_COMPILER "C:/Program Files/mpich2/bin/mpicxx")
-#SET(MPI_INCLUDE_PATH "C:/Program Files (x86)/mpich2/include")
-#SET(MPI_LIBRARY "C:/Program Files/mpich2/lib/libmpi.a")
-#SET(MPI_CXX_LIBRARY "C:/Program Files/MPICH2/lib/cxx.lib")
-#################################################################################
-#  BOOST  
-#################################################################################
-SET(BOOST_VERSION "1.60.0")
-SET(BOOST_ROOT "d:/Tools/boost/boost_1_60_0")
-SET(BOOST_DIR ${BOOST_ROOT})
-SET(BOOST_LIBRARYDIR ${BOOST_ROOT}"/stageMSVC64/lib")  
-#################################################################################
-#  VTK  
-#################################################################################
-set(VTK_DIR "d:/Tools/VTK/build/VTK-8.0.0")
-
-#################################################################################
-#  METIS  
-#################################################################################
-#IF(${USE_METIS})
-#  SET(METIS_INCLUDEDIR "d:/Tools/metis-5.1.0/include")
-#  SET(METIS_DEBUG_LIBRARY "d:/Tools/metis-5.1.0/build/libmetis/Debug/metis.lib") 
-#  SET(METIS_RELEASE_LIBRARY "d:/Tools/metis-5.1.0/build/libmetis/Release/metis.lib") 
-  
-  # SET(METIS_INCLUDEDIR "/mnt/d/Tools/metis-5.1.0/include")
-  # SET(METIS_DEBUG_LIBRARY "/mnt/d/Tools/metis-5.1.0/build/Linux-x86_64/libmetis/libmetis.a") 
-  # SET(METIS_RELEASE_LIBRARY "/mnt/d/Tools/metis-5.1.0/build/Linux-x86_64/libmetis/libmetis.a") 
-#ENDIF()
-
-#################################################################################
-#  PE  
-#################################################################################
-IF(${USE_DEM_COUPLING})
-  SET(PE_BINARY_DIR "d:/Tools/waLBerla/walberlaGit/build" CACHE PATH "pe binary dir")
-  SET(PE_ROOT "d:/Tools/waLBerla/walberlaGit" CACHE PATH "pe root")
- 
-  SET(PE_DEBUG_LIBRARY ${PE_BINARY_DIR}/src/pe/Debug/pe.lib) 
-  SET(PE_RELEASE_LIBRARY ${PE_BINARY_DIR}/src/pe/Release/pe.lib)
-  SET(BLOCKFOREST_DEBUG_LIBRARY ${PE_BINARY_DIR}/src/blockforest/Debug/blockforest.lib) 
-  SET(BLOCKFOREST_RELEASE_LIBRARY ${PE_BINARY_DIR}/src/blockforest/Release/blockforest.lib)
-  SET(DOMAIN_DECOMPOSITION_DEBUG_LIBRARY ${PE_BINARY_DIR}/src/domain_decomposition/Debug/domain_decomposition.lib) 
-  SET(DOMAIN_DECOMPOSITION_RELEASE_LIBRARY ${PE_BINARY_DIR}/src/domain_decomposition/Release/domain_decomposition.lib)
-  SET(GEOMETRY_DEBUG_LIBRARY ${PE_BINARY_DIR}/src/geometry/Debug/geometry.lib) 
-  SET(GEOMETRY_RELEASE_LIBRARY ${PE_BINARY_DIR}/src/geometry/Release/geometry.lib)
-  SET(CORE_DEBUG_LIBRARY ${PE_BINARY_DIR}/src/core/Debug/core.lib) 
-  SET(CORE_RELEASE_LIBRARY ${PE_BINARY_DIR}/src/core/Release/core.lib)
-
- ENDIF()
-
-##################################################################################
-#  FETOL
-##################################################################################
-IF(${USE_FETOL})
-  SET(FETOL_INCLUDEDIR "d:/Projects/FETOL/dev/CppFETOLlib")
-  SET(FETOL_DEBUG_LIBRARY "d:/Projects/FETOL/dev/CppFETOLlib/build/Debug/fetol.lib") 
-  SET(FETOL_RELEASE_LIBRARY "d:/Projects/FETOL/dev/CppFETOLlib/build/Release/fetol.lib") 
-  
-  SET(YAML_INCLUDEDIR "d:/Tools/yaml-cpp/include")
-  SET(YAML_DEBUG_LIBRARY "d:/Tools/yaml-cpp/buildVS11/Debug/libyaml-cppmdd.lib") 
-  SET(YAML_RELEASE_LIBRARY "d:/Tools/yaml-cpp/buildVS11/Release/libyaml-cppmd.lib") 
-  
-  SET(BOND_INCLUDEDIR "d:/Projects/FETOL/dev/bond_src/cpp/bond/fetol")
-  SET(BOND_DEBUG_LIBRARY "d:/Projects/FETOL/dev/bond_lib/Debug/bond.lib") 
-  SET(BOND_RELEASE_LIBRARY "d:/Projects/FETOL/dev/bond_lib/Release/bond.lib")   
-ENDIF()
-
-##################################################################################
-#  Java
-##############################################################################
-### FindJNI.cmake
-#find_package(JNI REQUIRED) 
-#SET(JNI_INCLUDE_DIRS ${JAVA_INCLUDE_PATH} ${JAVA_INCLUDE_PATH2} ${JAVA_AWT_INCLUDE_PATH})
-#SET(JNI_LIBRARIES ${JAVA_AWT_LIBRARY} ${JAVA_JVM_LIBRARY})
-#SET(JNI_FOUND 1) 
\ No newline at end of file
diff --git a/CMake/cmake_config_files/CSE01.config.cmake b/CMake/cmake_config_files/CSE01.config.cmake
deleted file mode 100644
index cad3f60ce31edac1069d1edce3fdd43b49a72b6e..0000000000000000000000000000000000000000
--- a/CMake/cmake_config_files/CSE01.config.cmake
+++ /dev/null
@@ -1,23 +0,0 @@
-# Alena
-#################################################################################
-#  BOOST  
-#################################################################################
-#SET(BOOST_VERSION "1.60.0")
-#SET(BOOST_ROOT "d:/boost/boost_1_60_0")
-#SET(BOOST_DIR ${BOOST_ROOT})
-#SET(BOOST_LIBRARYDIR ${BOOST_ROOT}"/stageMSVC64/lib")  
-#################################################################################
-
-#################################################################################
-#  METIS  
-#################################################################################
-#IF(${USE_METIS})
-#  SET(METIS_INCLUDEDIR "d:/metis-5.1.0/include")
-#  SET(METIS_DEBUG_LIBRARY "d:/metis-5.1.0/build/libmetis/Debug/metis.lib") 
-#  SET(METIS_RELEASE_LIBRARY "d:/metis-5.1.0/build/libmetis/Release/metis.lib") 
-#ENDIF()
-#################################################################################
-#  VTK  
-#################################################################################
-#set(VTK_DIR "d:/tools/VTK/build/VTK-8.2.0")
-#################################################################################
\ No newline at end of file
diff --git a/CMake/cmake_config_files/ELENDUR.config.cmake b/CMake/cmake_config_files/ELENDUR.config.cmake
deleted file mode 100644
index 5ef9d62dd8dbd7d352dfa812c05327b652ee83df..0000000000000000000000000000000000000000
--- a/CMake/cmake_config_files/ELENDUR.config.cmake
+++ /dev/null
@@ -1,18 +0,0 @@
-# Hiwi Rechner Olli
-#################################################################################
-#  BOOST  
-#################################################################################
-SET(BOOST_VERSION "1.65.1")
-SET(BOOST_ROOT "c:/Libraries/boost/boost_1_65_1")
-SET(BOOST_DIR ${BOOST_ROOT})
-SET(BOOST_LIBRARYDIR ${BOOST_ROOT}"/stage/lib")  
-#################################################################################
-
-#################################################################################
-#################################################################################
-#  METIS  
-#################################################################################
-SET(METIS_INCLUDEDIR "c:/Libraries/metis/metis-5.1.0/include")
-SET(METIS_DEBUG_LIBRARY "c:/Libraries/metis/metis-5.1.0/MSVC2015/libmetis/Debug/metis.lib")
-SET(METIS_RELEASE_LIBRARY "c:/Libraries/metis/metis-5.1.0/MSVC2015/libmetis/Release/metis.lib")
-
diff --git a/CMake/cmake_config_files/ELLADAN.config.cmake b/CMake/cmake_config_files/GITLAB-RUNNER02.config.cmake
similarity index 83%
rename from CMake/cmake_config_files/ELLADAN.config.cmake
rename to CMake/cmake_config_files/GITLAB-RUNNER02.config.cmake
index ff87d1b9d8d8aac05fe72cb1e303ad7de6663e8f..3c2bede02733eb2b1ae25b2cbd9141369fe473f7 100644
--- a/CMake/cmake_config_files/ELLADAN.config.cmake
+++ b/CMake/cmake_config_files/GITLAB-RUNNER02.config.cmake
@@ -4,6 +4,5 @@
 # OS:          Ubuntu 20.04
 #################################################################################
 
-set(NVCUDASAMPLES_ROOT "~/cuda-samples/Common")
-
 
+set(CMAKE_CUDA_ARCHITECTURES 70)
\ No newline at end of file
diff --git a/CMake/cmake_config_files/HINNY.config.cmake b/CMake/cmake_config_files/HINNY.config.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..869c14d5b38989500b38e8b1c4d09d301a861d9d
--- /dev/null
+++ b/CMake/cmake_config_files/HINNY.config.cmake
@@ -0,0 +1 @@
+set(CMAKE_CUDA_ARCHITECTURES "86")  # cuda compute capability of this machine's GPU
\ No newline at end of file
diff --git a/CMake/cmake_config_files/HOST2.config.cmake b/CMake/cmake_config_files/HOST2.config.cmake
deleted file mode 100644
index 3bfbaf2ddead8a6cc0eafa5c2c033fc54da6cb7f..0000000000000000000000000000000000000000
--- a/CMake/cmake_config_files/HOST2.config.cmake
+++ /dev/null
@@ -1,22 +0,0 @@
-#################################################################################
-# VirtualFluids MACHINE FILE
-# Responsible: Konstantin Kutscher
-# OS:          ???
-#################################################################################
-
-#################################################################################
-#  BOOST  
-#################################################################################
-SET(BOOST_VERSION "1.47")
-SET(BOOST_USE_MULTITHREAD ON)
-SET(BOOST_USE_STATIC_LIBS ON)
-
-SET(BOOST_ROOT "/host/tools/boost/boost_1_47_0")
-SET(BOOST_LIBRARYDIR "/host/tools/boost/boost_1_47_0/stageLinux/lib")
-
-#################################################################################
-#  METIS  
-#################################################################################
-SET(METIS_INCLUDEDIR "c:/Tools/metis-5.0.1/include")
-SET(METIS_DEBUG_LIBRARY "c:/Tools/metis-5.0.1/build/libmetis/Debug/metis.lib")
-SET(METIS_RELEASE_LIBRARY "c:/Tools/metis-5.0.1/build/libmetis/Release/metis.lib")
diff --git a/CMake/cmake_config_files/LISE.config.cmake b/CMake/cmake_config_files/LISE.config.cmake
deleted file mode 100644
index 705f02c62f5beb1eb5544af88d9a248c65e684ba..0000000000000000000000000000000000000000
--- a/CMake/cmake_config_files/LISE.config.cmake
+++ /dev/null
@@ -1,15 +0,0 @@
-#################################################################################
-#  METIS  
-#################################################################################
-IF(${USE_METIS})
-  SET(METIS_INCLUDEDIR "/home/niikonst/metis-5.1.0/include")
-  SET(METIS_DEBUG_LIBRARY "/home/niikonst/metis-5.1.0/build/Linux-x86_64/libmetis/libmetis.a") 
-  SET(METIS_RELEASE_LIBRARY "/home/niikonst/metis-5.1.0/build/Linux-x86_64/libmetis/libmetis.a")
-ENDIF()
-#################################################################################
-#  BOOST  
-#################################################################################
-SET(BOOST_VERSION "1.72.0")
-SET(BOOST_ROOT "/sw/tools/boost/1.72.0/skl/openmpi.3.1.5-gcc.9.2.0")
-SET(BOOST_DIR ${BOOST_ROOT})
-SET(BOOST_LIBRARYDIR ${BOOST_ROOT}"/lib")  
\ No newline at end of file
diff --git a/CMake/cmake_config_files/MOLLOK.config.cmake b/CMake/cmake_config_files/MOLLOK.config.cmake
index 03f83455175719ff5b0e786994213bbaa0fbd29e..f700f3cd7a4b5669ef6ffee9436a1528e50e9dc9 100644
--- a/CMake/cmake_config_files/MOLLOK.config.cmake
+++ b/CMake/cmake_config_files/MOLLOK.config.cmake
@@ -4,8 +4,12 @@
 # OS:          Windows 10
 #################################################################################
 
-#SET TO CORRECT PATH:
-SET(CMAKE_CUDA_ARCHITECTURES 86)
+# cuda compute capability
+set(CMAKE_CUDA_ARCHITECTURES 86)
 
-SET(PATH_NUMERICAL_TESTS "D:/out/numericalTests/")
-LIST(APPEND VF_COMPILER_DEFINITION "PATH_NUMERICAL_TESTS=${PATH_NUMERICAL_TESTS}")
+# numerical tests location of the grids
+set(PATH_NUMERICAL_TESTS "D:/out/numericalTests/")
+list(APPEND VF_COMPILER_DEFINITION "PATH_NUMERICAL_TESTS=${PATH_NUMERICAL_TESTS}")
+
+# add individual apps here
+list(APPEND USER_APPS "apps/gpu/LBM/WTG_RUB")
\ No newline at end of file
diff --git a/CMake/cmake_config_files/MULE.config.cmake b/CMake/cmake_config_files/MULE.config.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..02f61b7988c5b3af9cd58bc52e46b1b2edfe8aae
--- /dev/null
+++ b/CMake/cmake_config_files/MULE.config.cmake
@@ -0,0 +1 @@
+set(CMAKE_CUDA_ARCHITECTURES "75")  # cuda compute capability of this machine's GPU
\ No newline at end of file
diff --git a/CMake/cmake_config_files/PHOENIX.config.cmake b/CMake/cmake_config_files/PHOENIX.config.cmake
index 2f576538c106a6a4d83509a49a1408a8d63efbdb..fb76d6d837630d86cd20e18a50296ea0bf003485 100644
--- a/CMake/cmake_config_files/PHOENIX.config.cmake
+++ b/CMake/cmake_config_files/PHOENIX.config.cmake
@@ -4,44 +4,24 @@
 # OS:          CentOS 7.3
 #################################################################################
 
-SET(BOOST_VERSION "1.63.0" CACHE STRING "std: 1.63.0")
-
-#################################################################################
-#  METIS  
-#################################################################################
-SET(METIS_INCLUDEDIR "/cluster/lib/metis/5.1.0/gcc/include")
-SET(METIS_DEBUG_LIBRARY "/cluster/lib/metis/5.1.0/gcc/lib/libmetis.a")
-SET(METIS_RELEASE_LIBRARY "/cluster/lib/metis/5.1.0/gcc/lib/libmetis.a")
-
-
 #################################################################################
-#  PE  
+#  PE (legacy)
 #################################################################################
 IF(${USE_DEM_COUPLING})
   SET(PE_BINARY_DIR "/home/irmb/walberla-git/build" CACHE PATH "pe binary dir")
   SET(PE_ROOT "/home/irmb/walberla-git" CACHE PATH "pe root")
- 
-  SET(PE_DEBUG_LIBRARY ${PE_BINARY_DIR}/src/pe/libpe.a) 
+
+  SET(PE_DEBUG_LIBRARY ${PE_BINARY_DIR}/src/pe/libpe.a)
   SET(PE_RELEASE_LIBRARY ${PE_BINARY_DIR}/src/pe/libpe.a)
-  SET(BLOCKFOREST_DEBUG_LIBRARY ${PE_BINARY_DIR}/src/blockforest/libblockforest.a) 
+  SET(BLOCKFOREST_DEBUG_LIBRARY ${PE_BINARY_DIR}/src/blockforest/libblockforest.a)
   SET(BLOCKFOREST_RELEASE_LIBRARY ${PE_BINARY_DIR}/src/blockforest/libblockforest.a)
-  SET(DOMAIN_DECOMPOSITION_DEBUG_LIBRARY ${PE_BINARY_DIR}/src/domain_decomposition/libdomain_decomposition.a) 
+  SET(DOMAIN_DECOMPOSITION_DEBUG_LIBRARY ${PE_BINARY_DIR}/src/domain_decomposition/libdomain_decomposition.a)
   SET(DOMAIN_DECOMPOSITION_RELEASE_LIBRARY ${PE_BINARY_DIR}/src/domain_decomposition/libdomain_decomposition.a)
-  SET(GEOMETRY_DEBUG_LIBRARY ${PE_BINARY_DIR}/src/geometry/libgeometry.a) 
+  SET(GEOMETRY_DEBUG_LIBRARY ${PE_BINARY_DIR}/src/geometry/libgeometry.a)
   SET(GEOMETRY_RELEASE_LIBRARY ${PE_BINARY_DIR}/src/geometry/libgeometry.a)
-  SET(CORE_DEBUG_LIBRARY ${PE_BINARY_DIR}/src/core/libcore.a) 
+  SET(CORE_DEBUG_LIBRARY ${PE_BINARY_DIR}/src/core/libcore.a)
   SET(CORE_RELEASE_LIBRARY ${PE_BINARY_DIR}/src/core/libcore.a)
-  
 ENDIF()
 
-
-
-SET(BOOST_ROOT  "/cluster/lib/boost/1.63.0/gcc"  CACHE PATH "BOOST_ROOT")
-SET(BOOST_LIBRARYDIR  "/cluster/lib/boost/1.63.0/gcc/lib"  CACHE PATH "BOOST_LIBRARYDIR")
-
-#SET(VTK_DIR "/cluster/lib/vtk/8.1.0/lib/cmake/vtk-8.1" CACHE PATH "VTK directory override" FORCE)
-#SET(VTK_DIR "/home/irmb/tools/VTK/build/VTK-8.2.0" CACHE PATH "VTK directory override" FORCE)
-#SET(VTK_DIR "/home/stelenz/software/vtk/VTK-8.1.0/build" CACHE PATH "VTK directory override" FORCE)
-
 ## nvidia
 set(CMAKE_CUDA_ARCHITECTURES 60) # NVIDIA Tesla P100
\ No newline at end of file
diff --git a/CMake/cmake_config_files/SAMWEIS.config.cmake b/CMake/cmake_config_files/SAMWEIS.config.cmake
deleted file mode 100644
index 5b08c08610c37e1a38947d8842ca744622ffa524..0000000000000000000000000000000000000000
--- a/CMake/cmake_config_files/SAMWEIS.config.cmake
+++ /dev/null
@@ -1,24 +0,0 @@
-#################################################################################
-# VirtualFluids MACHINE FILE
-# Responsible: Lennard Lux
-# OS:          Windows 10
-#################################################################################
-
-#################################################################################
-#  BOOST  
-#################################################################################
-SET(BOOST_VERSION "1.60.0")
-SET(BOOST_ROOT "c:/Tools/boost/boost_1_60_0")
-SET(BOOST_DIR ${BOOST_ROOT})
-SET(BOOST_LIBRARYDIR ${BOOST_ROOT}"/stageMSVC64/lib")  
-#################################################################################
-#  VTK  
-#################################################################################
-set(VTK_DIR "E:/Tools/VTK-8.1.2-build")
-
-#################################################################################
-#  METIS  
-#################################################################################
-SET(METIS_INCLUDEDIR "c:/Tools/metis-5.1.0/include")
-SET(METIS_DEBUG_LIBRARY "c:/Tools/metis-5.1.0/build/libmetis/Debug/metis.lib")
-SET(METIS_RELEASE_LIBRARY "c:/Tools/metis-5.1.0/build/libmetis/Release/metis.lib")
diff --git a/CMake/cmake_config_files/TESLA01.config.cmake b/CMake/cmake_config_files/TESLA01.config.cmake
deleted file mode 100644
index ab7f92aea592fb4f8f034ebc548df05c46c58382..0000000000000000000000000000000000000000
--- a/CMake/cmake_config_files/TESLA01.config.cmake
+++ /dev/null
@@ -1,11 +0,0 @@
-#################################################################################
-# VirtualFluids MACHINE FILE
-# Responsible: Soeren Peters
-# OS:          Windows 10
-#################################################################################
-
-SET(BOOST_ROOT  "C:\\Libraries\\boost_1_65_1"  CACHE PATH "BOOST_ROOT")
-SET(BOOST_LIBRARYDIR  "C:\\Libraries\\boost_1_65_1\\lib" CACHE PATH "BOOST_LIBRARYDIR")
-
-SET(VTK_DIR "C:/Libraries/VTK-8.0.1/build")
-
diff --git a/CMake/cmake_config_files/TESLA03.config.cmake b/CMake/cmake_config_files/TESLA03.config.cmake
index f319e7bff9de9645d72b1598cec77652375b4d07..e29c7306c5448b97eefed9d7a41871a5e4d3b589 100644
--- a/CMake/cmake_config_files/TESLA03.config.cmake
+++ b/CMake/cmake_config_files/TESLA03.config.cmake
@@ -4,16 +4,12 @@
 # OS:          Windows 10
 #################################################################################
 
-#Don't change:
-SET(METIS_ROOT ${CMAKE_SOURCE_DIR}/3rdParty/metis/metis-5.1.0 CACHE PATH "METIS ROOT")
-SET(GMOCK_ROOT ${CMAKE_SOURCE_DIR}/3rdParty/googletest CACHE PATH "GMOCK ROOT")
-SET(JSONCPP_ROOT ${CMAKE_SOURCE_DIR}/3rdParty/jsoncpp CACHE PATH "JSONCPP ROOT")
-SET(FFTW_ROOT ${CMAKE_SOURCE_DIR}/3rdParty/fftw/fftw-3.3.7 CACHE PATH "JSONCPP ROOT")
-
+# cuda compute capability
 SET(CMAKE_CUDA_ARCHITECTURES 52)
 
-SET(VTK_DIR "F:/Libraries/vtk/VTK-8.2.0/build" CACHE PATH "VTK directory override" FORCE)
-
+# numerical tests location of the grids
 SET(PATH_NUMERICAL_TESTS "E:/temp/numericalTests/")
-LIST(APPEND VF_COMPILER_DEFINITION "PATH_NUMERICAL_TESTS=${PATH_NUMERICAL_TESTS}")
-SET(CMAKE_CUDA_ARCHITECTURES 52)
\ No newline at end of file
+list(APPEND VF_COMPILER_DEFINITION "PATH_NUMERICAL_TESTS=${PATH_NUMERICAL_TESTS}")
+
+# add individual apps here
+list(APPEND USER_APPS "apps/gpu/LBM/MusselOyster")
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ef024170265d9270d192e12f6b301224355f9ac3..9d2a9897abb08d9d8d87e8914f1774b7892986c6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -207,3 +207,6 @@ if (BUILD_VF_PYTHON_BINDINGS)
     add_subdirectory(${VF_THIRD_DIR}/pybind11/pybind11-2.6.0)
     add_subdirectory(pythonbindings)
 endif()
+
+
+vf_load_user_apps()
\ No newline at end of file
diff --git a/Containers/dockerfiles/Ubuntu20_04.Dockerfile b/Containers/dockerfiles/Ubuntu20_04.Dockerfile
index 41941824c74e2c580a6806d60ffc668be8195a0e..21511a97adab3694540c1d13a699f4d10b3d0356 100644
--- a/Containers/dockerfiles/Ubuntu20_04.Dockerfile
+++ b/Containers/dockerfiles/Ubuntu20_04.Dockerfile
@@ -1,7 +1,7 @@
 # VirtualFluids BuildDependencies:
 # Ubuntu 20.04
 # general tools: wget, unzip, git
-# CMake 3.22.3
+# CMake 3.24.0
 # ccache
 # gcc 9.3 (default)
 # gdb
@@ -11,6 +11,7 @@
 # freeGLUT
 # cppcheck
 # clang 10.0 (default)
+# clangd language server https://clangd.llvm.org/
 # python pip3 with modules: setuptools, wheels, scikit-build, pyvista, numpy, ansible, gcovr
 
 FROM nvidia/cuda:11.3.1-devel-ubuntu20.04
@@ -33,8 +34,10 @@ RUN apt-get update &&   \
     python3-pip         \
     freeglut3-dev       \
     cppcheck            \
+    clangd-12           \
+    && update-alternatives --install /usr/bin/clangd clangd /usr/bin/clangd-12 100 \
     && mkdir -p /usr/local/cmake/ && cd /usr/local/cmake/ \
-    && version=3.22 && build=3 \
+    && version=3.24 && build=0 \
     && wget https://cmake.org/files/v$version/cmake-$version.$build-linux-x86_64.tar.gz \
     && tar -xzvf cmake-$version.$build-linux-x86_64.tar.gz                              \
     && ln -s /usr/local/cmake/cmake-$version.$build-linux-x86_64/bin/* /usr/local/bin/  \
diff --git a/apps/gpu/HULC/main.cpp b/apps/gpu/HULC/main.cpp
index 35d70d1f49b344e0e5530c46582940ec581e3c7f..80f8ba4c62b3b0af08425f839d0f802a568db034 100644
--- a/apps/gpu/HULC/main.cpp
+++ b/apps/gpu/HULC/main.cpp
@@ -59,7 +59,7 @@ void setParameters(std::shared_ptr<Parameter> para, std::unique_ptr<input::Input
     para->setDevices(StringUtil::toIntVector(input->getValue("Devices")));
     para->setOutputPath(_path);
     para->setOutputPrefix(_prefix);
-    para->setFName(_path + "/" + _prefix);
+    para->setPathAndFilename(_path + "/" + _prefix);
     para->setPrintFiles(false);
     para->setPrintFiles(StringUtil::toBool(input->getValue("WriteGrid")));
     para->setGeometryValues(StringUtil::toBool(input->getValue("GeometryValues")));
@@ -73,9 +73,9 @@ void setParameters(std::shared_ptr<Parameter> para, std::unique_ptr<input::Input
     para->setUseWale(StringUtil::toBool(input->getValue("UseWale")));
     para->setSimulatePorousMedia(StringUtil::toBool(input->getValue("SimulatePorousMedia")));
     para->setD3Qxx(StringUtil::toInt(input->getValue("D3Qxx")));
-    para->setTEnd(StringUtil::toInt(input->getValue("TimeEnd")));
-    para->setTOut(StringUtil::toInt(input->getValue("TimeOut")));
-    para->setTStartOut(StringUtil::toInt(input->getValue("TimeStartOut")));
+    para->setTimestepEnd(StringUtil::toInt(input->getValue("TimeEnd")));
+    para->setTimestepOut(StringUtil::toInt(input->getValue("TimeOut")));
+    para->setTimestepStartOut(StringUtil::toInt(input->getValue("TimeStartOut")));
     para->setTimeCalcMedStart(StringUtil::toInt(input->getValue("TimeStartCalcMedian")));
     para->setTimeCalcMedEnd(StringUtil::toInt(input->getValue("TimeEndCalcMedian")));
     para->setPressInID(StringUtil::toInt(input->getValue("PressInID")));
@@ -90,8 +90,8 @@ void setParameters(std::shared_ptr<Parameter> para, std::unique_ptr<input::Input
     para->setTemperatureInit(StringUtil::toFloat(input->getValue("Temp")));
     para->setTemperatureBC(StringUtil::toFloat(input->getValue("TempBC")));
     //////////////////////////////////////////////////////////////////////////
-    para->setViscosity(StringUtil::toFloat(input->getValue("Viscosity_LB")));
-    para->setVelocity(StringUtil::toFloat(input->getValue("Velocity_LB")));
+    para->setViscosityLB(StringUtil::toFloat(input->getValue("Viscosity_LB")));
+    para->setVelocityLB(StringUtil::toFloat(input->getValue("Velocity_LB")));
     para->setViscosityRatio(StringUtil::toFloat(input->getValue("Viscosity_Ratio_World_to_LB")));
     para->setVelocityRatio(StringUtil::toFloat(input->getValue("Velocity_Ratio_World_to_LB")));
     para->setDensityRatio(StringUtil::toFloat(input->getValue("Density_Ratio_World_to_LB")));
diff --git a/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp b/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp
index 900bc51870091e34b5f1ebae0980f783310d18b3..8b61ad8db1c0d723cbbca698f737e97da62a6a87 100644
--- a/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp
+++ b/apps/gpu/LBM/ActuatorLine/ActuatorLine.cpp
@@ -120,7 +120,7 @@ void multipleLevel(const std::string& configPath)
     vf::basics::ConfigurationFile config;
     config.load(configPath);
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////^
-    SPtr<Parameter> para = std::make_shared<Parameter>(config, communicator.getNummberOfProcess(), communicator.getPID());
+    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNummberOfProcess(), communicator.getPID(), &config);
     BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
@@ -139,15 +139,13 @@ void multipleLevel(const std::string& configPath)
 
     para->setOutputPrefix( simulationName );
 
-    para->setFName(para->getOutputPath() + "/" + para->getOutputPrefix());
-
     para->setPrintFiles(true);
 
     para->setMaxLevel(1);
 
 
-    para->setVelocity(velocityLB);
-    para->setViscosity(viscosityLB);
+    para->setVelocityLB(velocityLB);
+    para->setViscosityLB(viscosityLB);
     para->setVelocityRatio( dx / dt );
     para->setViscosityRatio( dx*dx/dt );
     para->setMainKernel("CumulantK17CompChim");
@@ -159,8 +157,8 @@ void multipleLevel(const std::string& configPath)
         vz  = (real)0.0;
     });
 
-    para->setTOut( uint(tOut/dt) );
-    para->setTEnd( uint(tEnd/dt) );
+    para->setTimestepOut( uint(tOut/dt) );
+    para->setTimestepEnd( uint(tEnd/dt) );
 
     para->setIsBodyForce( true );
 
@@ -173,7 +171,7 @@ void multipleLevel(const std::string& configPath)
     gridBuilder->setVelocityBoundaryCondition(SideType::MZ,  velocityLB,  0.0, 0.0);
     gridBuilder->setVelocityBoundaryCondition(SideType::PZ,  velocityLB,  0.0, 0.0);
     gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0);
-    
+
     bcFactory.setVelocityBoundaryCondition(BoundaryConditionFactory::VelocityBC::VelocityAndPressureCompressible);
     bcFactory.setPressureBoundaryCondition(BoundaryConditionFactory::PressureBC::PressureNonEquilibriumCompressible);
 
@@ -195,7 +193,7 @@ void multipleLevel(const std::string& configPath)
     std::vector<real> probeCoordsZ = {3*reference_diameter,3*reference_diameter,3*reference_diameter};
     pointProbe->addProbePointsFromList(probeCoordsX, probeCoordsY, probeCoordsZ);
     // pointProbe->addProbePointsFromXNormalPlane(2*D, 0.0, 0.0, L_y, L_z, (uint)L_y/dx, (uint)L_z/dx);
-    
+
     pointProbe->addStatistic(Statistic::Means);
     pointProbe->addStatistic(Statistic::Variances);
     para->addProbe( pointProbe );
diff --git a/apps/gpu/LBM/BoundaryLayer/BoundaryLayer.cpp b/apps/gpu/LBM/BoundaryLayer/BoundaryLayer.cpp
index 2983cddc6ac2a3f6767f7b53427eff8fb1baefb1..8acec35bed45d53f8ba752855edb5c43d60e8fac 100644
--- a/apps/gpu/LBM/BoundaryLayer/BoundaryLayer.cpp
+++ b/apps/gpu/LBM/BoundaryLayer/BoundaryLayer.cpp
@@ -52,6 +52,7 @@
 #include "VirtualFluids_GPU/PreCollisionInteractor/PrecursorWriter.h"
 #include "VirtualFluids_GPU/PreCollisionInteractor/VelocitySetter.h"
 #include "VirtualFluids_GPU/BoundaryConditions/BoundaryConditionFactory.h"
+#include "VirtualFluids_GPU/TurbulenceModels/TurbulenceModelFactory.h"
 
 #include "VirtualFluids_GPU/GPU/CudaMemoryManager.h"
 
@@ -87,7 +88,7 @@ void multipleLevel(const std::string& configPath)
     vf::basics::ConfigurationFile config;
     config.load(configPath);
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////^
-    SPtr<Parameter> para = std::make_shared<Parameter>(config, communicator.getNummberOfProcess(), communicator.getPID());
+    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNummberOfProcess(), communicator.getPID(), &config);
     BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -172,32 +173,35 @@ void multipleLevel(const std::string& configPath)
 
     para->setOutputPrefix( simulationName );
 
-    para->setFName(para->getOutputPath() + "/" + para->getOutputPrefix());
-
     para->setPrintFiles(true);
 
     para->setForcing(pressureGradientLB, 0, 0);
-    para->setVelocity(velocityLB);
-    para->setViscosity(viscosityLB);
+    para->setVelocityLB(velocityLB);
+    para->setViscosityLB(viscosityLB);
     para->setVelocityRatio( dx / dt );
     para->setViscosityRatio( dx*dx/dt );
     para->setDensityRatio( 1.0 );
 
-    if(para->getUseAMD())
-        para->setMainKernel("TurbulentViscosityCumulantK17CompChim");
-    else
-        para->setMainKernel("CumulantK17CompChim");
+    para->setMainKernel("TurbulentViscosityCumulantK17CompChim");
 
     para->setIsBodyForce( config.getValue<bool>("bodyForce") );
 
-    para->setTStartOut(uint(tStartOut/dt) );
-    para->setTOut( uint(tOut/dt) );
-    para->setTEnd( uint(tEnd/dt) );;
+    para->setTimestepStartOut(uint(tStartOut/dt) );
+    para->setTimestepOut( uint(tOut/dt) );
+    para->setTimestepEnd( uint(tEnd/dt) );
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+    SPtr<TurbulenceModelFactory> tmFactory = SPtr<TurbulenceModelFactory>( new TurbulenceModelFactory(para) );
+    tmFactory->readConfigFile( config );
+    
+    // tmFactory->setTurbulenceModel(TurbulenceModel::AMD);
+    // tmFactory->setModelConstant(config.getValue<real>("SGSconstant"));
 
     /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
     gridBuilder->addCoarseGrid(0.0, 0.0, 0.0,
                                 L_x,  L_y,  L_z, dx);
 
     gridBuilder->setPeriodicBoundaryCondition(!readPrecursor, true, false);
 
@@ -231,17 +235,18 @@ void multipleLevel(const std::string& configPath)
     
     // gridBuilder->setVelocityBoundaryCondition(SideType::MZ, 0.0, 0.0, 0.0);
     // gridBuilder->setVelocityBoundaryCondition(SideType::PZ, 0.0, 0.0, 0.0);
     gridBuilder->setSlipBoundaryCondition(SideType::PZ,  0.0,  0.0, 0.0);
-    bcFactory.setSlipBoundaryCondition(BoundaryConditionFactory::SlipBC::SlipCompressible);
+    bcFactory.setSlipBoundaryCondition(BoundaryConditionFactory::SlipBC::SlipBounceBack); 
+    
 
 
 
 
     para->setInitialCondition([&](real coordX, real coordY, real coordZ, real &rho, real &vx, real &vy, real &vz) {
         rho = (real)0.0;
         vx  = (u_star/kappa * log(coordZ/z0) + c2o1*sin(cPi*c16o1*coordX/L_x)*sin(cPi*c8o1*coordZ/L_z)/(pow(coordZ/L_z,c2o1)+c1o1))  * dt / dx; 
         vy  = c2o1*sin(cPi*c16o1*coordX/L_x)*sin(cPi*c8o1*coordZ/L_z)/(pow(coordZ/L_z,c2o1)+c1o1)  * dt / dx; 
         vz  = c8o1*u_star/c4o10*(sin(cPi*c8o1*coordY/L_y)*sin(cPi*c8o1*coordZ/L_z)+sin(cPi*c8o1*coordX/L_x))/(pow(L_z*c1o2-coordZ, c2o1)+c1o1) * dt / dx;
     });
 
 
@@ -269,7 +274,7 @@ void multipleLevel(const std::string& configPath)
     auto cudaMemoryManager = std::make_shared<CudaMemoryManager>(para);
     auto gridGenerator = GridProvider::makeGridGenerator(gridBuilder, para, cudaMemoryManager, communicator);
 
-    Simulation sim(para, cudaMemoryManager, communicator, *gridGenerator, &bcFactory);
+    Simulation sim(para, cudaMemoryManager, communicator, *gridGenerator, &bcFactory, tmFactory);
     sim.run();
 }
 
diff --git a/apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp b/apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp
index c02c3b41e633aa6ae63917c6e9e12c2e1a6f2235..5900a9e244febd423fcd05771c6c2deb0e00b014 100644
--- a/apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp
+++ b/apps/gpu/LBM/DrivenCavity/DrivenCavity.cpp
@@ -1,375 +1,227 @@
-
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file DrivenCavity.cpp
+//! \ingroup Applications
+//! \author Martin Schoenherr, Stephan Lenz
+//=======================================================================================
 #define _USE_MATH_DEFINES
-#include <string>
-#include <sstream>
-#include <iostream>
-#include <stdexcept>
-#include <fstream>
 #include <exception>
+#include <fstream>
+#include <iostream>
 #include <memory>
-#include <filesystem>
+#include <sstream>
+#include <stdexcept>
+#include <string>
 
 //////////////////////////////////////////////////////////////////////////
 
 #include "Core/DataTypes.h"
-#include "PointerDefinitions.h"
-
-#include "Core/StringUtilities/StringUtil.h"
-
+#include "Core/LbmOrGks.h"
+#include "Core/Logger/Logger.h"
 #include "Core/VectorTypes.h"
+#include "PointerDefinitions.h"
 
-#include "basics/config/ConfigurationFile.h"
-
-#include "logger/Logger.h"
+#include <logger/Logger.h>
 
 //////////////////////////////////////////////////////////////////////////
 
+#include "GridGenerator/grid/BoundaryConditions/Side.h"
 #include "GridGenerator/grid/GridBuilder/LevelGridBuilder.h"
 #include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
-#include "GridGenerator/grid/BoundaryConditions/Side.h"
 #include "GridGenerator/grid/GridFactory.h"
-
-#include "GridGenerator/io/SimulationFileWriter/SimulationFileWriter.h"
-#include "GridGenerator/io/GridVTKWriter/GridVTKWriter.h"
-#include "GridGenerator/io/STLReaderWriter/STLReader.h"
-#include "GridGenerator/io/STLReaderWriter/STLWriter.h"
+#include "GridGenerator/geometries/Cuboid/Cuboid.h"
 
 //////////////////////////////////////////////////////////////////////////
 
-#include "VirtualFluids_GPU/LBM/Simulation.h"
+#include "VirtualFluids_GPU/BoundaryConditions/BoundaryConditionFactory.h"
 #include "VirtualFluids_GPU/Communication/Communicator.h"
-#include "VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h"
 #include "VirtualFluids_GPU/DataStructureInitializer/GridProvider.h"
-#include "VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h"
-#include "VirtualFluids_GPU/Parameter/Parameter.h"
-#include "VirtualFluids_GPU/Output/FileWriter.h"
-#include "VirtualFluids_GPU/BoundaryConditions/BoundaryConditionFactory.h"
-
+#include "VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h"
 #include "VirtualFluids_GPU/GPU/CudaMemoryManager.h"
+#include "VirtualFluids_GPU/LBM/Simulation.h"
+#include "VirtualFluids_GPU/Output/FileWriter.h"
+#include "VirtualFluids_GPU/Parameter/Parameter.h"
 
 //////////////////////////////////////////////////////////////////////////
 
-//#include "GksMeshAdapter/GksMeshAdapter.h"
-
-//#include "GksVtkAdapter/VTKInterface.h"
-//
-//#include "GksGpu/DataBase/DataBase.h"
-//#include "GksGpu/Parameters/Parameters.h"
-//#include "GksGpu/Initializer/Initializer.h"
-//
-//#include "GksGpu/FlowStateData/FlowStateDataConversion.cuh"
-//
-//#include "GksGpu/BoundaryConditions/BoundaryCondition.h"
-//#include "GksGpu/BoundaryConditions/IsothermalWall.h"
-//
-//#include "GksGpu/TimeStepping/NestedTimeStep.h"
-//
-//#include "GksGpu/Analyzer/CupsAnalyzer.h"
-//#include "GksGpu/Analyzer/ConvergenceAnalyzer.h"
-//
-//#include "GksGpu/CudaUtility/CudaUtility.h"
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//
-//          U s e r    s e t t i n g s
-//
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-//LbmOrGks lbmOrGks = GKS;
-const LbmOrGks lbmOrGks = LBM;
-
-const real L  = 1.0;
-
-const real Re = 1000.0;
-
-const real velocity  = 1.0;
-
-const real dt = (real)0.5e-3;
-
-const uint nx = 64;
-
-const std::string path("output/");
-const std::string gridPath("grid/");
-
-const std::string simulationName("DrivenCavityChim");
-
-const uint timeStepOut = 10000;
-const uint timeStepEnd = 250000;
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-void multipleLevel(const std::string& configPath)
+int main()
 {
-    logging::Logger::addStream(&std::cout);
-    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-    logging::Logger::timeStamp(logging::Logger::ENABLE);
-    logging::Logger::enablePrintedRankNumbers(logging::Logger::ENABLE);
-
-    vf::gpu::Communicator& communicator = vf::gpu::Communicator::getInstance();
+    try {
+         vf::logging::Logger::initalizeLogger();
+        //////////////////////////////////////////////////////////////////////////
+        // Simulation parameters
+        //////////////////////////////////////////////////////////////////////////
+        std::string path("./output/DrivenCavity");
+        std::string simulationName("LidDrivenCavity");
 
-    auto gridFactory = GridFactory::make();
-    gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
-    auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
+        const real L = 1.0;
+        const real Re = 1000.0;
+        const real velocity = 1.0;
+        const real dt = (real)0.5e-3;
+        const uint nx = 64;
 
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+        const uint timeStepOut = 1000;
+        const uint timeStepEnd = 10000;
 
-    real dx = L / real(nx);
+        //////////////////////////////////////////////////////////////////////////
+        // setup logger
+        //////////////////////////////////////////////////////////////////////////
 
-    gridBuilder->addCoarseGrid(-0.5 * L, -0.5 * L, -0.5 * L,
-                                0.5 * L,  0.5 * L,  0.5 * L, dx);
+        logging::Logger::addStream(&std::cout);
+        logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
+        logging::Logger::timeStamp(logging::Logger::ENABLE);
+        logging::Logger::enablePrintedRankNumbers(logging::Logger::ENABLE);
 
-    // gridBuilder->addCoarseGrid(-2.0 * dx, -0.5 * L, -0.5 * L,
-    //                             2.0 * dx,  0.5 * L,  0.5 * L, dx);
+        //////////////////////////////////////////////////////////////////////////
+        // setup gridGenerator
+        //////////////////////////////////////////////////////////////////////////
 
-    auto refBox = new Cuboid(-0.1 * L, -0.1 * L, -0.1 * L,
-                              0.1 * L,  0.1 * L,  0.1 * L);
+        auto gridFactory = GridFactory::make();
+        gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
+        auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
 
-    gridBuilder->addGrid(refBox, 1);
+        //////////////////////////////////////////////////////////////////////////
+        // create grid
+        //////////////////////////////////////////////////////////////////////////
 
-    gridBuilder->setNumberOfLayers(0, 0);
+        real dx = L / real(nx);
 
-    gridBuilder->setPeriodicBoundaryCondition(false, false, false);
+        gridBuilder->addCoarseGrid(-0.5 * L, -0.5 * L, -0.5 * L, 0.5 * L, 0.5 * L, 0.5 * L, dx);
 
-    gridBuilder->buildGrids(lbmOrGks, false); // buildGrids() has to be called before setting the BCs!!!!
+        gridBuilder->addGrid(new Cuboid(-0.25, -0.25, -0.25, 0.25, 0.25, 0.25), 1); // add fine grid
 
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+        gridBuilder->setPeriodicBoundaryCondition(false, false, false);
 
-    if( lbmOrGks == LBM )
-    {
+        gridBuilder->buildGrids(LbmOrGks::LBM, false);
 
-        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        vf::basics::ConfigurationFile config;
-        config.load(configPath);
-
-        SPtr<Parameter> para = std::make_shared<Parameter>(config, communicator.getNummberOfProcess(), communicator.getPID());
-        BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
-
-        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+        //////////////////////////////////////////////////////////////////////////
+        // compute parameters in lattice units
+        //////////////////////////////////////////////////////////////////////////
 
         const real velocityLB = velocity * dt / dx; // LB units
 
-        const real vx = velocityLB / (real)sqrt(2.0); // LB units
-        const real vy = velocityLB / (real)sqrt(2.0); // LB units
+        const real vxLB = velocityLB / sqrt(2.0); // LB units
+        const real vyLB = velocityLB / sqrt(2.0); // LB units
 
         const real viscosityLB = nx * velocityLB / Re; // LB units
 
-        VF_LOG_INFO("velocity  [dx/dt] = {}", velocityLB);
-        VF_LOG_INFO("viscosity [dx^2/dt] = {}", viscosityLB);
-
-        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+        //////////////////////////////////////////////////////////////////////////
+        // set parameters
+        //////////////////////////////////////////////////////////////////////////
+        SPtr<Parameter> para = std::make_shared<Parameter>();
 
-        para->setDevices(std::vector<uint>{(uint)0});
-
-        para->setOutputPath( path ); // optional, default is output/
-        para ->setGridPath( gridPath );  // optional, default is grid/
-
-        para->setOutputPrefix( simulationName );
+        para->setOutputPath(path);
+        para->setOutputPrefix(simulationName);
 
         para->setPrintFiles(true);
 
-        para->setMaxLevel(2);
-
-        para->setVelocity(velocityLB);
-        para->setViscosity(viscosityLB);
+        para->setVelocityLB(velocityLB);
+        para->setViscosityLB(viscosityLB);
 
         para->setVelocityRatio(velocity / velocityLB);
+        para->setDensityRatio(1.0);
 
-        //para->setMainKernel("CumulantK17CompChim");
-
-        para->setInitialCondition([&](real coordX, real coordY, real coordZ, real &rho, real &vx, real &vy, real &vz) {
-            rho = (real)0.0;
-            vx  = (real)0.0; //(6 * velocityLB * coordZ * (L - coordZ) / (L * L));
-            vy  = (real)0.0;
-            vz  = (real)0.0;
-        });
+        para->setTimestepOut(timeStepOut);
+        para->setTimestepEnd(timeStepEnd);
 
-        para->setTOut( timeStepOut );
-        para->setTEnd( timeStepEnd );
+        para->setMainKernel("CumulantK17CompChimRedesigned");
 
-        /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+        //////////////////////////////////////////////////////////////////////////
+        // set boundary conditions
+        //////////////////////////////////////////////////////////////////////////
 
         gridBuilder->setNoSlipBoundaryCondition(SideType::PX);
         gridBuilder->setNoSlipBoundaryCondition(SideType::MX);
         gridBuilder->setNoSlipBoundaryCondition(SideType::PY);
         gridBuilder->setNoSlipBoundaryCondition(SideType::MY);
-        gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vx, vx, 0.0);
+        gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, vyLB, 0.0);
         gridBuilder->setNoSlipBoundaryCondition(SideType::MZ);
 
+        BoundaryConditionFactory bcFactory;
+
         bcFactory.setNoSlipBoundaryCondition(BoundaryConditionFactory::NoSlipBC::NoSlipBounceBack);
         bcFactory.setVelocityBoundaryCondition(BoundaryConditionFactory::VelocityBC::VelocitySimpleBounceBackCompressible);
 
-        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-        gridBuilder->writeGridsToVtk(para->getGridPath());
+        //////////////////////////////////////////////////////////////////////////
+        // copy mesh to simulation
+        //////////////////////////////////////////////////////////////////////////
 
-        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+        vf::gpu::Communicator &communicator = vf::gpu::Communicator::getInstance();
 
         auto cudaMemoryManager = std::make_shared<CudaMemoryManager>(para);
-
-        auto gridGenerator = GridProvider::makeGridGenerator(gridBuilder, para, cudaMemoryManager, communicator);
+        SPtr<GridProvider> gridGenerator =
+            GridProvider::makeGridGenerator(gridBuilder, para, cudaMemoryManager, communicator);
+
+
+        //////////////////////////////////////////////////////////////////////////
+        // run simulation
+        //////////////////////////////////////////////////////////////////////////
+
+        VF_LOG_INFO("Start Running DrivenCavity Showcase...");
+        printf("\n");
+        VF_LOG_INFO("world parameter:");
+        VF_LOG_INFO("--------------");
+        VF_LOG_INFO("dt [s]                 = {}", dt);
+        VF_LOG_INFO("world_length   [m]     = {}", L);
+        VF_LOG_INFO("world_velocity [m/s]   = {}", velocity);
+        VF_LOG_INFO("dx [m]                 = {}", dx);
+        printf("\n");
+        VF_LOG_INFO("LB parameter:");
+        VF_LOG_INFO("--------------");
+        VF_LOG_INFO("Re                     = {}", Re);
+        VF_LOG_INFO("lb_velocity [dx/dt]    = {}", velocityLB);
+        VF_LOG_INFO("lb_viscosity [dx^2/dt] = {}", viscosityLB);
+        VF_LOG_INFO("lb_vx [dx/dt] (lb_velocity/sqrt(2)) = {}", vxLB);
+        VF_LOG_INFO("lb_vy [dx/dt] (lb_velocity/sqrt(2)) = {}", vyLB);
+        printf("\n");
+        VF_LOG_INFO("simulation parameter:");
+        VF_LOG_INFO("--------------");
+        VF_LOG_INFO("nx                     = {}", nx);
+        VF_LOG_INFO("ny                     = {}", nx);
+        VF_LOG_INFO("nz                     = {}", nx);
+        VF_LOG_INFO("number of nodes        = {}", nx * nx * nx);
+        VF_LOG_INFO("n timesteps            = {}", timeStepEnd);
+        VF_LOG_INFO("write_nth_timestep     = {}", timeStepOut);
+        VF_LOG_INFO("output_path            = {}", path);
 
         Simulation sim(para, cudaMemoryManager, communicator, *gridGenerator, &bcFactory);
         sim.run();
 
-        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    }
-    else
-    {
-     //   CudaUtility::setCudaDevice(0);
-     //
-     //   Parameters parameters;
-
-     //   ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-        //const real vx = velocity / sqrt(2.0);
-        //const real vy = velocity / sqrt(2.0);
-
-     //   parameters.K  = 2.0;
-     //   parameters.Pr = 1.0;
-     //
-     //   const real Ma = 0.1;
-
-     //   real rho = 1.0;
-
-     //   real cs = velocity / Ma;
-     //   real lambda = c1o2 * ( ( parameters.K + 5.0 ) / ( parameters.K + 3.0 ) ) / ( cs * cs );
-
-     //   const real mu = velocity * L * rho / Re;
-
-     //   *logging::out << logging::Logger::INFO_HIGH << "mu  = " << mu << " m^2/s\n";
-
-     //   *logging::out << logging::Logger::INFO_HIGH << "CFL = " << dt * ( velocity + cs ) / dx << "\n";
-
-     //   ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-     //   parameters.mu = mu;
-
-     //   parameters.dt = dt;
-     //   parameters.dx = dx;
-
-     //   parameters.lambdaRef = lambda;
-
-     //   ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-     //   GksMeshAdapter meshAdapter( gridBuilder );
-
-     //   meshAdapter.inputGrid();
-
-     //   ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-     //   auto dataBase = std::make_shared<DataBase>( "GPU" );
-
-     //   ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-     //   ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-     //   ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-     //   ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-     //   SPtr<BoundaryCondition> bcLid  = std::make_shared<IsothermalWall>( dataBase, Vec3(  vx,  vy, 0.0 ), lambda, false );
-     //   SPtr<BoundaryCondition> bcWall = std::make_shared<IsothermalWall>( dataBase, Vec3( 0.0, 0.0, 0.0 ), lambda, false );
-
-     //   bcLid->findBoundaryCells ( meshAdapter, true,  [&](Vec3 center){ return center.z > 0.5; } );
-     //   bcWall->findBoundaryCells( meshAdapter, false, [&](Vec3 center){ return center.z < 0.5; } );
-
-     //   dataBase->boundaryConditions.push_back( bcLid  );
-     //   dataBase->boundaryConditions.push_back( bcWall );
-
-     //   ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-     //   dataBase->setMesh( meshAdapter );
-
-     //   Initializer::interpret(dataBase, [&] ( Vec3 cellCenter ) -> ConservedVariables {
-
-     //       return toConservedVariables( PrimitiveVariables( rho, 0.0, 0.0, 0.0, lambda ), parameters.K );
-     //   });
-
-     //   dataBase->copyDataHostToDevice();
-
-     //   Initializer::initializeDataUpdate(dataBase);
-
-     //   writeVtkXML( dataBase, parameters, 0, path + simulationName + "_0" );
-
-     //   ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-     //   CupsAnalyzer cupsAnalyzer( dataBase, false, 60.0, true, 10000 );
-
-     //   ConvergenceAnalyzer convergenceAnalyzer( dataBase, 10000 );
-
-     //   cupsAnalyzer.start();
-
-     //   ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-     //   for( uint iter = 1; iter <= timeStepEnd; iter++ )
-     //   {
-     //       TimeStepping::nestedTimeStep(dataBase, parameters, 0);
-
-     //       if( iter % timeStepOut == 0 )
-     //       {
-     //           dataBase->copyDataDeviceToHost();
-
-     //           writeVtkXML( dataBase, parameters, 0, path + simulationName + "_" + std::to_string( iter ) );
-     //       }
-     //
-     //       int crashCellIndex = dataBase->getCrashCellIndex();
-     //       if( crashCellIndex >= 0 )
-     //       {
-     //           *logging::out << logging::Logger::LOGGER_ERROR << "Simulation Crashed at CellIndex = " << crashCellIndex << "\n";
-     //           dataBase->copyDataDeviceToHost();
-     //           writeVtkXML( dataBase, parameters, 0, path + simulationName + "_" + std::to_string( iter ) );
-
-     //           break;
-     //       }
-
-     //       dataBase->getCrashCellIndex();
-
-     //       cupsAnalyzer.run( iter, parameters.dt );
-
-     //       convergenceAnalyzer.run( iter );
-     //   }
-    }
-}
-
-int main( int argc, char* argv[])
-{
-    try
-    {
-        vf::logging::Logger::initalizeLogger();
-
-        // assuming that the config files is stored parallel to this file.
-        std::filesystem::path filePath = __FILE__;
-        filePath.replace_filename("configDrivenCavity.txt");
-
-        multipleLevel(filePath.string());
-    }
-    catch (const spdlog::spdlog_ex &ex) {
+    } catch (const spdlog::spdlog_ex &ex) {
         std::cout << "Log initialization failed: " << ex.what() << std::endl;
-    }
-    catch (const std::bad_alloc& e)
-    {
+    } catch (const std::bad_alloc &e) {
         VF_LOG_CRITICAL("Bad Alloc: {}", e.what());
-    }
-    catch (const std::exception& e)
-    {
+    } catch (const std::exception &e) {
         VF_LOG_CRITICAL("exception: {}", e.what());
-    }
-    catch (...)
-    {
+    } catch (...) {
         VF_LOG_CRITICAL("Unknown exception!");
     }
 
-   return 0;
+    return 0;
 }
diff --git a/apps/gpu/LBM/DrivenCavityMultiGPU/DrivenCavityMultiGPU.cpp b/apps/gpu/LBM/DrivenCavityMultiGPU/DrivenCavityMultiGPU.cpp
index 0376e376b365c8c8282cc3cd79a5c365b416193f..cd0c73a3ecf8af1a534a53ba1237aea64bfe4b59 100644
--- a/apps/gpu/LBM/DrivenCavityMultiGPU/DrivenCavityMultiGPU.cpp
+++ b/apps/gpu/LBM/DrivenCavityMultiGPU/DrivenCavityMultiGPU.cpp
@@ -6,6 +6,7 @@
 #include <sstream>
 #include <stdexcept>
 #include <string>
+#include <filesystem>
 
 #include "mpi.h"
 
@@ -19,6 +20,7 @@
 #include "basics/Core/Logger/Logger.h"
 #include "basics/Core/StringUtilities/StringUtil.h"
 #include "basics/config/ConfigurationFile.h"
+#include "logger/Logger.h"
 
 //////////////////////////////////////////////////////////////////////////
 
@@ -65,26 +67,14 @@
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-//  Tesla 03
-// const std::string outPath("E:/temp/DrivenCavityMultiGPUResults/");
-// const std::string gridPath = "D:/STLs/DrivenCavity";
-// const std::string simulationName("DrivenCavityMultiGPU");
-
-// Phoenix
-// const std::string outPath("/work/y0078217/Results/DrivenCavityMultiGPUResults/");
-// const std::string gridPath = "/work/y0078217/Grids/GridDrivenCavityMultiGPU/";
-// const std::string simulationName("DrivenCavityMultiGPU");
-
-//  Aragorn
 const std::string outPath("output/DrivenCavity_Results/");
 const std::string gridPath = "output/DrivenCavity_Results/grid/";
-const std::string simulationName("DrivenCavity");
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-void multipleLevel(const std::string &configPath)
+void multipleLevel(std::filesystem::path& configPath)
 {
     logging::Logger::addStream(&std::cout);
     logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
@@ -99,8 +89,8 @@ void multipleLevel(const std::string &configPath)
 
     vf::basics::ConfigurationFile config;
     std::cout << configPath << std::endl;
-    config.load(configPath);
-    SPtr<Parameter> para = std::make_shared<Parameter>(config, communicator.getNummberOfProcess(), communicator.getPID());
+    config.load(configPath.string());
+    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNummberOfProcess(), communicator.getPID(), &config);
     BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
 
 
@@ -125,8 +115,8 @@ void multipleLevel(const std::string &configPath)
     const uint nx       = 64;
     std::string simulationName("DrivenCavityChimMultiGPU");
 
-    // para->setTOut(10000);   // set in config
-    // para->setTEnd(10000);   // set in config
+    // para->setTimestepOut(10000);   // set in config
+    // para->setTimestepEnd(10000);   // set in config
 
     const real dxGrid      = L / real(nx);
     const real velocityLB  = velocity * dt / dxGrid;       // LB units
@@ -134,14 +124,6 @@ void multipleLevel(const std::string &configPath)
     const real vyLB        = velocityLB / (real)sqrt(2.0); // LB units
     const real viscosityLB = nx * velocityLB / Re;         // LB units
 
-    *logging::out << logging::Logger::INFO_HIGH << "velocity  [dx/dt] = " << velocityLB << " \n";
-    *logging::out << logging::Logger::INFO_HIGH << "viscosity [dx^2/dt] = " << viscosityLB << "\n";
-
-    para->setVelocity(velocityLB);
-    para->setViscosity(viscosityLB);
-    para->setVelocityRatio(velocity / velocityLB);
-    para->setDensityRatio((real)1.0); // correct value?
-
     para->setInitialCondition([&](real coordX, real coordY, real coordZ, real &rho, real &vx, real &vy, real &vz) {
         rho = (real)1.0;
         vx  = (real)(coordX * velocityLB);
@@ -149,27 +131,35 @@ void multipleLevel(const std::string &configPath)
         vz  = (real)(coordZ * velocityLB);
     });
 
+    para->setVelocityLB(velocityLB);
+    para->setViscosityLB(viscosityLB);
+    para->setVelocityRatio(velocity / velocityLB);
+    para->setDensityRatio((real)1.0); // correct value?
+
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
     para->setCalcDragLift(false);
     para->setUseWale(false);
 
-    if (para->getOutputPath().size() == 0) {
-        para->setOutputPath(outPath);
-    }
+    if (para->getOutputPath() == "output/") {para->setOutputPath(outPath);}
     para->setOutputPrefix(simulationName);
-    para->setFName(para->getOutputPath() + para->getOutputPrefix());
+
     para->setPrintFiles(true);
     std::cout << "Write result files to " << para->getFName() << std::endl;
 
-    if (useLevels)
-        para->setMaxLevel(2);
-    else
-        para->setMaxLevel(1);
-
     // para->setMainKernel("CumulantK17CompChim");
     para->setMainKernel("CumulantK17CompChimStream");
-    *logging::out << logging::Logger::INFO_HIGH << "Kernel: " << para->getMainKernel() << "\n";
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+    vf::logging::Logger::changeLogPath(para->getOutputPath());
+    VF_LOG_INFO("LB parameters:");
+    VF_LOG_INFO("velocity LB [dx/dt]              = {}", vxLB);
+    VF_LOG_INFO("viscosity LB [dx^2/dt]           = {}", viscosityLB);
+    VF_LOG_INFO("dxGrid [-]                       = {}\n", dxGrid);
+
+    VF_LOG_INFO("simulation parameters:");
+    VF_LOG_INFO("mainKernel                       = {}\n", para->getMainKernel());
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
@@ -501,8 +491,6 @@ void multipleLevel(const std::string &configPath)
                 }
                 //////////////////////////////////////////////////////////////////////////
             }
-            if (para->getKernelNeedsFluidNodeIndicesToRun())
-                gridBuilder->findFluidNodes(para->getUseStreams());
 
             // gridBuilder->writeGridsToVtk(outPath +  "/grid/part" + std::to_string(generatePart) + "_");
             // gridBuilder->writeArrows(outPath + "/" + std::to_string(generatePart) + " /arrow");
@@ -528,9 +516,6 @@ void multipleLevel(const std::string &configPath)
             gridBuilder->setVelocityBoundaryCondition(SideType::PZ, vxLB, 0.0, 0.0);
 
             //////////////////////////////////////////////////////////////////////////
-            if (para->getKernelNeedsFluidNodeIndicesToRun())
-                gridBuilder->findFluidNodes(para->getUseStreams());
-
             gridBuilder->writeGridsToVtk(outPath + "/grid/");
             // gridBuilder->writeArrows(outPath + "/arrow");
 
@@ -557,44 +542,39 @@ void multipleLevel(const std::string &configPath)
 
 int main(int argc, char *argv[])
 {
+    MPI_Init(&argc, &argv);
     std::string str, str2, configFile;
 
     if (argv != NULL) {
 
         try {
             //////////////////////////////////////////////////////////////////////////
+            // assuming that a config file is stored parallel to this file.
+            std::filesystem::path configPath = __FILE__;
 
-            std::string targetPath;
-
-            targetPath = __FILE__;
-
+            // the config file's default name can be replaced by passing a command line argument
+            std::string configName("configDrivenCavityMultiGPU.txt");
             if (argc == 2) {
-                configFile = argv[1];
-                std::cout << "Using configFile command line argument: " << configFile << std::endl;
+                configName = argv[1];
+                std::cout << "Using configFile command line argument: " << configName << std::endl;
             }
 
-#ifdef _WIN32
-            targetPath = targetPath.substr(0, targetPath.find_last_of('\\') + 1);
-#else
-            targetPath = targetPath.substr(0, targetPath.find_last_of('/') + 1);
-#endif
-
-            std::cout << targetPath << std::endl;
-
-            if (configFile.size() == 0) {
-                configFile = targetPath + "configDrivenCavityMultiGPU.txt";
-            }
+            configPath.replace_filename(configName);
 
-            multipleLevel(configFile);
+            multipleLevel(configPath);
 
             //////////////////////////////////////////////////////////////////////////
+        } catch (const spdlog::spdlog_ex &ex) {
+            std::cout << "Log initialization failed: " << ex.what() << std::endl;
         } catch (const std::bad_alloc &e) {
-            *logging::out << logging::Logger::LOGGER_ERROR << "Bad Alloc:" << e.what() << "\n";
+            VF_LOG_CRITICAL("Bad Alloc: {}", e.what());
         } catch (const std::exception &e) {
-            *logging::out << logging::Logger::LOGGER_ERROR << e.what() << "\n";
+            VF_LOG_CRITICAL("exception: {}", e.what());
         } catch (...) {
-            *logging::out << logging::Logger::LOGGER_ERROR << "Unknown exception!\n";
+            VF_LOG_CRITICAL("Unknown exception!");
         }
     }
+
+    MPI_Finalize();
     return 0;
-}
\ No newline at end of file
+}
diff --git a/apps/gpu/LBM/DrivenCavityMultiGPU/configDrivenCavityMultiGPU.txt b/apps/gpu/LBM/DrivenCavityMultiGPU/configDrivenCavityMultiGPU.txt
index f37aac7034140d6d288852f42583c8cce7b840e9..69b6d099c97bd0bebeec71104ad484513f50c6e7 100644
--- a/apps/gpu/LBM/DrivenCavityMultiGPU/configDrivenCavityMultiGPU.txt
+++ b/apps/gpu/LBM/DrivenCavityMultiGPU/configDrivenCavityMultiGPU.txt
@@ -31,6 +31,6 @@ GridPath=/workspaces/VirtualFluids_dev/output/DrivenCavity_Results/grid/  # Arag
 ##################################################
 #simulation parameter
 ##################################################
-TimeEnd=100000
-TimeOut=10000
+TimeEnd=10000
+TimeOut=1000
 #TimeStartOut=0
\ No newline at end of file
diff --git a/apps/gpu/LBM/MusselOyster/MusselOyster.cpp b/apps/gpu/LBM/MusselOyster/MusselOyster.cpp
index 0bef3e75ff93e42f84477431bf78e7cbf7702b17..106c9de23f0458b5ace0fcf988e7d1f53b4e71c9 100644
--- a/apps/gpu/LBM/MusselOyster/MusselOyster.cpp
+++ b/apps/gpu/LBM/MusselOyster/MusselOyster.cpp
@@ -1,5 +1,6 @@
 #define _USE_MATH_DEFINES
 #include <exception>
+#include <filesystem>
 #include <fstream>
 #include <iostream>
 #include <math.h>
@@ -20,6 +21,7 @@
 #include "basics/Core/Logger/Logger.h"
 #include "basics/Core/StringUtilities/StringUtil.h"
 #include "basics/config/ConfigurationFile.h"
+#include "logger/Logger.h"
 
 //////////////////////////////////////////////////////////////////////////
 
@@ -38,15 +40,15 @@
 
 //////////////////////////////////////////////////////////////////////////
 
+#include "VirtualFluids_GPU/BoundaryConditions/BoundaryConditionFactory.h"
 #include "VirtualFluids_GPU/Communication/Communicator.h"
 #include "VirtualFluids_GPU/DataStructureInitializer/GridProvider.h"
 #include "VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h"
 #include "VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h"
+#include "VirtualFluids_GPU/GPU/CudaMemoryManager.h"
 #include "VirtualFluids_GPU/LBM/Simulation.h"
 #include "VirtualFluids_GPU/Output/FileWriter.h"
 #include "VirtualFluids_GPU/Parameter/Parameter.h"
-#include "VirtualFluids_GPU/GPU/CudaMemoryManager.h"
-#include "VirtualFluids_GPU/BoundaryConditions/BoundaryConditionFactory.h"
 
 //////////////////////////////////////////////////////////////////////////
 
@@ -62,18 +64,18 @@
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
+// Relative Paths
+const std::string outPath("./output/MusselOysterResults/");
+const std::string gridPathParent = "./output/MusselOysterResults/grid/";
+const std::string stlPath("./stl/MusselOyster/");
+const std::string simulationName("MusselOyster");
+
 // Tesla 03
 // const std::string outPath("E:/temp/MusselOysterResults/");
 // const std::string gridPathParent = "E:/temp/GridMussel/";
 // const std::string stlPath("C:/Users/Master/Documents/MasterAnna/STL/");
 // const std::string simulationName("MusselOyster");
 
-// Aragorn
-const std::string outPath("./output/MusselOysterResults/");
-const std::string gridPathParent = "./output/MusselOysterResults/grid/";
-const std::string stlPath("./stl/MusselOyster/");
-const std::string simulationName("MusselOyster");
-
 // Phoenix
 // const std::string outPath("/work/y0078217/Results/MusselOysterResults/");
 // const std::string gridPathParent = "/work/y0078217/Grids/GridMusselOyster/";
@@ -84,23 +86,23 @@ const std::string simulationName("MusselOyster");
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-void multipleLevel(const std::string &configPath)
+void multipleLevel(std::filesystem::path &configPath)
 {
     logging::Logger::addStream(&std::cout);
     logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
     logging::Logger::timeStamp(logging::Logger::ENABLE);
     logging::Logger::enablePrintedRankNumbers(logging::Logger::ENABLE);
 
-    vf::gpu::Communicator& communicator = vf::gpu::Communicator::getInstance();
+    vf::gpu::Communicator &communicator = vf::gpu::Communicator::getInstance();
 
     auto gridFactory = GridFactory::make();
     gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
     auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
 
     vf::basics::ConfigurationFile config;
-    std::cout << configPath << std::endl;
-    config.load(configPath);
-    SPtr<Parameter> para = std::make_shared<Parameter>(config, communicator.getNummberOfProcess(), communicator.getPID());
+    config.load(configPath.string());
+    SPtr<Parameter> para =
+        std::make_shared<Parameter>(communicator.getNummberOfProcess(), communicator.getPID(), &config);
     BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -124,7 +126,6 @@ void multipleLevel(const std::string &configPath)
         para->useReducedCommunicationAfterFtoC = false;
     }
 
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     std::string bivalveType = "MUSSEL"; // "MUSSEL" "OYSTER"
     std::string gridPath(
@@ -135,51 +136,51 @@ void multipleLevel(const std::string &configPath)
     // real dxGrid = (real)1.0; // 1.0
     if (para->getNumprocs() == 8)
         dxGrid = 0.5;
-    real vxLB            = (real)0.051; // LB units
-    real Re              = (real)300.0;
+    real vxLB = (real)0.051; // LB units
+    real Re = (real)300.0;
     real referenceLength = 1.0 / dxGrid; // heightBivalve / dxGrid
-    real viscosityLB     = (vxLB * referenceLength) / Re;
+    real viscosityLB = (vxLB * referenceLength) / Re;
 
-    para->setVelocity(vxLB);
-    para->setViscosity(viscosityLB);
+    para->setVelocityLB(vxLB);
+    para->setViscosityLB(viscosityLB);
     para->setVelocityRatio((real)58.82352941);
     para->setViscosityRatio((real)0.058823529);
     para->setDensityRatio((real)998.0);
 
-    *logging::out << logging::Logger::INFO_HIGH << "bivalveType = " << bivalveType << " \n";
-    *logging::out << logging::Logger::INFO_HIGH << "velocity LB [dx/dt] = " << vxLB << " \n";
-    *logging::out << logging::Logger::INFO_HIGH << "viscosity LB [dx^2/dt] = " << viscosityLB << "\n";
-    *logging::out << logging::Logger::INFO_HIGH << "velocity real [m/s] = " << vxLB * para->getVelocityRatio() << " \n";
-    *logging::out << logging::Logger::INFO_HIGH
-                  << "viscosity real [m^2/s] = " << viscosityLB * para->getViscosityRatio() << "\n";
-    *logging::out << logging::Logger::INFO_HIGH << "dxGrid = " << dxGrid << "\n";
-    *logging::out << logging::Logger::INFO_HIGH << "useGridGenerator = " << useGridGenerator << "\n";
-    *logging::out << logging::Logger::INFO_HIGH << "useStreams = " << useStreams << "\n";
-    *logging::out << logging::Logger::INFO_HIGH << "number of processes = " << para->getNumprocs() << "\n";
-
-    // para->setTOut(1000);
-    // para->setTEnd(10000);
+    // para->setTimestepOut(1000);
+    // para->setTimestepEnd(10000);
 
     para->setCalcDragLift(false);
     para->setUseWale(false);
 
-    if (para->getOutputPath().size() == 0) {
-        para->setOutputPath(outPath);
-    }
     para->setOutputPrefix(simulationName);
-    para->setFName(para->getOutputPath() + para->getOutputPrefix());
+    if (para->getOutputPath() == "output/") {para->setOutputPath(outPath);}
+
     para->setPrintFiles(true);
     std::cout << "Write result files to " << para->getFName() << std::endl;
 
-    if (useLevels)
-        para->setMaxLevel(2);
-    else
-        para->setMaxLevel(1);
-
     para->setUseStreams(useStreams);
     // para->setMainKernel("CumulantK17CompChim");
     para->setMainKernel("CumulantK17CompChimStream");
-    *logging::out << logging::Logger::INFO_HIGH << "Kernel: " << para->getMainKernel() << "\n";
+    
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    
+    VF_LOG_INFO("LB parameters:");
+    VF_LOG_INFO("velocity LB [dx/dt]              = {}", vxLB);
+    VF_LOG_INFO("viscosity LB [dx^2/dt]           = {}", viscosityLB);
+    VF_LOG_INFO("dxGrid [-]                       = {}\n", dxGrid);
+
+    VF_LOG_INFO("world parameters:");
+    VF_LOG_INFO("velocity [m/s]                   = {}", vxLB * para->getVelocityRatio());
+    VF_LOG_INFO("viscosity [m^2/s]                = {}\n", viscosityLB * para->getViscosityRatio());
+
+    VF_LOG_INFO("simulation parameters:");
+    VF_LOG_INFO("useGridGenerator                 = {}", useGridGenerator);
+    VF_LOG_INFO("useStreams                       = {}", para->getUseStreams());
+    VF_LOG_INFO("number of processes              = {}", para->getNumprocs());
+    VF_LOG_INFO("useReducedCommunicationAfterFtoC = {}", para->useReducedCommunicationAfterFtoC);
+    VF_LOG_INFO("bivalveType                      = {}", bivalveType);
+    VF_LOG_INFO("mainKernel                       = {}\n", para->getMainKernel());
 
     //////////////////////////////////////////////////////////////////////////
 
@@ -194,7 +195,7 @@ void multipleLevel(const std::string &configPath)
         // height MUSSEL = 35.0
         // height Oyster = 72.0
 
-        TriangularMesh *bivalveSTL       = TriangularMesh::make(stlPath + bivalveType + ".stl");
+        TriangularMesh *bivalveSTL = TriangularMesh::make(stlPath + bivalveType + ".stl");
         TriangularMesh *bivalveRef_1_STL = nullptr;
         if (useLevels)
             bivalveRef_1_STL = TriangularMesh::make(stlPath + bivalveType + "_Level1.stl");
@@ -527,8 +528,6 @@ void multipleLevel(const std::string &configPath)
                 // gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, 0.0, 0.0);
                 //////////////////////////////////////////////////////////////////////////
             }
-            if (para->getKernelNeedsFluidNodeIndicesToRun())
-                gridBuilder->findFluidNodes(useStreams);
 
             // gridBuilder->writeGridsToVtk(outPath +  bivalveType + "/grid/part" + std::to_string(generatePart) + "_");
             // gridBuilder->writeArrows(outPath + bivalveType + "/" + std::to_string(generatePart) + " /arrow");
@@ -558,8 +557,6 @@ void multipleLevel(const std::string &configPath)
             gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0); // set pressure BC after velocity BCs
 
             //////////////////////////////////////////////////////////////////////////
-            if (para->getKernelNeedsFluidNodeIndicesToRun())
-                gridBuilder->findFluidNodes(useStreams);
 
             // gridBuilder->writeGridsToVtk(outPath +  bivalveType + "/grid/");
             // gridBuilder->writeArrows ((outPath + bivalveType + "/arrow");
@@ -570,11 +567,10 @@ void multipleLevel(const std::string &configPath)
         bcFactory.setVelocityBoundaryCondition(BoundaryConditionFactory::VelocityBC::VelocityAndPressureCompressible);
         bcFactory.setPressureBoundaryCondition(BoundaryConditionFactory::PressureBC::OutflowNonReflective);
         bcFactory.setGeometryBoundaryCondition(BoundaryConditionFactory::NoSlipBC::NoSlipCompressible);
-
     }
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-   auto cudaMemoryManager = std::make_shared<CudaMemoryManager>(para);
+    auto cudaMemoryManager = std::make_shared<CudaMemoryManager>(para);
 
     SPtr<GridProvider> gridGenerator;
     if (useGridGenerator)
@@ -595,42 +591,35 @@ int main(int argc, char *argv[])
     std::string str, str2, configFile;
 
     if (argv != NULL) {
+
         try {
             //////////////////////////////////////////////////////////////////////////
+            // assuming that a config file is stored parallel to this file.
+            std::filesystem::path configPath = __FILE__;
 
-            std::string targetPath;
-
-            targetPath = __FILE__;
-
+            // the config file's default name can be replaced by passing a command line argument
+            std::string configName("configMusselOyster.txt");
             if (argc == 2) {
-                configFile = argv[1];
-                std::cout << "Using configFile command line argument: " << configFile << std::endl;
+                configName = argv[1];
+                std::cout << "Using configFile command line argument: " << configName << std::endl;
             }
 
-#ifdef _WIN32
-            targetPath = targetPath.substr(0, targetPath.find_last_of('\\') + 1);
-#else
-            targetPath = targetPath.substr(0, targetPath.find_last_of('/') + 1);
-#endif
+            configPath.replace_filename(configName);
 
-            std::cout << targetPath << std::endl;
-
-            if (configFile.size() == 0) {
-                configFile = targetPath + "configMusselOyster.txt";
-            }
-
-            multipleLevel(configFile);
+            multipleLevel(configPath);
 
             //////////////////////////////////////////////////////////////////////////
+        } catch (const spdlog::spdlog_ex &ex) {
+            std::cout << "Log initialization failed: " << ex.what() << std::endl;
         } catch (const std::bad_alloc &e) {
-            *logging::out << logging::Logger::LOGGER_ERROR << "Bad Alloc:" << e.what() << "\n";
+            VF_LOG_CRITICAL("Bad Alloc: {}", e.what());
         } catch (const std::exception &e) {
-            *logging::out << logging::Logger::LOGGER_ERROR << e.what() << "\n";
+            VF_LOG_CRITICAL("exception: {}", e.what());
         } catch (...) {
-            *logging::out << logging::Logger::LOGGER_ERROR << "Unknown exception!\n";
+            VF_LOG_CRITICAL("Unknown exception!");
         }
     }
 
     MPI_Finalize();
     return 0;
-}
\ No newline at end of file
+}
diff --git a/apps/gpu/LBM/SphereGPU/Sphere.cpp b/apps/gpu/LBM/SphereGPU/Sphere.cpp
index f0d629afecca44f917d002543af7689e0889f42b..e0a860b90fbdd49f896ab28a873b9ef36036104e 100644
--- a/apps/gpu/LBM/SphereGPU/Sphere.cpp
+++ b/apps/gpu/LBM/SphereGPU/Sphere.cpp
@@ -1,10 +1,40 @@
-
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file Sphere.cpp
+//! \ingroup Applications
+//! \author Martin Schoenherr, Stephan Lenz, Anna Wellmann
+//=======================================================================================
 #define _USE_MATH_DEFINES
 #include <exception>
 #include <filesystem>
 #include <fstream>
 #include <iostream>
-#include <math.h>
 #include <memory>
 #include <sstream>
 #include <stdexcept>
@@ -13,15 +43,12 @@
 //////////////////////////////////////////////////////////////////////////
 
 #include "Core/DataTypes.h"
-#include "PointerDefinitions.h"
-
-#include "Core/StringUtilities/StringUtil.h"
-
+#include "Core/LbmOrGks.h"
+#include "Core/Logger/Logger.h"
 #include "Core/VectorTypes.h"
-
-#include <basics/config/ConfigurationFile.h>
-
-#include <logger/Logger.h>
+#include "PointerDefinitions.h"
+#include "config/ConfigurationFile.h"
+#include "logger/Logger.h"
 
 //////////////////////////////////////////////////////////////////////////
 
@@ -30,179 +57,198 @@
 #include "GridGenerator/grid/GridBuilder/MultipleGridBuilder.h"
 #include "GridGenerator/grid/GridFactory.h"
 
-#include "GridGenerator/io/GridVTKWriter/GridVTKWriter.h"
-#include "GridGenerator/io/STLReaderWriter/STLReader.h"
-#include "GridGenerator/io/STLReaderWriter/STLWriter.h"
-#include "GridGenerator/io/SimulationFileWriter/SimulationFileWriter.h"
-
 #include "GridGenerator/geometries/Sphere/Sphere.h"
+#include "GridGenerator/geometries/TriangularMesh/TriangularMesh.h"
 
 //////////////////////////////////////////////////////////////////////////
 
-#include "VirtualFluids_GPU/BoundaryConditions/BoundaryConditionFactory.h"
-#include "VirtualFluids_GPU/Communication/Communicator.h"
 #include "VirtualFluids_GPU/DataStructureInitializer/GridProvider.h"
-#include "VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.h"
 #include "VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h"
+#include "VirtualFluids_GPU/GPU/CudaMemoryManager.h"
+#include "VirtualFluids_GPU/Communication/Communicator.h"
 #include "VirtualFluids_GPU/LBM/Simulation.h"
 #include "VirtualFluids_GPU/Output/FileWriter.h"
 #include "VirtualFluids_GPU/Parameter/Parameter.h"
+#include "VirtualFluids_GPU/BoundaryConditions/BoundaryConditionFactory.h"
+#include "VirtualFluids_GPU/BoundaryConditions/BoundaryConditionFactory.h"
+#include "VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.h"
+#include "VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.h"
 
-#include "VirtualFluids_GPU/GPU/CudaMemoryManager.h"
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//
-//          U s e r    s e t t i n g s
-//
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-const real L = 1.0;
-const real dSphere = 0.2;
-const real Re = 1000.0; // related to the sphere's diameter
-const real velocity = 1.0;
-const real dt = (real)0.5e-3;
-const uint nx = 64;
-
-const uint timeStepOut = 10000;
-const uint timeStepEnd = 100000;
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////
 
-void multipleLevel(const std::string &configPath)
+int main(int argc, char *argv[])
 {
-    logging::Logger::addStream(&std::cout);
-    logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
-    logging::Logger::timeStamp(logging::Logger::ENABLE);
-    logging::Logger::enablePrintedRankNumbers(logging::Logger::ENABLE);
-
-    vf::gpu::Communicator &communicator = vf::gpu::Communicator::getInstance();
-
-    auto gridFactory = GridFactory::make();
-    gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
-    auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    real dx = L / real(nx);
-
-    gridBuilder->addCoarseGrid(-1.0 * L, -0.5 * L, -0.5 * L, 
-                                6.0 * L,  0.5 * L,  0.5 * L, dx);
-
-    Object *sphere = new Sphere(0.0, 0.0, 0.0, dSphere / 2.0);
-    gridBuilder->addGeometry(sphere);
-
-    gridBuilder->setPeriodicBoundaryCondition(false, false, false);
-
-    gridBuilder->buildGrids(LBM, false); // buildGrids() has to be called before setting the BCs!!!!
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    vf::basics::ConfigurationFile config;
-    config.load(configPath);
-
-    SPtr<Parameter> para =
-        std::make_shared<Parameter>(config, communicator.getNummberOfProcess(), communicator.getPID());
-    BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    const real velocityLB = velocity * dt / dx; // LB units
-
-    const real viscosityLB = (dSphere / dx) * velocityLB / Re; // LB units
-
-    VF_LOG_INFO("velocity  [dx/dt] = {}", velocityLB);
-    VF_LOG_INFO("viscosity [dx^2/dt] = {}", viscosityLB);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    para->setDevices(std::vector<uint>{ (uint)0 });
-
-    para->setFName(para->getOutputPath() + "/" + para->getOutputPrefix());
-
-    para->setPrintFiles(true);
-
-    para->setMaxLevel(2);
-
-    para->setVelocity(velocityLB);
-    para->setViscosity(viscosityLB);
-
-    para->setVelocityRatio(velocity / velocityLB);
-    para->setDensityRatio((real)1.0);
-
-    // para->setMainKernel("CumulantK17CompChim");
-
-    // para->setInitialCondition([&](real coordX, real coordY, real coordZ, real &rho, real &vx, real &vy, real &vz) {
-    //     rho = (real)0.0;
-    //     vx =  (real)velocityLB;
-    //     vy =  (real)0.0;
-    //     vz =  (real)0.0;
-    // });
-
-    para->setTOut(timeStepOut);
-    para->setTEnd(timeStepEnd);
-
-    /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    gridBuilder->setVelocityBoundaryCondition(SideType::MX, velocityLB, 0.0, 0.0);
-
-    gridBuilder->setSlipBoundaryCondition(SideType::PY, 0.0, 0.0, 0.0);
-    gridBuilder->setSlipBoundaryCondition(SideType::MY, 0.0, 0.0, 0.0);
-    gridBuilder->setSlipBoundaryCondition(SideType::PZ, 0.0, 0.0, 0.0);
-    gridBuilder->setSlipBoundaryCondition(SideType::MZ, 0.0, 0.0, 0.0);
-
-    // gridBuilder->setNoSlipBoundaryCondition(SideType::GEOMETRY); // not working yet, use veloBC
-    gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, velocityLB * 2.0, 0.0);
-    gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0); // set pressure boundary condition last
-
-    bcFactory.setNoSlipBoundaryCondition(BoundaryConditionFactory::NoSlipBC::NoSlipCompressible);
-    bcFactory.setVelocityBoundaryCondition(BoundaryConditionFactory::VelocityBC::VelocityCompressible);
-    bcFactory.setSlipBoundaryCondition(BoundaryConditionFactory::SlipBC::SlipCompressible);
-    bcFactory.setPressureBoundaryCondition(BoundaryConditionFactory::PressureBC::PressureNonEquilibriumCompressible);
-    bcFactory.setGeometryBoundaryCondition(BoundaryConditionFactory::NoSlipBC::NoSlipCompressible);
-    // bcFactory.setGeometryBoundaryCondition(BoundaryConditionFactory::VelocityBC::VelocityCompressible);
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-    // gridBuilder->writeGridsToVtk("grid/");
+    try {
+        //////////////////////////////////////////////////////////////////////////
+        // Simulation parameters
+        //////////////////////////////////////////////////////////////////////////
+
+        const bool useConfigFile = true;
+
+        const real L = 1.0;
+        const real dSphere = 0.2;
+        const real Re = 1000.0; // related to the sphere's diameter
+        const real velocity = 1.0;
+        const real dt = (real)0.5e-3;
+        const uint nx = 64;
+
+        const uint timeStepOut = 1000;
+        const uint timeStepEnd = 10000;
+
+        //////////////////////////////////////////////////////////////////////////
+        // setup logger
+        //////////////////////////////////////////////////////////////////////////
+
+        logging::Logger::addStream(&std::cout);
+        logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
+        logging::Logger::timeStamp(logging::Logger::ENABLE);
+        logging::Logger::enablePrintedRankNumbers(logging::Logger::ENABLE);
+
+        //////////////////////////////////////////////////////////////////////////
+        // setup simulation parameters (with or without config file)
+        //////////////////////////////////////////////////////////////////////////
+
+        vf::gpu::Communicator& communicator = vf::gpu::Communicator::getInstance();
+        SPtr<Parameter> para;
+        BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
+        vf::basics::ConfigurationFile config;
+        if (useConfigFile) {
+            //////////////////////////////////////////////////////////////////////////
+            // read simulation parameters from config file
+            //////////////////////////////////////////////////////////////////////////
+
+            // assuming that a config file is stored parallel to this file.
+            std::filesystem::path configPath = __FILE__;
+
+            // the config file's default name can be replaced by passing a command line argument
+            std::string configName("config.txt");
+            if (argc == 2) {
+                configName = argv[1];
+                std::cout << "Using configFile command line argument: " << configName << std::endl;
+            }
+
+            configPath.replace_filename(configName);
+            config.load(configPath.string());
+
+            para = std::make_shared<Parameter>(&config);
+        } else {
+            para = std::make_shared<Parameter>();
+        }
+
+        //////////////////////////////////////////////////////////////////////////
+        // setup gridGenerator
+        //////////////////////////////////////////////////////////////////////////
+
+        auto gridFactory = GridFactory::make();
+        gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
+        auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
+
+        //////////////////////////////////////////////////////////////////////////
+        // create grid
+        //////////////////////////////////////////////////////////////////////////
+
+        real dx = L / real(nx);
+        gridBuilder->addCoarseGrid(-1.0 * L, -0.8 * L, -0.8 * L,
+                                    6.0 * L,  0.8 * L,  0.8 * L, dx);
+
+        // use primitive
+        Object *sphere = new Sphere(0.0, 0.0, 0.0, dSphere / 2.0);
+
+        // use stl
+        // std::string stlPath = "stl/sphere02.stl";
+        // if (useConfigFile && config.contains("STLPath")) {
+        //     stlPath = config.getValue<std::string>("STLPath");
+        // }
+        // std::cout << "Reading stl from " << stlPath << "." << std::endl;
+        // Object *sphere = TriangularMesh::make(stlPath);
+
+        gridBuilder->addGeometry(sphere);
+        gridBuilder->setPeriodicBoundaryCondition(false, false, false);
+        gridBuilder->buildGrids(LBM, false);  // buildGrids() has to be called before setting the BCs!!!!
+
+        //////////////////////////////////////////////////////////////////////////
+        // compute parameters in lattice units
+        //////////////////////////////////////////////////////////////////////////
+
+        const real velocityLB = velocity * dt / dx; // LB units
+        const real viscosityLB =  (dSphere / dx) * velocityLB / Re; // LB units
+
+        VF_LOG_INFO("LB parameters:");
+        VF_LOG_INFO("velocity LB [dx/dt]              = {}", velocityLB);
+        VF_LOG_INFO("viscosity LB [dx^2/dt]           = {}", viscosityLB);
+
+        //////////////////////////////////////////////////////////////////////////
+        // set parameters
+        //////////////////////////////////////////////////////////////////////////
+
+        para->setPrintFiles(true);
+
+        para->setVelocityLB(velocityLB);
+        para->setViscosityLB(viscosityLB);
+
+        para->setVelocityRatio(velocity / velocityLB);
+        para->setDensityRatio((real)1.0);
+
+        para->setTimestepOut(timeStepOut);
+        para->setTimestepEnd(timeStepEnd);
+
+        //////////////////////////////////////////////////////////////////////////
+        // set boundary conditions
+        //////////////////////////////////////////////////////////////////////////
+
+        gridBuilder->setVelocityBoundaryCondition(SideType::MX, velocityLB, 0.0, 0.0);
+
+        gridBuilder->setSlipBoundaryCondition(SideType::PY, 0.0, 0.0, 0.0);
+        gridBuilder->setSlipBoundaryCondition(SideType::MY, 0.0, 0.0, 0.0);
+        gridBuilder->setSlipBoundaryCondition(SideType::PZ, 0.0, 0.0, 0.0);
+        gridBuilder->setSlipBoundaryCondition(SideType::MZ, 0.0, 0.0, 0.0);
+
+        gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, 0.0, 0.0);
+        gridBuilder->setPressureBoundaryCondition(SideType::PX, 0.0); // set pressure boundary condition last
+
+        bcFactory.setVelocityBoundaryCondition(BoundaryConditionFactory::VelocityBC::VelocityCompressible);
+        bcFactory.setSlipBoundaryCondition(BoundaryConditionFactory::SlipBC::SlipCompressible);
+        bcFactory.setPressureBoundaryCondition(BoundaryConditionFactory::PressureBC::PressureNonEquilibriumCompressible);
+        bcFactory.setGeometryBoundaryCondition(BoundaryConditionFactory::NoSlipBC::NoSlipCompressible);
+
+        //////////////////////////////////////////////////////////////////////////
+        // setup probe(s)
+        //////////////////////////////////////////////////////////////////////////
 
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+        const uint tStartAveraging = 0;
+        const uint tAveraging      = 100;
+        const uint tStartOutProbe  = 0;
+        const uint tOutProbe       = para->getTimestepOut();
+        SPtr<PointProbe> pointProbe = std::make_shared<PointProbe>( "pointProbe", para->getOutputPath(), tStartAveraging, tAveraging, tStartOutProbe, tOutProbe);
+        std::vector<real> probeCoordsX = {0.3, 0.5};
+        std::vector<real> probeCoordsY = {0.0, 0.0};
+        std::vector<real> probeCoordsZ = {0.0, 0.0};
+        pointProbe->addProbePointsFromList(probeCoordsX, probeCoordsY, probeCoordsZ);
 
-    auto cudaMemoryManager = std::make_shared<CudaMemoryManager>(para);
+        pointProbe->addStatistic(Statistic::Instantaneous);
+        pointProbe->addStatistic(Statistic::Means);
+        pointProbe->addStatistic(Statistic::Variances);
+        para->addProbe( pointProbe );
 
-    auto gridGenerator = GridProvider::makeGridGenerator(gridBuilder, para, cudaMemoryManager, communicator);
+        SPtr<PlaneProbe> planeProbe = std::make_shared<PlaneProbe>("planeProbe", para->getOutputPath(), tStartAveraging, tAveraging, tStartOutProbe, tOutProbe);
+        planeProbe->setProbePlane(dSphere, 0, 0, 0.5, 0.1, 0.1);
+        planeProbe->addStatistic(Statistic::Means);
+        para->addProbe( planeProbe );
 
-    Simulation sim(para, cudaMemoryManager, communicator, *gridGenerator, &bcFactory);
-    sim.run();
+        //////////////////////////////////////////////////////////////////////////
+        // setup to copy mesh to simulation
+        //////////////////////////////////////////////////////////////////////////
 
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-}
+        auto cudaMemoryManager = std::make_shared<CudaMemoryManager>(para);
+        SPtr<GridProvider> gridGenerator = GridProvider::makeGridGenerator(gridBuilder, para, cudaMemoryManager, communicator);
 
-int main(int argc, char *argv[])
-{
-    try {
-        vf::logging::Logger::initalizeLogger();
+        //////////////////////////////////////////////////////////////////////////
+        // run simulation
+        //////////////////////////////////////////////////////////////////////////
 
-        // assuming that the config files is stored parallel to this file.
-        std::filesystem::path filePath = __FILE__;
-        filePath.replace_filename("config.txt");
+        Simulation sim(para, cudaMemoryManager, communicator, *gridGenerator, &bcFactory);
+        sim.run();
 
-        multipleLevel(filePath.string());
     } catch (const spdlog::spdlog_ex &ex) {
         std::cout << "Log initialization failed: " << ex.what() << std::endl;
     } catch (const std::bad_alloc &e) {
diff --git a/apps/gpu/LBM/SphereGPU/config.txt b/apps/gpu/LBM/SphereGPU/config.txt
index 7d8b3831490ff828a3d5eaaa047aefeadcd6269e..9d0804ffc0d364e8d75ee16424378f360f07c52d 100644
--- a/apps/gpu/LBM/SphereGPU/config.txt
+++ b/apps/gpu/LBM/SphereGPU/config.txt
@@ -8,7 +8,7 @@
 #informations for Writing
 ##################################################
 Path=output/Sphere/
-Prefix=Sphere
+Prefix=Sphere01
 #WriteGrid=true
 ##################################################
 #informations for reading
diff --git a/apps/gpu/LBM/SphereScaling/SphereScaling.cpp b/apps/gpu/LBM/SphereScaling/SphereScaling.cpp
index e9058e455e9d41173e3c4d500a456cb4f71d6502..a5f25e3d52dfc03f5247867bb79a6edfbc4a26b4 100644
--- a/apps/gpu/LBM/SphereScaling/SphereScaling.cpp
+++ b/apps/gpu/LBM/SphereScaling/SphereScaling.cpp
@@ -8,6 +8,7 @@
 #include <sstream>
 #include <stdexcept>
 #include <string>
+#include <filesystem>
 
 #include "mpi.h"
 
@@ -21,6 +22,7 @@
 #include "basics/Core/Logger/Logger.h"
 #include "basics/Core/StringUtilities/StringUtil.h"
 #include "basics/config/ConfigurationFile.h"
+#include "logger/Logger.h"
 
 //////////////////////////////////////////////////////////////////////////
 
@@ -85,7 +87,7 @@ const std::string stlPath("./stl/SphereScaling/");
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-void multipleLevel(const std::string &configPath)
+void multipleLevel(const std::filesystem::path& configPath)
 {
     logging::Logger::addStream(&std::cout);
     logging::Logger::setDebugLevel(logging::Logger::Level::INFO_LOW);
@@ -99,9 +101,8 @@ void multipleLevel(const std::string &configPath)
     auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
 
     vf::basics::ConfigurationFile config;
-    std::cout << configPath << std::endl;
-    config.load(configPath);
-    SPtr<Parameter> para = std::make_shared<Parameter>(config, communicator.getNummberOfProcess(), communicator.getPID());
+    config.load(configPath.string());
+    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNummberOfProcess(), communicator.getPID(), &config);
     BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -133,47 +134,45 @@ void multipleLevel(const std::string &configPath)
     real vxLB        = (real)0.0005; // LB units
     real viscosityLB = 0.001;        //(vxLB * dxGrid) / Re;
 
-    para->setVelocity(vxLB);
-    para->setViscosity(viscosityLB);
+    para->setVelocityLB(vxLB);
+    para->setViscosityLB(viscosityLB);
     para->setVelocityRatio((real)58.82352941);
     para->setViscosityRatio((real)0.058823529);
     para->setDensityRatio((real)998.0);
 
-    *logging::out << logging::Logger::INFO_HIGH << "velocity LB [dx/dt] = " << vxLB << " \n";
-    *logging::out << logging::Logger::INFO_HIGH << "viscosity LB [dx^2/dt] = " << viscosityLB << "\n";
-    *logging::out << logging::Logger::INFO_HIGH << "velocity real [m/s] = " << vxLB * para->getVelocityRatio() << " \n";
-    *logging::out << logging::Logger::INFO_HIGH
-                  << "viscosity real [m^2/s] = " << viscosityLB * para->getViscosityRatio() << "\n";
-    *logging::out << logging::Logger::INFO_HIGH << "dxGrid = " << dxGrid << "\n";
-    *logging::out << logging::Logger::INFO_HIGH << "useGridGenerator = " << useGridGenerator << "\n";
-    *logging::out << logging::Logger::INFO_HIGH << "useStreams = " << para->getUseStreams() << "\n";
-    *logging::out << logging::Logger::INFO_HIGH << "number of processes = " << para->getNumprocs() << "\n";
-    *logging::out << logging::Logger::INFO_HIGH
-                  << "para->useReducedCommunicationAfterFtoC = " << para->useReducedCommunicationAfterFtoC << "\n";
-    *logging::out << logging::Logger::INFO_HIGH << "scalingType = " << scalingType << "\n";
-
-    // para->setTOut(10);
-    // para->setTEnd(10);
+
+    // para->setTimestepOut(10);
+    // para->setTimestepEnd(10);
 
     para->setCalcDragLift(false);
     para->setUseWale(false);
 
-    if (para->getOutputPath().size() == 0) {
-        para->setOutputPath(outPath);
-    }
+
     para->setOutputPrefix(simulationName);
-    para->setFName(para->getOutputPath() + para->getOutputPrefix());
+    if (para->getOutputPath() == "output/") {para->setOutputPath(outPath);}
     para->setPrintFiles(true);
-    std::cout << "Write result files to " << para->getFName() << std::endl;
-
-    if (useLevels)
-        para->setMaxLevel(2);
-    else
-        para->setMaxLevel(1);
 
     // para->setMainKernel("CumulantK17CompChim");
     para->setMainKernel("CumulantK17CompChimStream");
-    *logging::out << logging::Logger::INFO_HIGH << "Kernel: " << para->getMainKernel() << "\n";
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+    VF_LOG_INFO("LB parameters:");
+    VF_LOG_INFO("velocity LB [dx/dt]              = {}", vxLB);
+    VF_LOG_INFO("viscosity LB [dx^2/dt]           = {}", viscosityLB);
+    VF_LOG_INFO("dxGrid [-]                       = {}\n", dxGrid);
+
+    VF_LOG_INFO("world parameters:");
+    VF_LOG_INFO("velocity [m/s]                   = {}", vxLB * para->getVelocityRatio());
+    VF_LOG_INFO("viscosity [m^2/s]                = {}\n", viscosityLB * para->getViscosityRatio());
+
+    VF_LOG_INFO("simulation parameters:");
+    VF_LOG_INFO("useGridGenerator                 = {}", useGridGenerator);
+    VF_LOG_INFO("useStreams                       = {}", para->getUseStreams());
+    VF_LOG_INFO("number of processes              = {}", para->getNumprocs());
+    VF_LOG_INFO("useReducedCommunicationAfterFtoC = {}", para->useReducedCommunicationAfterFtoC);
+    VF_LOG_INFO("scalingType                      = {}", scalingType);
+    VF_LOG_INFO("mainKernel                       = {}\n", para->getMainKernel());
 
     //////////////////////////////////////////////////////////////////////////
 
@@ -602,8 +601,6 @@ void multipleLevel(const std::string &configPath)
                 // gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, 0.0, 0.0);
                 //////////////////////////////////////////////////////////////////////////
             }
-            if (para->getKernelNeedsFluidNodeIndicesToRun())
-                gridBuilder->findFluidNodes(para->getUseStreams());
 
             // gridBuilder->writeGridsToVtk(outPath + "grid/part" + std::to_string(generatePart) + "_");
             // gridBuilder->writeGridsToVtk(outPath +std::to_string(generatePart) + "/grid/");
@@ -651,8 +648,6 @@ void multipleLevel(const std::string &configPath)
 
             // gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, 0.0, 0.0);
             //////////////////////////////////////////////////////////////////////////
-            if (para->getKernelNeedsFluidNodeIndicesToRun())
-                gridBuilder->findFluidNodes(para->getUseStreams());
 
             // gridBuilder->writeGridsToVtk("E:/temp/MusselOyster/" + "/grid/");
             // gridBuilder->writeArrows ("E:/temp/MusselOyster/" + "/arrow");
@@ -689,37 +684,29 @@ int main(int argc, char *argv[])
 
         try {
             //////////////////////////////////////////////////////////////////////////
+            // assuming that a config file is stored parallel to this file.
+            std::filesystem::path configPath = __FILE__;
 
-            std::string targetPath;
-
-            targetPath = __FILE__;
-
+            // the config file's default name can be replaced by passing a command line argument
+            std::string configName("config.txt");
             if (argc == 2) {
-                configFile = argv[1];
-                std::cout << "Using configFile command line argument: " << configFile << std::endl;
+                configName = argv[1];
+                std::cout << "Using configFile command line argument: " << configName << std::endl;
             }
 
-#ifdef _WIN32
-            targetPath = targetPath.substr(0, targetPath.find_last_of('\\') + 1);
-#else
-            targetPath = targetPath.substr(0, targetPath.find_last_of('/') + 1);
-#endif
-
-            std::cout << targetPath << std::endl;
-
-            if (configFile.size() == 0) {
-                configFile = targetPath + "config.txt";
-            }
+            configPath.replace_filename(configName);
 
-            multipleLevel(configFile);
+            multipleLevel(configPath);
 
             //////////////////////////////////////////////////////////////////////////
+        } catch (const spdlog::spdlog_ex &ex) {
+            std::cout << "Log initialization failed: " << ex.what() << std::endl;
         } catch (const std::bad_alloc &e) {
-            *logging::out << logging::Logger::LOGGER_ERROR << "Bad Alloc:" << e.what() << "\n";
+            VF_LOG_CRITICAL("Bad Alloc: {}", e.what());
         } catch (const std::exception &e) {
-            *logging::out << logging::Logger::LOGGER_ERROR << e.what() << "\n";
+            VF_LOG_CRITICAL("exception: {}", e.what());
         } catch (...) {
-            *logging::out << logging::Logger::LOGGER_ERROR << "Unknown exception!\n";
+            VF_LOG_CRITICAL("Unknown exception!");
         }
     }
 
diff --git a/apps/gpu/LBM/TGV_3D/TGV_3D.cpp b/apps/gpu/LBM/TGV_3D/TGV_3D.cpp
index 34793f04d901ac9a412c8085f169e9e79c61c3e4..168f05a9c3bcf5bc92040f640efbb8ce0efdff58 100644
--- a/apps/gpu/LBM/TGV_3D/TGV_3D.cpp
+++ b/apps/gpu/LBM/TGV_3D/TGV_3D.cpp
@@ -121,10 +121,10 @@ void multipleLevel(const std::string& configPath)
     //gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_UNDER_TRIANGLE);
 
     auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
-    
+
     vf::basics::ConfigurationFile config;
     config.load(configPath);
-    SPtr<Parameter> para = std::make_shared<Parameter>(config, communicator.getNummberOfProcess(), communicator.getPID());
+    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNummberOfProcess(), communicator.getPID(), &config);
     BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -157,7 +157,7 @@ void multipleLevel(const std::string& configPath)
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-  
+
 	//std::stringstream _path;
  //   std::stringstream _prefix;
 
@@ -173,7 +173,7 @@ void multipleLevel(const std::string& configPath)
 
  //   para->setOutputPath(_path.str());
  //   para->setOutputPrefix(_prefix.str());
- //   para->setFName(_path.str() + "/" + _prefix.str());
+ //   para->setPathAndFilename(_path.str() + "/" + _prefix.str());
 
     //////////////////////////////////////////////////////////////////////////
 
@@ -206,20 +206,18 @@ void multipleLevel(const std::string& configPath)
     para->setDevices(std::vector<uint>{gpuIndex});
 
     //////////////////////////////////////////////////////////////////////////
-    
+
     para->setOutputPath( path );
     para->setOutputPrefix( simulationName );
 
-    para->setFName(para->getOutputPath() + "/" + para->getOutputPrefix());
-
     para->setPrintFiles(true);
 
-    para->setTEnd( 40 * lround(L/velocity) );	
-	para->setTOut(  5 * lround(L/velocity) );
+    para->setTimestepEnd( 40 * lround(L/velocity) );
+	para->setTimestepOut(  5 * lround(L/velocity) );
 
-    para->setVelocity( velocity );
+    para->setVelocityLB( velocity );
 
-    para->setViscosity( viscosity );
+    para->setViscosityLB( viscosity );
 
     para->setVelocityRatio( 1.0 / velocity );
 
@@ -249,7 +247,7 @@ void multipleLevel(const std::string& configPath)
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    
+
     auto cudaMemoryManager = std::make_shared<CudaMemoryManager>(para);
     SPtr<GridProvider> gridGenerator = GridProvider::makeGridGenerator(gridBuilder, para, cudaMemoryManager, communicator);
     //SPtr<GridProvider> gridGenerator = GridProvider::makeGridReader(FILEFORMAT::BINARY, para, cudaMemoryManager);
@@ -257,7 +255,7 @@ void multipleLevel(const std::string& configPath)
     SPtr<FileWriter> fileWriter = SPtr<FileWriter>(new FileWriter());
     Simulation sim(para, cudaMemoryManager, communicator, *gridGenerator, &bcFactory);
     sim.run();
-    
+
     sim.addKineticEnergyAnalyzer( 10 );
     sim.addEnstrophyAnalyzer( 10 );
 
@@ -268,11 +266,11 @@ void multipleLevel(const std::string& configPath)
 int main( int argc, char* argv[])
 {
     MPI_Init(&argc, &argv);
-    std::string str, str2; 
+    std::string str, str2;
     if ( argv != NULL )
     {
         //str = static_cast<std::string>(argv[0]);
-        
+
         try
         {
             //////////////////////////////////////////////////////////////////////////
@@ -313,14 +311,14 @@ int main( int argc, char* argv[])
 		}
         catch (const std::bad_alloc& e)
         {
-                
+
             *logging::out << logging::Logger::LOGGER_ERROR << "Bad Alloc:" << e.what() << "\n";
             //std::cout << e.what() << std::flush;
             //MPI_Abort(MPI_COMM_WORLD, -1);
         }
         catch (const std::exception& e)
         {
-                
+
             *logging::out << logging::Logger::LOGGER_ERROR << e.what() << "\n";
             //std::cout << e.what() << std::flush;
             //MPI_Abort(MPI_COMM_WORLD, -1);
diff --git a/apps/gpu/LBM/TGV_3D_MultiGPU/TGV_3D_MultiGPU.cpp b/apps/gpu/LBM/TGV_3D_MultiGPU/TGV_3D_MultiGPU.cpp
index 3c2362c1c997ab7eb226d7aa9c51b40b1e7e5605..d3e068c80d02bd94e2d7143a7cf2cc8a233f409a 100644
--- a/apps/gpu/LBM/TGV_3D_MultiGPU/TGV_3D_MultiGPU.cpp
+++ b/apps/gpu/LBM/TGV_3D_MultiGPU/TGV_3D_MultiGPU.cpp
@@ -109,12 +109,12 @@ std::string simulationName("TGV_3D");
 
 void multipleLevel(const std::string& configPath)
 {
-    
+
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
     int sideLengthX, sideLengthY, sideLengthZ, rankX, rankY, rankZ;
 
-    
+
     if      (mpiWorldSize == 1 ) { sideLengthX = 1; sideLengthY = 1; sideLengthZ = 1; }
     else if (mpiWorldSize == 2 ) { sideLengthX = 2; sideLengthY = 1; sideLengthZ = 1; }
     else if (mpiWorldSize == 4 ) { sideLengthX = 2; sideLengthY = 2; sideLengthZ = 1; }
@@ -129,7 +129,7 @@ void multipleLevel(const std::string& configPath)
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
     logging::Logger::addStream(&std::cout);
-    
+
     std::ofstream logFile( path + simulationName + "_rank_" + std::to_string(mpirank) + ".log" );
     logging::Logger::addStream(&logFile);
 
@@ -147,10 +147,10 @@ void multipleLevel(const std::string& configPath)
     //gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_UNDER_TRIANGLE);
 
     auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
-    
+
     vf::basics::ConfigurationFile config;
     config.load(configPath);
-    SPtr<Parameter> para = std::make_shared<Parameter>(config, communicator.getNummberOfProcess(), communicator.getPID());
+    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNummberOfProcess(), communicator.getPID(), &config);
     BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
 
     *logging::out << logging::Logger::INFO_HIGH << "SideLength = " << sideLengthX << " " << sideLengthY << " " << sideLengthZ << "\n";
@@ -194,7 +194,7 @@ void multipleLevel(const std::string& configPath)
     gridBuilder->addCoarseGrid(  rankX   *LX - PI - xOverlap,      rankY   *LY - PI - yOverlap,      rankZ   *LZ - PI - zOverlap,
                                 (rankX+1)*LX - PI + xOverlap,     (rankY+1)*LY - PI + yOverlap,     (rankZ+1)*LZ - PI + zOverlap, dx);
 
-    gridBuilder->setSubDomainBox( std::make_shared<BoundingBox>( rankX*LX - PI, (rankX+1)*LX - PI, 
+    gridBuilder->setSubDomainBox( std::make_shared<BoundingBox>( rankX*LX - PI, (rankX+1)*LX - PI,
                                                                  rankY*LY - PI, (rankY+1)*LY - PI,
                                                                  rankZ*LZ - PI, (rankZ+1)*LZ - PI  ) );
 
@@ -249,28 +249,26 @@ void multipleLevel(const std::string& configPath)
 
     //para->setDevices(std::vector<uint>{0,1});
     para->setDevices(devices);
-	
+
 	para->setMaxDev(mpiWorldSize);
 
     //////////////////////////////////////////////////////////////////////////
-    
+
     para->setOutputPath( path );
     para->setOutputPrefix( simulationName );
 
-    para->setFName(para->getOutputPath() + "/" + para->getOutputPrefix());
-
     para->setPrintFiles(true);
 
- //   para->setTEnd( 40 * lround(L/velocity) );	
-	//para->setTOut(  5 * lround(L/velocity) );
-	para->setTOut(  100  );
+ //   para->setTimestepEnd( 40 * lround(L/velocity) );
+	//para->setTimestepOut(  5 * lround(L/velocity) );
+	para->setTimestepOut(  100  );
 
-    para->setTEnd( 1000 );	
-	//para->setTOut(    1 );
+    para->setTimestepEnd( 1000 );
+	//para->setTimestepOut(    1 );
 
-    para->setVelocity( velocity );
+    para->setVelocityLB( velocity );
 
-    para->setViscosity( viscosity );
+    para->setViscosityLB( viscosity );
 
     para->setVelocityRatio( 1.0 / velocity );
 
@@ -310,7 +308,7 @@ void multipleLevel(const std::string& configPath)
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    
+
     auto cudaMemoryManager = std::make_shared<CudaMemoryManager>(para);
     SPtr<GridProvider> gridGenerator = GridProvider::makeGridGenerator(gridBuilder, para, cudaMemoryManager, communicator);
     //SPtr<GridProvider> gridGenerator = GridProvider::makeGridReader(FILEFORMAT::BINARY, para, cudaMemoryManager);
@@ -318,7 +316,7 @@ void multipleLevel(const std::string& configPath)
     SPtr<FileWriter> fileWriter = SPtr<FileWriter>(new FileWriter());
     Simulation sim(para, cudaMemoryManager, communicator, *gridGenerator, &bcFactory);
     sim.run();
-    
+
     sim.addKineticEnergyAnalyzer( 10 );
     sim.addEnstrophyAnalyzer( 10 );
 
@@ -331,11 +329,11 @@ void multipleLevel(const std::string& configPath)
 int main( int argc, char* argv[])
 {
     MPI_Init(&argc, &argv);
-    std::string str, str2; 
+    std::string str, str2;
     if ( argv != NULL )
     {
         //str = static_cast<std::string>(argv[0]);
-        
+
         try
         {
             MPI_Comm_rank(MPI_COMM_WORLD, &mpirank);
@@ -407,14 +405,14 @@ int main( int argc, char* argv[])
 		}
         catch (const std::bad_alloc& e)
         {
-                
+
             *logging::out << logging::Logger::LOGGER_ERROR << "Bad Alloc:" << e.what() << "\n";
             //std::cout << e.what() << std::flush;
             //MPI_Abort(MPI_COMM_WORLD, -1);
         }
         catch (const std::exception& e)
         {
-                
+
             *logging::out << logging::Logger::LOGGER_ERROR << e.what() << "\n";
             //std::cout << e.what() << std::flush;
             //MPI_Abort(MPI_COMM_WORLD, -1);
diff --git a/apps/gpu/LBM/WTG_RUB/WTG_RUB.cpp b/apps/gpu/LBM/WTG_RUB/WTG_RUB.cpp
index 72577799763e607d1cc5bea0d48e807594b9a5f8..a5534cf081d46829bc9e3a15418c3d756eed2a48 100644
--- a/apps/gpu/LBM/WTG_RUB/WTG_RUB.cpp
+++ b/apps/gpu/LBM/WTG_RUB/WTG_RUB.cpp
@@ -200,7 +200,7 @@ void multipleLevel(const std::string& configPath)
     vf::basics::ConfigurationFile config;
     config.load(configPath);
 
-    SPtr<Parameter> para = std::make_shared<Parameter>(config, communicator.getNummberOfProcess(), communicator.getPID());
+    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNummberOfProcess(), communicator.getPID(), &config);
     BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -219,14 +219,12 @@ void multipleLevel(const std::string& configPath)
     para->setOutputPath( path );
     para->setOutputPrefix( "Unified_" + simulationName );
 
-    para->setFName(para->getOutputPath() + "/" + para->getOutputPrefix());
-
     para->setPrintFiles(true);
 
     para->setMaxLevel(maxLevel);
 
-    para->setVelocity(velocityLB);
-    para->setViscosity(viscosityLB);
+    para->setVelocityLB(velocityLB);
+    para->setViscosityLB(viscosityLB);
 
     para->setVelocityRatio(velocity/ velocityLB);
 
@@ -239,8 +237,8 @@ void multipleLevel(const std::string& configPath)
         vz  = (real)0.0;
     });
 
-    para->setTOut( timeStepOut );
-    para->setTEnd( timeStepEnd );
+    para->setTimestepOut( timeStepOut );
+    para->setTimestepEnd( timeStepEnd );
 
     /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
diff --git a/apps/gpu/LBM/gridGeneratorTest/gridGenerator.cpp b/apps/gpu/LBM/gridGeneratorTest/gridGenerator.cpp
index ecb43f56c79c25fc3aed6f404d717b8181fb0de1..433c13abbf96b08a1f5f00c216187d2be736591a 100644
--- a/apps/gpu/LBM/gridGeneratorTest/gridGenerator.cpp
+++ b/apps/gpu/LBM/gridGeneratorTest/gridGenerator.cpp
@@ -76,11 +76,11 @@ void multipleLevel(const std::string& configPath)
     //gridFactory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_UNDER_TRIANGLE);
 
     auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
-    
+
     vf::gpu::Communicator& communicator = vf::gpu::Communicator::getInstance();
     vf::basics::ConfigurationFile config;
     config.load(configPath);
-    SPtr<Parameter> para = std::make_shared<Parameter>(config, communicator.getNummberOfProcess(), communicator.getPID());
+    SPtr<Parameter> para = std::make_shared<Parameter>(communicator.getNummberOfProcess(), communicator.getPID(), &config);
     BoundaryConditionFactory bcFactory = BoundaryConditionFactory();
 
 
@@ -104,7 +104,7 @@ void multipleLevel(const std::string& configPath)
         };
 
         int testcase = SphereTest;
-        
+
 		//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 		if (testcase == TGV)
 		//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -118,7 +118,7 @@ void multipleLevel(const std::string& configPath)
 			//////////////////////////////////////////////////////////////////////////
 			gridBuilder->setPeriodicBoundaryCondition(true, true, true);
 			//////////////////////////////////////////////////////////////////////////
-			gridBuilder->buildGrids(LBM, true); 
+			gridBuilder->buildGrids(LBM, true);
 			//////////////////////////////////////////////////////////////////////////
 			SPtr<Grid> grid = gridBuilder->getGrid(gridBuilder->getNumberOfLevels() - 1);
 			//////////////////////////////////////////////////////////////////////////
@@ -178,17 +178,15 @@ void multipleLevel(const std::string& configPath)
             para->setOutputPath( "F:/Work/Computations/out/Sphere/" );
             para->setOutputPrefix( "Sphere" );
 
-            para->setFName(para->getOutputPath() + "/" + para->getOutputPrefix());
-
             para->setPrintFiles(true);
-    
-            para->setVelocity( vx );
-            para->setViscosity( ( vx * D / dx ) / Re );
+
+            para->setVelocityLB( vx );
+            para->setViscosityLB( ( vx * D / dx ) / Re );
 
             para->setVelocityRatio(1.0);
 
-            para->setTOut( 1000 );
-            para->setTEnd( 100000 );
+            para->setTimestepOut( 1000 );
+            para->setTimestepEnd( 100000 );
 
             para->setCalcDragLift(true);
 
@@ -211,7 +209,7 @@ void multipleLevel(const std::string& configPath)
 
             //gridBuilder->setNumberOfLayers(10,8);
             //gridBuilder->addGrid(SphereSTL, 2);
-            
+
             gridBuilder->setNumberOfLayers(4,8);
             gridBuilder->addGrid(sphereRef_1_STL, 1);
             //gridBuilder->addGrid(sphereRef_2_STL, 4);
@@ -220,7 +218,7 @@ void multipleLevel(const std::string& configPath)
             //gridBuilder->addGrid(sphere, 5);
 
 
-        
+
             //gridBuilder->addGeometry(SphereSTL);
             gridBuilder->addGeometry(sphere);
 
@@ -237,7 +235,7 @@ void multipleLevel(const std::string& configPath)
             gridBuilder->setVelocityBoundaryCondition(SideType::MX, vx, 0.0, 0.0);
 
             gridBuilder->setVelocityBoundaryCondition(SideType::GEOMETRY, 0.0, 0.0, 0.0);
-            
+
             bcFactory.setVelocityBoundaryCondition(BoundaryConditionFactory::VelocityBC::VelocityCompressible);
             bcFactory.setGeometryBoundaryCondition(BoundaryConditionFactory::NoSlipBC::NoSlipCompressible);
             bcFactory.setPressureBoundaryCondition(BoundaryConditionFactory::PressureBC::PressureNonEquilibriumCompressible);
@@ -266,18 +264,16 @@ void multipleLevel(const std::string& configPath)
             para->setOutputPath( "F:/Work/Computations/out/DrivAerNew/" );
             para->setOutputPrefix( "DrivAer" );
 
-            para->setFName(para->getOutputPath() + "/" + para->getOutputPrefix());
-
             para->setPrintFiles(true);
-    
-            para->setVelocity( vx );
-            para->setViscosity( ( vx * L / dx ) / Re );
+
+            para->setVelocityLB( vx );
+            para->setViscosityLB( ( vx * L / dx ) / Re );
 
             //para->setVelocityRatio(1.0 / velocityLB);
             para->setVelocityRatio(1.0);
 
-            para->setTOut( 10000 );
-            para->setTEnd( 100000 );
+            para->setTimestepOut( 10000 );
+            para->setTimestepEnd( 100000 );
 
             para->setUseWale(false);
 
@@ -309,7 +305,7 @@ void multipleLevel(const std::string& configPath)
 
             gridBuilder->setNumberOfLayers(10,8);
             gridBuilder->addGrid(DrivAerRefBoxSTL, 4);
-        
+
             gridBuilder->setNumberOfLayers(10,8);
             gridBuilder->addGrid(DrivAerSTL, 5);
 
@@ -372,17 +368,15 @@ void multipleLevel(const std::string& configPath)
             para->setOutputPath( "F:/Work/Computations/out/PaperPlane/" );
             para->setOutputPrefix( "PaperPlaneK17winglet" );
 
-            para->setFName(para->getOutputPath() + "/" + para->getOutputPrefix());
-
             para->setPrintFiles(true);
-    
-            para->setVelocity( vx );
-            para->setViscosity( ( vx * L / dx ) / Re );
+
+            para->setVelocityLB( vx );
+            para->setViscosityLB( ( vx * L / dx ) / Re );
 
             para->setVelocityRatio(1.0);
 
-            para->setTOut( 1000 );
-            para->setTEnd( 100000 );
+            para->setTimestepOut( 1000 );
+            para->setTimestepEnd( 100000 );
 
             para->setUseWale(false);
 
@@ -392,7 +386,7 @@ void multipleLevel(const std::string& configPath)
 
             TriangularMesh* STL = TriangularMesh::make("F:/Work/Computations/gridGenerator/stl/PaperPlane_1.stl");
             //TriangularMesh* STL = TriangularMesh::make("F:/Work/Computations/gridGenerator/stl/PaperPlane_1_winglet.stl");
-            
+
             TriangularMesh* RefBoxSTL = TriangularMesh::make("F:/Work/Computations/gridGenerator/stl/PaperPlane_1_ref.stl");
             //TriangularMesh* RefBoxSTL = TriangularMesh::make("F:/Work/Computations/gridGenerator/stl/PaperPlane_1_winglet_ref.stl");
 
@@ -452,17 +446,15 @@ void multipleLevel(const std::string& configPath)
             para->setOutputPath( "F:/Work/Computations/out/StlGroupTest/" );
             para->setOutputPrefix( "StlGroupTest" );
 
-            para->setFName(para->getOutputPath() + "/" + para->getOutputPrefix());
-
             para->setPrintFiles(true);
-    
-            para->setVelocity( vx );
-            para->setViscosity( ( vx * L / dx ) / Re );
+
+            para->setVelocityLB( vx );
+            para->setViscosityLB( ( vx * L / dx ) / Re );
 
             para->setVelocityRatio(1.0);
 
-            para->setTOut( 1000 );
-            para->setTEnd( 100000 );
+            para->setTimestepOut( 1000 );
+            para->setTimestepEnd( 100000 );
 
             para->setUseWale(false);
 
@@ -551,7 +543,7 @@ void multipleLevel(const std::string& configPath)
 
             gridBuilder->addCoarseGrid(-30.0, -20.0,  0.0 - z0,
                                         50.0,  20.0, 25.0 - z0, dx);
-            
+
             gridBuilder->setNumberOfLayers(10,8);
             gridBuilder->addGrid( new Cuboid( - 6.6, -6, -0.7, 20.6 , 6, 5.3  ), 1 );
             gridBuilder->addGrid( new Cuboid( -3.75, -3, -0.7, 11.75, 3, 2.65 ), 2 );
@@ -559,7 +551,7 @@ void multipleLevel(const std::string& configPath)
             gridBuilder->setNumberOfLayers(10,8);
             gridBuilder->addGrid(DLC_RefBox_Level_3, 3);
             gridBuilder->addGrid(DLC_RefBox_Level_4, 4);
-        
+
             Conglomerate* refinement = new Conglomerate();
             refinement->add(DLC_RefBox_Level_5);
             refinement->add(VW370_SERIE_STL);
@@ -588,7 +580,7 @@ void multipleLevel(const std::string& configPath)
             bcFactory.setVelocityBoundaryCondition(BoundaryConditionFactory::VelocityBC::VelocityAndPressureCompressible);
             bcFactory.setGeometryBoundaryCondition(BoundaryConditionFactory::NoSlipBC::NoSlipCompressible);
             bcFactory.setPressureBoundaryCondition(BoundaryConditionFactory::PressureBC::OutflowNonReflective);
- 
+
             //////////////////////////////////////////////////////////////////////////
 
             SPtr<Grid> grid = gridBuilder->getGrid(gridBuilder->getNumberOfLevels() - 1);
@@ -610,13 +602,13 @@ void multipleLevel(const std::string& configPath)
 
             for( uint patch : frontWheelPatches ){
                 gridBuilder->getGeometryBoundaryCondition(gridBuilder->getNumberOfLevels() - 1)->setTangentialVelocityForPatch( grid, patch, wheelsFrontX, -2.0, wheelsFrontZ,
-                                                                                                                                             wheelsFrontX,  2.0, wheelsFrontZ, 
+                                                                                                                                             wheelsFrontX,  2.0, wheelsFrontZ,
 					                                                                                                                         wheelTangentialVelocity, wheelsRadius);
             }
 
             for( uint patch : rearWheelPatches ){
                 gridBuilder->getGeometryBoundaryCondition(gridBuilder->getNumberOfLevels() - 1)->setTangentialVelocityForPatch( grid, patch, wheelsRearX , -2.0, wheelsRearZ ,
-                                                                                                                                             wheelsRearX ,  2.0, wheelsRearZ , 
+                                                                                                                                             wheelsRearX ,  2.0, wheelsRearZ ,
 					                                                                                                                         wheelTangentialVelocity, wheelsRadius);
             }
 
@@ -631,7 +623,7 @@ void multipleLevel(const std::string& configPath)
             //SimulationFileWriter::write("D:/GRIDGENERATION/files/", gridBuilder, FILEFORMAT::ASCII);
             //SimulationFileWriter::write("C:/Users/lenz/Desktop/Work/gridGenerator/grid/", gridBuilder, FILEFORMAT::ASCII);
             SimulationFileWriter::write("grid/", gridBuilder, FILEFORMAT::ASCII);
-            
+
             //gridGenerator = GridProvider::makeGridGenerator(gridBuilder, para, cudaMemoryManager, communicator);
         }
 
@@ -649,17 +641,15 @@ void multipleLevel(const std::string& configPath)
             para->setOutputPath( "F:/Work/Computations/out/Sphere/" );
             para->setOutputPrefix( "Sphere" );
 
-            para->setFName(para->getOutputPath() + "/" + para->getOutputPrefix());
-
             para->setPrintFiles(true);
-    
-            para->setVelocity( vx );
-            para->setViscosity( ( vx * D / dx ) / Re );
+
+            para->setVelocityLB( vx );
+            para->setViscosityLB( ( vx * D / dx ) / Re );
 
             para->setVelocityRatio(1.0);
 
-            para->setTOut( 1000 );
-            para->setTEnd( 100000 );
+            para->setTimestepOut( 1000 );
+            para->setTimestepEnd( 100000 );
 
             para->setCalcDragLift(true);
 
@@ -672,13 +662,13 @@ void multipleLevel(const std::string& configPath)
 
             //const uint generatePart = 1;
             const uint generatePart = communicator.getPID();
-            
+
             std::ofstream logFile2;
-            
+
             if( generatePart == 0 )
                 logFile2.open( "F:/Work/Computations/gridGenerator/grid/0/gridGeneratorLog.txt" );
                 //logFile2.open( "grid/0/gridGeneratorLog.txt" );
-            
+
             if( generatePart == 1 )
                 logFile2.open( "F:/Work/Computations/gridGenerator/grid/1/gridGeneratorLog.txt" );
                 //logFile2.open( "grid/1/gridGeneratorLog.txt" );
@@ -689,17 +679,17 @@ void multipleLevel(const std::string& configPath)
             //TriangularMesh* triangularMesh = TriangularMesh::make("stl/ShpereNotOptimal.lnx.stl");
 
             // all
-            //gridBuilder->addCoarseGrid(-2, -2, -2,  
+            //gridBuilder->addCoarseGrid(-2, -2, -2,
             //                            4,  2,  2, dx);
 
             real overlap = 10.0 * dx;
 
             if( generatePart == 0 )
-                gridBuilder->addCoarseGrid(-2.0          , -2.0, -2.0,  
+                gridBuilder->addCoarseGrid(-2.0          , -2.0, -2.0,
                                             0.5 + overlap,  2.0,  2.0, dx);
 
             if( generatePart == 1 )
-                gridBuilder->addCoarseGrid( 0.5 - overlap, -2.0, -2.0,  
+                gridBuilder->addCoarseGrid( 0.5 - overlap, -2.0, -2.0,
                                             4.0          ,  2.0,  2.0, dx);
 
 
@@ -707,26 +697,26 @@ void multipleLevel(const std::string& configPath)
             gridBuilder->addGrid(triangularMesh, 1);
 
             gridBuilder->addGeometry(triangularMesh);
-            
+
             if( generatePart == 0 )
-                gridBuilder->setSubDomainBox( std::make_shared<BoundingBox>( -2.0, 0.5, 
-                                                                             -2.0, 2.0, 
+                gridBuilder->setSubDomainBox( std::make_shared<BoundingBox>( -2.0, 0.5,
+                                                                             -2.0, 2.0,
                                                                              -2.0, 2.0 ) );
-            
+
             if( generatePart == 1 )
-                gridBuilder->setSubDomainBox( std::make_shared<BoundingBox>(  0.5, 4.0, 
-                                                                             -2.0, 2.0, 
+                gridBuilder->setSubDomainBox( std::make_shared<BoundingBox>(  0.5, 4.0,
+                                                                             -2.0, 2.0,
                                                                              -2.0, 2.0 ) );
 
             gridBuilder->setPeriodicBoundaryCondition(false, false, false);
 
             gridBuilder->buildGrids(LBM, true); // buildGrids() has to be called before setting the BCs!!!!
-            
+
             if( generatePart == 0 ){
                 gridBuilder->findCommunicationIndices(CommunicationDirections::PX, LBM);
                 gridBuilder->setCommunicationProcess(CommunicationDirections::PX, 1);
             }
-            
+
             if( generatePart == 1 ){
                 gridBuilder->findCommunicationIndices(CommunicationDirections::MX, LBM);
                 gridBuilder->setCommunicationProcess(CommunicationDirections::MX, 0);
@@ -771,7 +761,7 @@ void multipleLevel(const std::string& configPath)
                 //SimulationFileWriter::write("grid/1/", gridBuilder, FILEFORMAT::ASCII);
 
             //return;
-            
+
             //gridGenerator = GridProvider::makeGridGenerator(gridBuilder, para, cudaMemoryManager, communicator);
         }
 
@@ -804,11 +794,11 @@ void multipleLevel(const std::string& configPath)
 int main( int argc, char* argv[])
 {
     MPI_Init(&argc, &argv);
-    std::string str, str2; 
+    std::string str, str2;
     if ( argv != NULL )
     {
         //str = static_cast<std::string>(argv[0]);
-        
+
         try
         {
             //////////////////////////////////////////////////////////////////////////
@@ -831,12 +821,12 @@ int main( int argc, char* argv[])
 		}
         catch (const std::bad_alloc& e)
         {
-                
+
             *logging::out << logging::Logger::LOGGER_ERROR << "Bad Alloc:" << e.what() << "\n";
         }
         catch (const std::exception& e)
         {
-                
+
             *logging::out << logging::Logger::LOGGER_ERROR << e.what() << "\n";
         }
         catch (...)
diff --git a/apps/gpu/LBM/lbmTest/main.cpp b/apps/gpu/LBM/lbmTest/main.cpp
index bd33f7c987934b73f41e2057da5e42655c406687..3bfcd6bcd054ff5fcb3584e314c1d67a0becfe22 100644
--- a/apps/gpu/LBM/lbmTest/main.cpp
+++ b/apps/gpu/LBM/lbmTest/main.cpp
@@ -76,7 +76,7 @@ void setParameters(std::shared_ptr<Parameter> para, std::unique_ptr<input::Input
     std::string gridPath = getGridPath(para, _gridpath);
     para->setOutputPath(_path);
     para->setOutputPrefix(_prefix);
-    para->setFName(_path + "/" + _prefix);
+    para->setPathAndFilename(_path + "/" + _prefix);
     para->setPrintFiles(false);
     para->setPrintFiles(StringUtil::toBool(input->getValue("WriteGrid")));
     para->setGeometryValues(StringUtil::toBool(input->getValue("GeometryValues")));
@@ -90,9 +90,9 @@ void setParameters(std::shared_ptr<Parameter> para, std::unique_ptr<input::Input
     para->setUseWale(StringUtil::toBool(input->getValue("UseWale")));
     para->setSimulatePorousMedia(StringUtil::toBool(input->getValue("SimulatePorousMedia")));
     para->setD3Qxx(StringUtil::toInt(input->getValue("D3Qxx")));
-    para->setTEnd(StringUtil::toInt(input->getValue("TimeEnd")));
-    para->setTOut(StringUtil::toInt(input->getValue("TimeOut")));
-    para->setTStartOut(StringUtil::toInt(input->getValue("TimeStartOut")));
+    para->setTimestepEnd(StringUtil::toInt(input->getValue("TimeEnd")));
+    para->setTimestepOut(StringUtil::toInt(input->getValue("TimeOut")));
+    para->setTimestepStartOut(StringUtil::toInt(input->getValue("TimeStartOut")));
     para->setTimeCalcMedStart(StringUtil::toInt(input->getValue("TimeStartCalcMedian")));
     para->setTimeCalcMedEnd(StringUtil::toInt(input->getValue("TimeEndCalcMedian")));
     para->setPressInID(StringUtil::toInt(input->getValue("PressInID")));
@@ -106,8 +106,8 @@ void setParameters(std::shared_ptr<Parameter> para, std::unique_ptr<input::Input
     para->setTemperatureInit(StringUtil::toFloat(input->getValue("Temp")));
     para->setTemperatureBC(StringUtil::toFloat(input->getValue("TempBC")));
     //////////////////////////////////////////////////////////////////////////
-    para->setViscosity(StringUtil::toFloat(input->getValue("Viscosity_LB")));
-    para->setVelocity(StringUtil::toFloat(input->getValue("Velocity_LB")));
+    para->setViscosityLB(StringUtil::toFloat(input->getValue("Viscosity_LB")));
+    para->setVelocityLB(StringUtil::toFloat(input->getValue("Velocity_LB")));
     para->setViscosityRatio(StringUtil::toFloat(input->getValue("Viscosity_Ratio_World_to_LB")));
     para->setVelocityRatio(StringUtil::toFloat(input->getValue("Velocity_Ratio_World_to_LB")));
     para->setDensityRatio(StringUtil::toFloat(input->getValue("Density_Ratio_World_to_LB")));
diff --git a/apps/gpu/LBM/metisTest/main.cpp b/apps/gpu/LBM/metisTest/main.cpp
index 679119509d66737f3db132b676c73de5a146b779..6a62ff72c7b71211610ba4e40f81e9a1f527eb7f 100644
--- a/apps/gpu/LBM/metisTest/main.cpp
+++ b/apps/gpu/LBM/metisTest/main.cpp
@@ -76,7 +76,7 @@ void setParameters(std::shared_ptr<Parameter> para, std::unique_ptr<input::Input
     std::string gridPath = getGridPath(para, _gridpath);
     para->setOutputPath(_path);
     para->setOutputPrefix(_prefix);
-    para->setFName(_path + "/" + _prefix);
+    para->setPathAndFilename(_path + "/" + _prefix);
     para->setPrintFiles(false);
     para->setPrintFiles(StringUtil::toBool(input->getValue("WriteGrid")));
     para->setGeometryValues(StringUtil::toBool(input->getValue("GeometryValues")));
@@ -90,9 +90,9 @@ void setParameters(std::shared_ptr<Parameter> para, std::unique_ptr<input::Input
     para->setUseWale(StringUtil::toBool(input->getValue("UseWale")));
     para->setSimulatePorousMedia(StringUtil::toBool(input->getValue("SimulatePorousMedia")));
     para->setD3Qxx(StringUtil::toInt(input->getValue("D3Qxx")));
-    para->setTEnd(StringUtil::toInt(input->getValue("TimeEnd")));
-    para->setTOut(StringUtil::toInt(input->getValue("TimeOut")));
-    para->setTStartOut(StringUtil::toInt(input->getValue("TimeStartOut")));
+    para->setTimestepEnd(StringUtil::toInt(input->getValue("TimeEnd")));
+    para->setTimestepOut(StringUtil::toInt(input->getValue("TimeOut")));
+    para->setTimestepStartOut(StringUtil::toInt(input->getValue("TimeStartOut")));
     para->setTimeCalcMedStart(StringUtil::toInt(input->getValue("TimeStartCalcMedian")));
     para->setTimeCalcMedEnd(StringUtil::toInt(input->getValue("TimeEndCalcMedian")));
     para->setPressInID(StringUtil::toInt(input->getValue("PressInID")));
@@ -106,8 +106,8 @@ void setParameters(std::shared_ptr<Parameter> para, std::unique_ptr<input::Input
     para->setTemperatureInit(StringUtil::toFloat(input->getValue("Temp")));
     para->setTemperatureBC(StringUtil::toFloat(input->getValue("TempBC")));
     //////////////////////////////////////////////////////////////////////////
-    para->setViscosity(StringUtil::toFloat(input->getValue("Viscosity_LB")));
-    para->setVelocity(StringUtil::toFloat(input->getValue("Velocity_LB")));
+    para->setViscosityLB(StringUtil::toFloat(input->getValue("Viscosity_LB")));
+    para->setVelocityLB(StringUtil::toFloat(input->getValue("Velocity_LB")));
     para->setViscosityRatio(StringUtil::toFloat(input->getValue("Viscosity_Ratio_World_to_LB")));
     para->setVelocityRatio(StringUtil::toFloat(input->getValue("Velocity_Ratio_World_to_LB")));
     para->setDensityRatio(StringUtil::toFloat(input->getValue("Density_Ratio_World_to_LB")));
diff --git a/apps/gpu/LidDrivenCavity/LidDrivenCavity.cpp b/apps/gpu/LidDrivenCavity/LidDrivenCavity.cpp
index e92a6c0dd4032fa8aa4c1ad519e18975976cdcbd..7fccd93194a2ecd20a9cd41be32497bd574f67bb 100644
--- a/apps/gpu/LidDrivenCavity/LidDrivenCavity.cpp
+++ b/apps/gpu/LidDrivenCavity/LidDrivenCavity.cpp
@@ -1,28 +1,28 @@
 //=======================================================================================
-// ____          ____    __    ______     __________   __      __       __        __         
-// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |        
-//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |        
-//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |        
-//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____    
-//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|   
-//      \    \  |    |   ________________________________________________________________    
-//       \    \ |    |  |  ______________________________________________________________|   
-//        \    \|    |  |  |         __          __     __     __     ______      _______    
-//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)   
-//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______    
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
 //           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/   
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
 //
-//  This file is part of VirtualFluids. VirtualFluids is free software: you can 
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
 //  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of 
+//  License as published by the Free Software Foundation, either version 3 of
 //  the License, or (at your option) any later version.
-//  
-//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT 
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 //  for more details.
-//  
+//
 //  You should have received a copy of the GNU General Public License along
 //  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
 //
@@ -128,7 +128,7 @@ int main( int argc, char* argv[])
         auto gridFactory = GridFactory::make();
         gridFactory->setGridStrategy(Device::CPU);
         auto gridBuilder = MultipleGridBuilder::makeShared(gridFactory);
-    
+
         //////////////////////////////////////////////////////////////////////////
         // create grid
         //////////////////////////////////////////////////////////////////////////
@@ -141,7 +141,7 @@ int main( int argc, char* argv[])
         gridBuilder->setPeriodicBoundaryCondition(false, false, false);
 
         gridBuilder->buildGrids(lbmOrGks, false);
-    
+
         //////////////////////////////////////////////////////////////////////////
         // branch between LBM and GKS
         //////////////////////////////////////////////////////////////////////////
@@ -164,7 +164,7 @@ int main( int argc, char* argv[])
 
             *logging::out << logging::Logger::INFO_HIGH << "velocity  [dx/dt] = " << velocityLB << " \n";
             *logging::out << logging::Logger::INFO_HIGH << "viscosity [dx^2/dt] = " << viscosityLB << "\n";
-    
+
             //////////////////////////////////////////////////////////////////////////
             // set parameters
             //////////////////////////////////////////////////////////////////////////
@@ -172,8 +172,6 @@ int main( int argc, char* argv[])
             para->setOutputPath( path );
             para->setOutputPrefix( simulationName );
 
-            para->setPathAndFilename(para->getOutputPath() + "/" + para->getOutputPrefix());
-
             para->setPrintFiles(true);
 
             para->setVelocityLB(velocityLB);
@@ -183,7 +181,7 @@ int main( int argc, char* argv[])
 
             para->setTimestepOut( timeStepOut );
             para->setTimestepEnd( timeStepEnd );
-    
+
             //////////////////////////////////////////////////////////////////////////
             // set boundary conditions
             //////////////////////////////////////////////////////////////////////////
@@ -205,7 +203,7 @@ int main( int argc, char* argv[])
             SPtr<CudaMemoryManager> cudaMemoryManager = CudaMemoryManager::make(para);
 
             SPtr<GridProvider> gridGenerator = GridProvider::makeGridGenerator(gridBuilder, para, cudaMemoryManager, communicator);
-    
+
             //////////////////////////////////////////////////////////////////////////
             // run simulation
             //////////////////////////////////////////////////////////////////////////
@@ -219,19 +217,19 @@ int main( int argc, char* argv[])
         else
         {
             CudaUtility::setCudaDevice(0);
-        
+
             Parameters parameters;
-    
+
             //////////////////////////////////////////////////////////////////////////
             // compute remaining parameters
             //////////////////////////////////////////////////////////////////////////
 
             const real vx = velocity / sqrt(2.0);
             const real vy = velocity / sqrt(2.0);
-    
+
             parameters.K  = 2.0;
             parameters.Pr = 1.0;
-        
+
             const real Ma = 0.1;
 
             real rho = 1.0;
@@ -244,7 +242,7 @@ int main( int argc, char* argv[])
             *logging::out << logging::Logger::INFO_HIGH << "mu  = " << mu << " m^2/s\n";
 
             *logging::out << logging::Logger::INFO_HIGH << "CFL = " << dt * ( velocity + cs ) / dx << "\n";
-    
+
             //////////////////////////////////////////////////////////////////////////
             // set parameters
             //////////////////////////////////////////////////////////////////////////
@@ -255,7 +253,7 @@ int main( int argc, char* argv[])
             parameters.dx = dx;
 
             parameters.lambdaRef = lambda;
-    
+
             //////////////////////////////////////////////////////////////////////////
             // set copy mesh to simulation
             //////////////////////////////////////////////////////////////////////////
@@ -265,7 +263,7 @@ int main( int argc, char* argv[])
             meshAdapter.inputGrid();
 
             auto dataBase = std::make_shared<DataBase>( "GPU" );
-    
+
             //////////////////////////////////////////////////////////////////////////
             // set boundary conditions
             //////////////////////////////////////////////////////////////////////////
@@ -273,21 +271,21 @@ int main( int argc, char* argv[])
             SPtr<BoundaryCondition> bcLid  = std::make_shared<IsothermalWall>( dataBase, Vec3(  vx,  vy, 0.0 ), lambda, false );
             SPtr<BoundaryCondition> bcWall = std::make_shared<IsothermalWall>( dataBase, Vec3( 0.0, 0.0, 0.0 ), lambda, false );
 
-            bcLid->findBoundaryCells ( meshAdapter, false,  [&](Vec3 center){ return center.z >  0.5 && 
-                                                                                     center.x > -0.5 && 
-                                                                                     center.x <  0.5 && 
-                                                                                     center.y > -0.5 && 
+            bcLid->findBoundaryCells ( meshAdapter, false,  [&](Vec3 center){ return center.z >  0.5 &&
+                                                                                     center.x > -0.5 &&
+                                                                                     center.x <  0.5 &&
+                                                                                     center.y > -0.5 &&
                                                                                      center.y <  0.5; } );
 
-            bcWall->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.x < -0.5 || 
-                                                                                   center.x >  0.5 || 
-                                                                                   center.y < -0.5 || 
+            bcWall->findBoundaryCells( meshAdapter, true, [&](Vec3 center){ return center.x < -0.5 ||
+                                                                                   center.x >  0.5 ||
+                                                                                   center.y < -0.5 ||
                                                                                    center.y >  0.5 ||
                                                                                    center.z < -0.5; } );
 
             dataBase->boundaryConditions.push_back( bcLid  );
             dataBase->boundaryConditions.push_back( bcWall );
-    
+
             //////////////////////////////////////////////////////////////////////////
             // set initial condition and upload mesh and initial condition to GPGPU
             //////////////////////////////////////////////////////////////////////////
@@ -304,7 +302,7 @@ int main( int argc, char* argv[])
             Initializer::initializeDataUpdate(dataBase);
 
             VtkWriter::write( dataBase, parameters, path + "/" + simulationName + "_0" );
-    
+
             //////////////////////////////////////////////////////////////////////////
             // set analyzers
             //////////////////////////////////////////////////////////////////////////
@@ -314,7 +312,7 @@ int main( int argc, char* argv[])
             ConvergenceAnalyzer convergenceAnalyzer( dataBase, 10000 );
 
             cupsAnalyzer.start();
-    
+
             //////////////////////////////////////////////////////////////////////////
             // run simulation
             //////////////////////////////////////////////////////////////////////////
@@ -329,7 +327,7 @@ int main( int argc, char* argv[])
 
                     VtkWriter::write( dataBase, parameters, path + "/" + simulationName + "_" + std::to_string( iter ) );
                 }
-            
+
                 int crashCellIndex = dataBase->getCrashCellIndex();
                 if( crashCellIndex >= 0 )
                 {
@@ -350,17 +348,17 @@ int main( int argc, char* argv[])
     }
     catch (const std::bad_alloc e)
     {
-                
+
         *logging::out << logging::Logger::LOGGER_ERROR << "Bad Alloc:" << e.what() << "\n";
     }
     catch (const std::exception& e)
     {
-                
+
         *logging::out << logging::Logger::LOGGER_ERROR << e.what() << "\n";
     }
     catch (std::string& s)
     {
-                
+
         *logging::out << logging::Logger::LOGGER_ERROR << s << "\n";
     }
     catch (...)
diff --git a/apps/gpu/tests/NumericalTestPostProcessing/Utilities/LogFileReader/LogFileReader.cpp b/apps/gpu/tests/NumericalTestPostProcessing/Utilities/LogFileReader/LogFileReader.cpp
index 0f254c99746a98f0e8f2323ba0f89e73e9317052..5a469b44ab21c9a0fc3850baf9f1a82c1f3a0314 100644
--- a/apps/gpu/tests/NumericalTestPostProcessing/Utilities/LogFileReader/LogFileReader.cpp
+++ b/apps/gpu/tests/NumericalTestPostProcessing/Utilities/LogFileReader/LogFileReader.cpp
@@ -11,7 +11,7 @@
 
 #include "Utilities/LogFileData/LogFileDataImp.h"
 
-#include "Core/Input/Input.h"
+#include <basics/config/ConfigurationFile.h>
 #include "Core/StringUtilities/StringUtil.h"
 
 #include "Utilities/AlmostEquals.h"
@@ -33,33 +33,27 @@ std::shared_ptr<LogFileData> LogFileReader::readLogFileToLogFileData(std::string
 {
 	std::shared_ptr<LogFileDataImp> logFileData = LogFileDataImp::getNewInstance();
 
-	std::ifstream stream;
-	stream.open(filePath.c_str(), std::ios::in);
-	if (stream.fail()) {
-		std::cout << "can not open log file!\n";
-		exit(1);
-	}
-
-	std::unique_ptr<input::Input> input = input::Input::makeInput(stream, "config");
+	auto input = std::make_shared<vf::basics::ConfigurationFile>(); // configuration-file reader used for the keyed getValue lookups below
+	input->load(filePath); // NOTE(review): result of load() is not checked; the replaced code printed a message and called exit(1) when the file could not be opened -- confirm load() reports that case itself
 
 	logFileData->setFilePath(filePath);
-	logFileData->setDate(input->getValue("Date"));
-	logFileData->setTime(input->getValue("Time"));
-	logFileData->setGpuDevices(StringUtil::toStringVector(input->getValue("GPU_Devices")));
+	logFileData->setDate(input->getValue<std::string>("Date"));
+	logFileData->setTime(input->getValue<std::string>("Time"));
+	logFileData->setGpuDevices(StringUtil::toStringVector(input->getValue<std::string>("GPU_Devices")));
 
-	logFileData->setKernel(input->getValue("Kernel"));
-	logFileData->setNumberOfTimeSteps(StringUtil::toInt(input->getValue("NumberOfTimeSteps")));
-	logFileData->setViscosity(StringUtil::toDouble(input->getValue("Viscosity")));
-	logFileData->setBasisTimeStepLength(StringUtil::toInt(input->getValue("BasisTimeStepLength")));
+	logFileData->setKernel(input->getValue<std::string>("Kernel"));
+	logFileData->setNumberOfTimeSteps(StringUtil::toInt(input->getValue<std::string>("NumberOfTimeSteps")));
+	logFileData->setViscosity(StringUtil::toDouble(input->getValue<std::string>("Viscosity")));
+	logFileData->setBasisTimeStepLength(StringUtil::toInt(input->getValue<std::string>("BasisTimeStepLength")));
 
-	logFileData->setSimName(input->getValue("SimulationName"));
+	logFileData->setSimName(input->getValue<std::string>("SimulationName"));
 
 
 
 
 	std::ostringstream simSigniture;
 	if (logFileData->getSimName() == "ShearWave") {
-		std::vector<double> shearWaveLx = StringUtil::toDoubleVector(input->getValue("Lx"));
+		std::vector<double> shearWaveLx = StringUtil::toDoubleVector(input->getValue<std::string>("Lx"));
 		logFileData->setBasicGridLengths(shearWaveLx);
 		std::vector<int> shearWaveL0;
 		std::vector<double> shearWaveUx;
@@ -69,9 +63,9 @@ std::shared_ptr<LogFileData> LogFileReader::readLogFileToLogFileData(std::string
 			l0 << "l0_" << shearWaveLx.at(i);
 			ux << "ux_" << shearWaveLx.at(i);
 			uz << "uz_" << shearWaveLx.at(i);
-			shearWaveL0.push_back(StringUtil::toInt(input->getValue(l0.str())));
-			shearWaveUx.push_back(StringUtil::toDouble(input->getValue(ux.str())));
-			shearWaveUz.push_back(StringUtil::toDouble(input->getValue(uz.str())));
+			shearWaveL0.push_back(StringUtil::toInt(input->getValue<std::string>(l0.str())));
+			shearWaveUx.push_back(StringUtil::toDouble(input->getValue<std::string>(ux.str())));
+			shearWaveUz.push_back(StringUtil::toDouble(input->getValue<std::string>(uz.str())));
 		}
 		std::shared_ptr<ShearWaveLogFileDataImp> swLogFileData = ShearWaveLogFileDataImp::getNewInstance();
 		swLogFileData->setL0(shearWaveL0);
@@ -82,7 +76,7 @@ std::shared_ptr<LogFileData> LogFileReader::readLogFileToLogFileData(std::string
 		logFileData->setBasicSimulation(ShearWave);
 	}
 	if (logFileData->getSimName() == "TaylorGreenVortexUx") {
-		std::vector<double> tgvUxLx = StringUtil::toDoubleVector(input->getValue("Lx"));
+		std::vector<double> tgvUxLx = StringUtil::toDoubleVector(input->getValue<std::string>("Lx"));
 		logFileData->setBasicGridLengths(tgvUxLx);
 		std::vector<int> tgvUxL0;
 		std::vector<double> tgvUxUx;
@@ -92,9 +86,9 @@ std::shared_ptr<LogFileData> LogFileReader::readLogFileToLogFileData(std::string
 			l0 << "l0_" << tgvUxLx.at(i);
 			ux << "ux_" << tgvUxLx.at(i);
 			amplitude << "Amplitude_" << tgvUxLx.at(i);
-			tgvUxL0.push_back(StringUtil::toInt(input->getValue(l0.str())));
-			tgvUxUx.push_back(StringUtil::toDouble(input->getValue(ux.str())));
-			tgvUxAmp.push_back(StringUtil::toDouble(input->getValue(amplitude.str())));
+			tgvUxL0.push_back(StringUtil::toInt(input->getValue<std::string>(l0.str())));
+			tgvUxUx.push_back(StringUtil::toDouble(input->getValue<std::string>(ux.str())));
+			tgvUxAmp.push_back(StringUtil::toDouble(input->getValue<std::string>(amplitude.str())));
 		}
 		std::shared_ptr<TaylorGreenVortexUxLogFileDataImp> tgvUxLogFileData = TaylorGreenVortexUxLogFileDataImp::getNewInstance();
 		tgvUxLogFileData->setL0(tgvUxL0);
@@ -105,7 +99,7 @@ std::shared_ptr<LogFileData> LogFileReader::readLogFileToLogFileData(std::string
 		logFileData->setBasicSimulation(TaylorGreenVortexUx);
 	}
 	if (logFileData->getSimName() == "TaylorGreenVortexUz") {
+		std::vector<double> tgvUzLz = StringUtil::toDoubleVector(input->getValue<std::string>("Lx")); // NOTE(review): the Uz variant reads the "Lx" key, same as the ShearWave and Ux branches, although the variable is named Lz -- confirm this is intended
+		std::vector<double> tgvUzLz = StringUtil::toDoubleVector(input->getValue<std::string>("Lx"));
 		logFileData->setBasicGridLengths(tgvUzLz);
 		std::vector<int> tgvUzL0;
 		std::vector<double> tgvUzUz;
@@ -115,9 +109,9 @@ std::shared_ptr<LogFileData> LogFileReader::readLogFileToLogFileData(std::string
 			l0 << "l0_" << tgvUzLz.at(i);
 			uz << "uz_" << tgvUzLz.at(i);
 			amplitude << "Amplitude_" << tgvUzLz.at(i);
-			tgvUzL0.push_back(StringUtil::toInt(input->getValue(l0.str())));
-			tgvUzUz.push_back(StringUtil::toDouble(input->getValue(uz.str())));
-			tgvUzAmp.push_back(StringUtil::toDouble(input->getValue(amplitude.str())));
+			tgvUzL0.push_back(StringUtil::toInt(input->getValue<std::string>(l0.str())));
+			tgvUzUz.push_back(StringUtil::toDouble(input->getValue<std::string>(uz.str())));
+			tgvUzAmp.push_back(StringUtil::toDouble(input->getValue<std::string>(amplitude.str())));
 		}
 		std::shared_ptr<TaylorGreenVortexUzLogFileDataImp> tgvUzLogFileData = TaylorGreenVortexUzLogFileDataImp::getNewInstance();
 		tgvUzLogFileData->setL0(tgvUzL0);
@@ -140,10 +134,10 @@ std::shared_ptr<LogFileData> LogFileReader::readLogFileToLogFileData(std::string
 		resultsCheckTimeOStringStream << "ResultsCheckTime_" << logFileData->getBasicGridLengths().at(i);
 		testTimeOStringStream << "TestTime_" << logFileData->getBasicGridLengths().at(i);
 		analyticalVTKWritingTimeOStringStream << "AnalyticalVTKFileWritingTime_" << logFileData->getBasicGridLengths().at(i);
-		std::string simTimeString = input->getValue(simTimeOStringStream.str());
-		std::string resultCheckTimeString = input->getValue(resultsCheckTimeOStringStream.str());
-		std::string testTimeString = input->getValue(testTimeOStringStream.str());
-		std::string analyticalVTKWritingTimeString = input->getValue(analyticalVTKWritingTimeOStringStream.str());
+		std::string simTimeString = input->getValue<std::string>(simTimeOStringStream.str()); // NOTE(review): simTimeString, resultCheckTimeString and testTimeString have their last three characters erased below; a value shorter than three characters is not guarded against -- TODO confirm the log always carries that suffix
+		std::string resultCheckTimeString = input->getValue<std::string>(resultsCheckTimeOStringStream.str());
+		std::string testTimeString = input->getValue<std::string>(testTimeOStringStream.str());
+		std::string analyticalVTKWritingTimeString = input->getValue<std::string>(analyticalVTKWritingTimeOStringStream.str());
 		simTimeString.erase(simTimeString.end() - 3, simTimeString.end());
 		resultCheckTimeString.erase(resultCheckTimeString.end() - 3, resultCheckTimeString.end());
 		testTimeString.erase(testTimeString.end() - 3, testTimeString.end());
@@ -154,29 +148,29 @@ std::shared_ptr<LogFileData> LogFileReader::readLogFileToLogFileData(std::string
 		analyticalVTKWritingTime.push_back(StringUtil::toInt(analyticalVTKWritingTimeString));
 	}
 
-	logFileData->setVTKFileWriting(StringUtil::toBool(input->getValue("VTKFileWriting")));
+	logFileData->setVTKFileWriting(StringUtil::toBool(input->getValue<std::string>("VTKFileWriting")));
 	logFileData->setSimTime(simTime);
 	logFileData->setResultCheckTime(resultsCheckTime);
 	logFileData->setTestTime(testTime);
 	logFileData->setAnalyticalVTKWritingTime(analyticalVTKWritingTime);
 	
-	logFileData->setPhiTestRun(StringUtil::toBool(input->getValue("PhiTest")));
-	logFileData->setNyTestRun(StringUtil::toBool(input->getValue("NyTest")));
-	logFileData->setL2NormTestRun(StringUtil::toBool(input->getValue("L2NormTest")));
-	logFileData->setL2NormTestBetweenKernelRun(StringUtil::toBool(input->getValue("L2NormTestBetweenKernel")));
+	logFileData->setPhiTestRun(StringUtil::toBool(input->getValue<std::string>("PhiTest")));
+	logFileData->setNyTestRun(StringUtil::toBool(input->getValue<std::string>("NyTest")));
+	logFileData->setL2NormTestRun(StringUtil::toBool(input->getValue<std::string>("L2NormTest")));
+	logFileData->setL2NormTestBetweenKernelRun(StringUtil::toBool(input->getValue<std::string>("L2NormTestBetweenKernel")));
 
 	if (logFileData->getPhiTestRun()) {
-		std::vector<std::string> failPhi = StringUtil::toStringVector(input->getValue("FailTests_Phi_PhiTest"));
-		std::vector<std::string> failOOA = StringUtil::toStringVector(input->getValue("FailTests_OOA_PhiTest"));
+		std::vector<std::string> failPhi = StringUtil::toStringVector(input->getValue<std::string>("FailTests_Phi_PhiTest"));
+		std::vector<std::string> failOOA = StringUtil::toStringVector(input->getValue<std::string>("FailTests_OOA_PhiTest"));
 
-		std::vector<std::string> dataToCalc = StringUtil::toStringVector(input->getValue("DataToCalc_PhiTest"));
+		std::vector<std::string> dataToCalc = StringUtil::toStringVector(input->getValue<std::string>("DataToCalc_PhiTest"));
 		std::vector<std::shared_ptr<PhiLogFileData> > aPhiLogGroup;
 		for (int i = 0; i < dataToCalc.size(); i++) {
 			std::shared_ptr<PhiLogFileDataImp> phiLog = PhiLogFileDataImp::getNewInstance();
 			phiLog->setBasicGridLengths(logFileData->getBasicGridLengths());
 			phiLog->setDataToCalc(dataToCalc.at(i));
-			phiLog->setStartTimeStepCalculation(StringUtil::toInt(input->getValue("StartTimeStepCalculation_PhiTest")));
-			phiLog->setEndTimeStepCalculation(StringUtil::toInt(input->getValue("EndTimeStepCalculation_PhiTest")));
+			phiLog->setStartTimeStepCalculation(StringUtil::toInt(input->getValue<std::string>("StartTimeStepCalculation_PhiTest")));
+			phiLog->setEndTimeStepCalculation(StringUtil::toInt(input->getValue<std::string>("EndTimeStepCalculation_PhiTest")));
 
 			std::vector<double> phiDiff;
 			std::vector<std::vector<double> > orderOfAccuracy;
@@ -190,7 +184,7 @@ std::shared_ptr<LogFileData> LogFileReader::readLogFileToLogFileData(std::string
 				}
 				if (!failData) {
 					phiDiffString << "PhiDiff_" << logFileData->getBasicGridLengths().at(j) << "_" << dataToCalc.at(i);
-					phiDiff.push_back(StringUtil::toDouble(input->getValue(phiDiffString.str())));
+					phiDiff.push_back(StringUtil::toDouble(input->getValue<std::string>(phiDiffString.str())));
 				}
 
 				for (int k = j + 1; k < logFileData->getBasicGridLengths().size(); k++) {
@@ -206,7 +200,7 @@ std::shared_ptr<LogFileData> LogFileReader::readLogFileToLogFileData(std::string
 						phiDiffOOA << "OrderOfAccuracy_PhiDiff_" << phiDiffBasicOOA.str();
 						aOrderOfAccuracyGroup.push_back(logFileData->getBasicGridLengths().at(j));
 						aOrderOfAccuracyGroup.push_back(logFileData->getBasicGridLengths().at(k));
-						aOrderOfAccuracyGroup.push_back(StringUtil::toDouble(input->getValue(phiDiffOOA.str())));
+						aOrderOfAccuracyGroup.push_back(StringUtil::toDouble(input->getValue<std::string>(phiDiffOOA.str())));
 					}
 					if (aOrderOfAccuracyGroup.size() > 0)
 						orderOfAccuracy.push_back(aOrderOfAccuracyGroup);
@@ -231,17 +225,17 @@ std::shared_ptr<LogFileData> LogFileReader::readLogFileToLogFileData(std::string
 
 
 	if (logFileData->getNyTestRun()) {
-		std::vector<std::string> failNy = StringUtil::toStringVector(input->getValue("FailTests_Ny_NyTest"));
-		std::vector<std::string> failOOA = StringUtil::toStringVector(input->getValue("FailTests_OOA_NyTest"));
+		std::vector<std::string> failNy = StringUtil::toStringVector(input->getValue<std::string>("FailTests_Ny_NyTest"));
+		std::vector<std::string> failOOA = StringUtil::toStringVector(input->getValue<std::string>("FailTests_OOA_NyTest"));
 
-		std::vector<std::string> dataToCalc = StringUtil::toStringVector(input->getValue("DataToCalc_NyTest"));
+		std::vector<std::string> dataToCalc = StringUtil::toStringVector(input->getValue<std::string>("DataToCalc_NyTest"));
 		std::vector<std::shared_ptr<NyLogFileData> > aNyLogGroup;
 		for (int i = 0; i < dataToCalc.size(); i++) {
 			std::shared_ptr<NyLogFileDataImp> nyLog = NyLogFileDataImp::getNewInstance();
 			nyLog->setBasicGridLengths(logFileData->getBasicGridLengths());
 			nyLog->setDataToCalc(dataToCalc.at(i));
-			nyLog->setStartTimeStepCalculation(StringUtil::toInt(input->getValue("StartTimeStepCalculation_NyTest")));
-			nyLog->setEndTimeStepCalculation(StringUtil::toInt(input->getValue("EndTimeStepCalculation_NyTest")));
+			nyLog->setStartTimeStepCalculation(StringUtil::toInt(input->getValue<std::string>("StartTimeStepCalculation_NyTest")));
+			nyLog->setEndTimeStepCalculation(StringUtil::toInt(input->getValue<std::string>("EndTimeStepCalculation_NyTest")));
 
 			std::vector<double> ny, nyDiff;
 			std::vector<std::vector<double> > orderOfAccuracy;
@@ -255,9 +249,9 @@ std::shared_ptr<LogFileData> LogFileReader::readLogFileToLogFileData(std::string
 				}
 				if (!failData) {
 					nyString << "Ny_" << nyBasicString.str();
-					ny.push_back(StringUtil::toDouble(input->getValue(nyString.str())));
+					ny.push_back(StringUtil::toDouble(input->getValue<std::string>(nyString.str())));
 					nyDiffString << "NyDiff_" << logFileData->getBasicGridLengths().at(j) << "_" << dataToCalc.at(i);
-					nyDiff.push_back(StringUtil::toDouble(input->getValue(nyDiffString.str())));
+					nyDiff.push_back(StringUtil::toDouble(input->getValue<std::string>(nyDiffString.str())));
 				}			
 
 				
@@ -274,7 +268,7 @@ std::shared_ptr<LogFileData> LogFileReader::readLogFileToLogFileData(std::string
 						nyDiffOOA << "OrderOfAccuracy_NyDiff_" << nyDiffBasicOOA.str();
 						aOrderOfAccuracyGroup.push_back(logFileData->getBasicGridLengths().at(j));
 						aOrderOfAccuracyGroup.push_back(logFileData->getBasicGridLengths().at(k));
-						aOrderOfAccuracyGroup.push_back(StringUtil::toDouble(input->getValue(nyDiffOOA.str())));
+						aOrderOfAccuracyGroup.push_back(StringUtil::toDouble(input->getValue<std::string>(nyDiffOOA.str())));
 					}
 					if (aOrderOfAccuracyGroup.size() > 0)
 						orderOfAccuracy.push_back(aOrderOfAccuracyGroup);
@@ -300,17 +294,17 @@ std::shared_ptr<LogFileData> LogFileReader::readLogFileToLogFileData(std::string
 
 	if (logFileData->getL2NormTestRun()) {
 		std::vector<std::shared_ptr<L2NormLogFileData> > l2NormGroup;
-		std::vector<std::string> dataToCalcL2Norm = StringUtil::toStringVector(input->getValue("DataToCalc_L2Norm"));
-		std::vector<std::string> normData = StringUtil::toStringVector(input->getValue("NormalizeData_L2Norm"));
-		std::vector<std::string> failL2Norm = StringUtil::toStringVector(input->getValue("FailTests_L2Norm"));
+		std::vector<std::string> dataToCalcL2Norm = StringUtil::toStringVector(input->getValue<std::string>("DataToCalc_L2Norm"));
+		std::vector<std::string> normData = StringUtil::toStringVector(input->getValue<std::string>("NormalizeData_L2Norm"));
+		std::vector<std::string> failL2Norm = StringUtil::toStringVector(input->getValue<std::string>("FailTests_L2Norm"));
 		for (int i = 0; i < dataToCalcL2Norm.size(); i++) {
 			for (int k = 0; k < normData.size(); k++) {
 				std::shared_ptr<L2NormLogFileDataImp> aL2Norm = L2NormLogFileDataImp::getNewInstance();
 				aL2Norm->setDataToCalc(dataToCalcL2Norm.at(i));
 				aL2Norm->setNormalizeData(normData.at(k));
 				aL2Norm->setBasicGridLengths(logFileData->getBasicGridLengths());
-				aL2Norm->setBasicTimeStep(StringUtil::toInt(input->getValue("BasicTimeStep_L2Norm")));
-				aL2Norm->setDivergentTimeStep(StringUtil::toInt(input->getValue("DivergentTimeStep_L2Norm")));
+				aL2Norm->setBasicTimeStep(StringUtil::toInt(input->getValue<std::string>("BasicTimeStep_L2Norm")));
+				aL2Norm->setDivergentTimeStep(StringUtil::toInt(input->getValue<std::string>("DivergentTimeStep_L2Norm")));
 
 				std::vector<double>  l2NormBasicTimeStep;
 				std::vector<double>  l2NormDivergentTimeStep;
@@ -327,9 +321,9 @@ std::shared_ptr<LogFileData> LogFileReader::readLogFileToLogFileData(std::string
 						basicTimeStep << "L2Norm_BasicTimeStep_" << basicString.str();
 						divergentTimeStep << "L2Norm_DivergentTimeStep_" << basicString.str();
 						diff << "L2Norm_Diff_" << basicString.str();
-						l2NormBasicTimeStep.push_back(StringUtil::toDouble(input->getValue(basicTimeStep.str())));
-						l2NormDivergentTimeStep.push_back(StringUtil::toDouble(input->getValue(divergentTimeStep.str())));
-						l2NormDiff.push_back(StringUtil::toDouble(input->getValue(diff.str())));
+						l2NormBasicTimeStep.push_back(StringUtil::toDouble(input->getValue<std::string>(basicTimeStep.str())));
+						l2NormDivergentTimeStep.push_back(StringUtil::toDouble(input->getValue<std::string>(divergentTimeStep.str())));
+						l2NormDiff.push_back(StringUtil::toDouble(input->getValue<std::string>(diff.str())));
 					}
 				}
 				if (l2NormBasicTimeStep.size() > 0) {
@@ -380,10 +374,10 @@ std::shared_ptr<LogFileData> LogFileReader::readLogFileToLogFileData(std::string
 
 	if (logFileData->getL2NormTestBetweenKernelRun()) {
 		std::vector<std::shared_ptr<L2NormBetweenKernelsLogFileData> > l2NormBetweenKernelsData;
-		std::vector<std::string> dataToCalc = StringUtil::toStringVector(input->getValue("DataToCalculate_L2Norm_BK"));
-		std::vector<int> timeSteps = StringUtil::toIntVector(input->getValue("TimeSteps_L2Norm_BK"));
-		std::vector<std::string> normalizeData = StringUtil::toStringVector(input->getValue("NormalizeWith_L2Norm_BK"));
-		std::vector<std::string> failL2Norm = StringUtil::toStringVector(input->getValue("FailTests_L2Norm_BK"));
+		std::vector<std::string> dataToCalc = StringUtil::toStringVector(input->getValue<std::string>("DataToCalculate_L2Norm_BK"));
+		std::vector<int> timeSteps = StringUtil::toIntVector(input->getValue<std::string>("TimeSteps_L2Norm_BK"));
+		std::vector<std::string> normalizeData = StringUtil::toStringVector(input->getValue<std::string>("NormalizeWith_L2Norm_BK"));
+		std::vector<std::string> failL2Norm = StringUtil::toStringVector(input->getValue<std::string>("FailTests_L2Norm_BK"));
 
 
 		for (int i = 0; i < dataToCalc.size(); i++) {
@@ -393,7 +387,7 @@ std::shared_ptr<LogFileData> LogFileReader::readLogFileToLogFileData(std::string
 					std::vector<double> l2NormDivergentKernel;
 					std::vector<double> l2NormBetweenKernels;
 					std::shared_ptr<L2NormBetweenKernelsLogFileDataImp> aL2NormLogFileData = L2NormBetweenKernelsLogFileDataImp::getNewInstance();
-					aL2NormLogFileData->setBasicKernel(input->getValue("BasicKernel_L2Norm_BK"));
+					aL2NormLogFileData->setBasicKernel(input->getValue<std::string>("BasicKernel_L2Norm_BK"));
 					aL2NormLogFileData->setDivergentKernel(logFileData->getKernel());
 					aL2NormLogFileData->setDataToCalculate(dataToCalc.at(i));
 					aL2NormLogFileData->setTimeStep(timeSteps.at(j));
@@ -415,9 +409,9 @@ std::shared_ptr<LogFileData> LogFileReader::readLogFileToLogFileData(std::string
 							basicKernel << "L2Norm_BasicKernel_" << basicString.str();
 							divergentKernel << "L2Norm_DivergentKernel_" << basicString.str();
 							diff << "L2Norm_Between_Kernels_" << basicString.str();
-							l2NormBasicKernel.push_back(StringUtil::toDouble(input->getValue(basicKernel.str())));
-							l2NormDivergentKernel.push_back(StringUtil::toDouble(input->getValue(divergentKernel.str())));
-							l2NormBetweenKernels.push_back(StringUtil::toDouble(input->getValue(diff.str())));
+							l2NormBasicKernel.push_back(StringUtil::toDouble(input->getValue<std::string>(basicKernel.str())));
+							l2NormDivergentKernel.push_back(StringUtil::toDouble(input->getValue<std::string>(divergentKernel.str())));
+							l2NormBetweenKernels.push_back(StringUtil::toDouble(input->getValue<std::string>(diff.str())));
 						}						
 					}
 					if (l2NormBasicKernel.size() > 0) {
diff --git a/apps/gpu/tests/NumericalTestPostProcessing/Utilities/PostProcessingConfigFileReader/PostProcessingConfigFileReaderImp.cpp b/apps/gpu/tests/NumericalTestPostProcessing/Utilities/PostProcessingConfigFileReader/PostProcessingConfigFileReaderImp.cpp
index 6aa5cfe7780267adfa5729c390c3ee0c0daeac18..7bff82d2118e449ea19a1cfd56ff261b730ff0d4 100644
--- a/apps/gpu/tests/NumericalTestPostProcessing/Utilities/PostProcessingConfigFileReader/PostProcessingConfigFileReaderImp.cpp
+++ b/apps/gpu/tests/NumericalTestPostProcessing/Utilities/PostProcessingConfigFileReader/PostProcessingConfigFileReaderImp.cpp
@@ -1,6 +1,6 @@
 #include "PostProcessingConfigFileReaderImp.h"
 
-#include "Core/Input/Input.h"
+#include <basics/config/ConfigurationFile.h>
 #include "Core/StringUtilities/StringUtil.h"
 
 #include "Utilities/PostProcessingConfigData/PostProcessingConfigDataImp.h"
@@ -15,48 +15,42 @@ std::shared_ptr<PostProcessingConfigFileReader> PostProcessingConfigFileReaderIm
 
 std::shared_ptr<PostProcessingConfigData> PostProcessingConfigFileReaderImp::readConfigFile(std::string filePath)
 {
-	std::ifstream stream;
-	stream.open(filePath.c_str(), std::ios::in);
-	if (stream.fail()) {
-		throw "can not open config file!\n";
-		exit(1);
-	}
-	std::shared_ptr<input::Input> input = input::Input::makeInput(stream, "config");
-
+	auto input = std::make_shared<vf::basics::ConfigurationFile>();
+	input->load(filePath);
 
 	std::vector<BasicSimulation> simulation;
 	std::vector<Assistant> assistants;
 	std::vector<DataCombination> combination;
 
-	if(StringUtil::toBool(input->getValue("ShearWave")))
+	if(StringUtil::toBool(input->getValue<std::string>("ShearWave")))
 		simulation.push_back(ShearWave);
 
-	if (StringUtil::toBool(input->getValue("TaylorGreenVortexUx")))
+	if (StringUtil::toBool(input->getValue<std::string>("TaylorGreenVortexUx")))
 		simulation.push_back(TaylorGreenVortexUx);
 
-	if (StringUtil::toBool(input->getValue("TaylorGreenVortexUz")))
+	if (StringUtil::toBool(input->getValue<std::string>("TaylorGreenVortexUz")))
 		simulation.push_back(TaylorGreenVortexUz);
 
-	if (StringUtil::toBool(input->getValue("Phi")))
+	if (StringUtil::toBool(input->getValue<std::string>("Phi")))
 		assistants.push_back(Phi);
 
-	if (StringUtil::toBool(input->getValue("Ny")))
+	if (StringUtil::toBool(input->getValue<std::string>("Ny")))
 		assistants.push_back(Ny);
 
-	if (StringUtil::toBool(input->getValue("L2Norm")))
+	if (StringUtil::toBool(input->getValue<std::string>("L2Norm")))
 		assistants.push_back(L2Norm);
 
-	if (StringUtil::toBool(input->getValue("L2Norm_BetweenKernels")))
+	if (StringUtil::toBool(input->getValue<std::string>("L2Norm_BetweenKernels")))
 		assistants.push_back(L2NormBetweenKernels);
 
-	if (StringUtil::toBool(input->getValue("TimeOutput")))
+	if (StringUtil::toBool(input->getValue<std::string>("TimeOutput")))
 		assistants.push_back(Time);
 
 
-	if (StringUtil::toBool(input->getValue("EqualSimulationsForDifferentKernels")))
+	if (StringUtil::toBool(input->getValue<std::string>("EqualSimulationsForDifferentKernels")))
 		combination.push_back(EqualSimulationsForDifferentKernels);
 
-	if (StringUtil::toBool(input->getValue("EqualKernelSimulationsForDifferentViscosities")))
+	if (StringUtil::toBool(input->getValue<std::string>("EqualKernelSimulationsForDifferentViscosities")))
 		combination.push_back(EqualKernelSimulationsForDifferentViscosities);
 
 	std::shared_ptr<PostProcessingConfigDataImp> data = PostProcessingConfigDataImp::getNewInstance();
@@ -65,8 +59,8 @@ std::shared_ptr<PostProcessingConfigData> PostProcessingConfigFileReaderImp::rea
 	data->setSimulations(simulation);
 	data->setDataCombinations(combination);
 
-	data->setLogFilesPath(input->getValue("LogFilesPath"));
-	data->setMathematicaFilePath(input->getValue("MathematicaFilePath"));
+	data->setLogFilesPath(input->getValue<std::string>("LogFilesPath"));
+	data->setMathematicaFilePath(input->getValue<std::string>("MathematicaFilePath"));
 	
 	return data;
 }
diff --git a/apps/gpu/tests/NumericalTests/Tests/NyTest/PostProcessingStrategy/NyTestPostProcessingStrategy.cpp b/apps/gpu/tests/NumericalTests/Tests/NyTest/PostProcessingStrategy/NyTestPostProcessingStrategy.cpp
index ec87f5ed96a289dc06e02e94239f936518bf18c8..4147d91533ec010f0ca0c0b8960abab420c77ad9 100644
--- a/apps/gpu/tests/NumericalTests/Tests/NyTest/PostProcessingStrategy/NyTestPostProcessingStrategy.cpp
+++ b/apps/gpu/tests/NumericalTests/Tests/NyTest/PostProcessingStrategy/NyTestPostProcessingStrategy.cpp
@@ -6,6 +6,7 @@
 
 #include "Tests/NyTest/NyTestParameterStruct.h"
 
+
 std::shared_ptr<NyTestPostProcessingStrategy> NyTestPostProcessingStrategy::getNewInstance(std::shared_ptr<SimulationResults> simResult, std::shared_ptr<AnalyticalResults> analyticalResult, std::shared_ptr<NyTestParameterStruct> testPara, std::vector<std::string> dataToCalcTests)
 {
 	return std::shared_ptr<NyTestPostProcessingStrategy>(new NyTestPostProcessingStrategy(simResult, analyticalResult, testPara, dataToCalcTests));
diff --git a/apps/gpu/tests/NumericalTests/Tests/PhiTest/PhiTest.cpp b/apps/gpu/tests/NumericalTests/Tests/PhiTest/PhiTest.cpp
index e3d749293793250893c78958eaa777fd1f3b87d7..bf93a95f450ff1a21062644d9be098eb2c2c235f 100644
--- a/apps/gpu/tests/NumericalTests/Tests/PhiTest/PhiTest.cpp
+++ b/apps/gpu/tests/NumericalTests/Tests/PhiTest/PhiTest.cpp
@@ -19,6 +19,7 @@ void PhiTest::evaluate()
 {
 	for (int i = 0; i < postProStrategies.size(); i++)
 		phiDiff.push_back(postProStrategies.at(i)->getPhiDiff(dataToCalculate));
+	
 	orderOfAccuracy = calcOrderOfAccuracy(phiDiff);
 	testStatus = checkTestPassed(orderOfAccuracy);
 	
diff --git a/apps/gpu/tests/NumericalTests/Utilities/Calculator/FFTCalculator/FFTCalculator.cpp b/apps/gpu/tests/NumericalTests/Utilities/Calculator/FFTCalculator/FFTCalculator.cpp
index 6942099bcf441e0f06b580a15a813f6eb34f5eec..7b145d049f42573c0ce59aa6245808bd93e35267 100644
--- a/apps/gpu/tests/NumericalTests/Utilities/Calculator/FFTCalculator/FFTCalculator.cpp
+++ b/apps/gpu/tests/NumericalTests/Utilities/Calculator/FFTCalculator/FFTCalculator.cpp
@@ -3,265 +3,264 @@
 #include "Utilities/Results/SimulationResults/SimulationResults.h"
 
 #define _USE_MATH_DEFINES
-#include <math.h>
-#include <stdlib.h>
 #include <fstream>
+#include <math.h>
 
 std::shared_ptr<FFTCalculator> FFTCalculator::getInstance()
 {
-	static std::shared_ptr<FFTCalculator> uniqueInstance;
-	if (!uniqueInstance)
-		uniqueInstance = std::shared_ptr<FFTCalculator>(new FFTCalculator());
-	return uniqueInstance;
+    static std::shared_ptr<FFTCalculator> instance; // created on the first call, then handed to every caller
+    if (instance == nullptr)
+        instance.reset(new FFTCalculator());
+    return instance;
 }
 
-double FFTCalculator::calcNy(std::vector<std::vector<double>> data, bool transposeData, int lx, int lz, int timeStepLength)
+double FFTCalculator::calcNy(std::vector<std::vector<double>> data, bool transposeData, int lx, int lz,
+                             int timeStepLength)
 {
-	this->lx = (double)lx;
-	this->lz = (double)lz;
-	this->timeStepLength = (double)timeStepLength;
-	this->transposeData = transposeData;
-	if (!transposeData)
-		this->data = data;
-	else
-		this->data = transpose(data);
-
-	init();
-
-	double ny = calcNy();
-	return ny;
+    this->lx = static_cast<double>(lx);
+    this->lz = static_cast<double>(lz);
+    this->timeStepLength = static_cast<double>(timeStepLength);
+    this->transposeData = transposeData;
+    if (transposeData)
+        this->data = transpose(data); // transpose() reads the lx/lz members assigned above
+    else
+        this->data = data;
+
+    init(); // discard FFT results cached by a previous calculation
+
+    // Delegate to the parameterless overload, which works on the state stored above.
+    return calcNy();
 }
 
-double FFTCalculator::calcPhiDiff(std::vector<std::vector<double>> data, bool transposeData, int lx, int lz, int timeStepLength)
+double FFTCalculator::calcPhiDiff(std::vector<std::vector<double>> data, bool transposeData, int lx, int lz,
+                                  int timeStepLength)
 {
-	this->lx = (double)lx;
-	this->lz = (double)lz;
-	this->timeStepLength = (double)timeStepLength;
-	this->transposeData = transposeData;
-	if (!transposeData)
-		this->data = data;
-	else
-		this->data = transpose(data);
-
-	init();
-
-	double phidiff = calcPhiDiff();
-	return abs(phidiff);
+    this->lx = (double)lx;
+    this->lz = (double)lz;
+    this->timeStepLength = (double)timeStepLength;
+    this->transposeData = transposeData;
+    if (!transposeData)
+        this->data = data;
+    else
+        this->data = transpose(data); // transpose() reads the lx/lz members assigned above
+
+    init(); // discard FFT results cached by a previous calculation
+
+    double phidiff = calcPhiDiff(); // slope fitted by the parameterless overload
+    // fabs() always takes a double; unqualified abs() may bind to int abs(int) and truncate the value.
+    return fabs(phidiff);
 }
 
 FFTCalculator::FFTCalculator()
 {
-
 }
 
 double FFTCalculator::calcAmplitudeForTimeStep(std::vector<double> data, bool transposeData, int lx, int lz)
 {
-	this->lx = (double)lx;
-	this->lz = (double)lz;
-	init();
-	this->transposeData = transposeData;
-	this->data.resize(0);
-	this->data.push_back(data);
-	std::vector<double> amplitude = calcAmplitudeForAllSteps();
-	return amplitude.at(0);
+    this->lx = static_cast<double>(lx);
+    this->lz = static_cast<double>(lz);
+    init(); // discard FFT results cached by a previous calculation
+    this->transposeData = transposeData;
+    this->data.clear();
+    this->data.push_back(data); // the stored series consists of this single time step
+    const std::vector<double> amplitudes = calcAmplitudeForAllSteps();
+    return amplitudes.at(0);
 }
 
 void FFTCalculator::init()
 {
-	fftResultsIm.clear();
-	fftResultsRe.clear();
-	fftCalculated = false;
+    fftCalculated = false; // forces the next calc*ForAllSteps() call to rerun the FFT
+    fftResultsRe.clear();
+    fftResultsIm.clear();
 }
 
 double FFTCalculator::calcNy()
 {
-	std::vector<double> logAmplitude = calcLogAmplitudeForAllSteps();
-	std::vector<double> linReg = calcLinReg(logAmplitude);
-	double nu = -(1.0 / (((2.0 * M_PI / lz) * (2.0 * M_PI / lz) + (2.0 * M_PI / lx)*(2.0 * M_PI / lx)) * timeStepLength)) * linReg.at(0);
-
-	return nu;
+    const std::vector<double> logAmplitude = calcLogAmplitudeForAllSteps();
+    const std::vector<double> linReg = calcLinReg(logAmplitude); // element 0 is the fitted slope
+    const double kz = 2.0 * M_PI / lz;
+    const double kx = 2.0 * M_PI / lx;
+    const double nu = -(1.0 / ((kz * kz + kx * kx) * timeStepLength)) * linReg.at(0);
+    return nu;
 }
 
 double FFTCalculator::calcPhiDiff()
 {
-	std::vector<double> phi = calcPhiForAllSteps();
-	std::vector<double> linReg = calcLinReg(phi);
+    const std::vector<double> phaseSeries = calcPhiForAllSteps();
+    const std::vector<double> regression = calcLinReg(phaseSeries);
 
-	return linReg.at(0);
+    return regression.at(0); // element 0 is the fitted slope
 }
 
 std::vector<double> FFTCalculator::calcLinReg(std::vector<double> y)
 {
-	std::vector<double> result;
-	std::vector<double> x(y.size());
-	double sumX = 0.0;
-	double sumY = 0.0;
-
-	for (int i = 0; i < y.size(); i++)
-	{
-		sumY += y.at(i);
-		x.at(i) = i;
-		sumX += i;
-	}
-	double avgX = sumX / y.size();
-	double avgY = sumY / y.size();
-	double zaehler = 0.0;
-	double nenner = 0.0;
-	for (int i = 0; i < y.size(); i++)
-	{
-		zaehler += (x.at(i) - avgX) * (y.at(i) - avgY);
-		nenner += (x.at(i) - avgX) * (x.at(i) - avgX);
-	}
-	double a1 = zaehler / nenner;
-	result.push_back(a1);
-	double a0 = avgY - a1*avgX;
-	result.push_back(a0);
-
-	double ess = 0;
-	double tss = 0;
-	for (int i = 0; i < y.size(); i++)
-	{
-		ess += ((a0+a1*x.at(i))-avgY) * ((a0 + a1*x.at(i)) - avgY);
-		tss += (y.at(i)-avgY) * (y.at(i) - avgY);
-	}
-	double r2 = ess / tss;
-	result.push_back(r2);
-	return result;
+    std::vector<double> x(y.size());
+    double sumX = 0.0;
+    double sumY = 0.0;
+    for (int i = 0; i < y.size(); i++) {
+        x.at(i) = i; // the abscissa is simply the sample index
+        sumX += x.at(i);
+        sumY += y.at(i);
+    }
+    const double meanX = sumX / y.size();
+    const double meanY = sumY / y.size();
+
+    double numerator = 0.0;   // sum of (x - meanX) * (y - meanY)
+    double denominator = 0.0; // sum of (x - meanX)^2
+    for (int i = 0; i < y.size(); i++) {
+        const double dx = x.at(i) - meanX;
+        numerator += dx * (y.at(i) - meanY);
+        denominator += dx * dx;
+    }
+    const double slope = numerator / denominator;
+    const double intercept = meanY - slope * meanX;
+
+    double explained = 0; // sum of squares of (fitted value - meanY)
+    double total = 0;     // sum of squares of (y - meanY)
+    for (int i = 0; i < y.size(); i++) {
+        const double fitted = intercept + slope * x.at(i);
+        explained += (fitted - meanY) * (fitted - meanY);
+        total += (y.at(i) - meanY) * (y.at(i) - meanY);
+    }
+
+    // Result layout: {slope, intercept, explained / total}.
+    std::vector<double> result = { slope, intercept, explained / total };
+    return result;
 }
 
 std::vector<double> FFTCalculator::calcLogAmplitudeForAllSteps()
 {
-	std::vector<double> amplitude = calcAmplitudeForAllSteps();
-	std::vector<double> logAmplitude;
-	for (int i = 0; i < amplitude.size(); i++)
-		logAmplitude.push_back(log(amplitude.at(i)));
+    const std::vector<double> amplitude = calcAmplitudeForAllSteps();
+    std::vector<double> logAmplitude;
+    for (const double value : amplitude)
+        logAmplitude.push_back(log(value)); // natural logarithm of each per-step amplitude
 
-	return logAmplitude;
+    return logAmplitude;
 }
 
 std::vector<double> FFTCalculator::calcAmplitudeForAllSteps()
 {
-	std::vector<double> amplitude;
-	if (fftCalculated == false) {
-		for (int step = 0; step < data.size(); step++)
-			calcFFT2D(step);
-		fftCalculated = true;
-	}
-	int pos;
-	if (!transposeData)
-		pos = 2 + (lx - 1);
-	else
-		pos = 2 + (lz - 1);
-
-	for (int step = 0; step < data.size(); step++)
-		amplitude.push_back(4.0 / (lx * lz)  * sqrt(fftResultsRe.at(step).at(pos) * fftResultsRe.at(step).at(pos) + fftResultsIm.at(step).at(pos) * fftResultsIm.at(step).at(pos)));
-
-	return amplitude;
+    if (!fftCalculated) {
+        for (int step = 0; step < data.size(); step++)
+            calcFFT2D(step);
+        fftCalculated = true;
+    }
+    // Position of the evaluated coefficient inside each flattened FFT result.
+    int pos;
+    if (transposeData)
+        pos = 2 + (lz - 1);
+    else
+        pos = 2 + (lx - 1);
+    std::vector<double> amplitude;
+    for (int step = 0; step < data.size(); step++) {
+        const double re = fftResultsRe.at(step).at(pos);
+        const double im = fftResultsIm.at(step).at(pos);
+        amplitude.push_back(4.0 / (lx * lz) * sqrt(re * re + im * im));
+    }
+    return amplitude;
 }
 
 std::vector<double> FFTCalculator::calcPhiForAllSteps()
 {
-	std::vector<double> phi;
-	if (fftCalculated == false) {
-		for (int step = 0; step < data.size(); step++)
-			calcFFT2D(step);
-		fftCalculated = true;
-	}
-	int pos;
-	if (!transposeData)
-		pos = 2 + (lx - 1);
-	else
-		pos = 2 + (lz - 1);
-
-	for (int step = 0; step < data.size(); step++) {
-		phi.push_back(atan(fftResultsIm.at(step).at(pos) / fftResultsRe.at(step).at(pos)));
-	}
-		
-	return phi;
+    if (!fftCalculated) {
+        for (int step = 0; step < data.size(); step++)
+            calcFFT2D(step);
+        fftCalculated = true;
+    }
+    // Position of the evaluated coefficient inside each flattened FFT result.
+    int pos;
+    if (transposeData)
+        pos = 2 + (lz - 1);
+    else
+        pos = 2 + (lx - 1);
+    std::vector<double> phi;
+    for (int step = 0; step < data.size(); step++) {
+        const double im = fftResultsIm.at(step).at(pos);
+        phi.push_back(atan(im / fftResultsRe.at(step).at(pos)));
+    }
+    return phi;
 }
 
 void FFTCalculator::calcFFT2D(unsigned int timeStep)
 {
-	fftw_complex *in = (fftw_complex*)fftw_malloc(sizeof(fftw_complex) * lx * lz);
-	fftw_complex *out = (fftw_complex*)fftw_malloc(sizeof(fftw_complex) * lx * lz);
+    fftw_complex *in = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * lx * lz); // input buffer sized for lx * lz complex entries
+    fftw_complex *out = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * lx * lz); // output buffer of the same size
 
-	initDataForFFT(in, timeStep);
+    initDataForFFT(in, timeStep); // copies data[timeStep] into the input buffer
 
-	fftw_plan p;
-	if (!transposeData)
-		p = fftw_plan_dft_2d(lz, lx, in, out, FFTW_FORWARD, FFTW_ESTIMATE);
-	else
-		p = fftw_plan_dft_2d(lx, lz, in, out, FFTW_FORWARD, FFTW_ESTIMATE);
-	fftw_execute(p);
+    fftw_plan p; // NOTE(review): planned after the input is filled; FFTW_ESTIMATE is documented not to overwrite the arrays - confirm if the flag ever changes
+    if (!transposeData)
+        p = fftw_plan_dft_2d(lz, lx, in, out, FFTW_FORWARD, FFTW_ESTIMATE); // dimension order (lz, lx) for untransposed data
+    else
+        p = fftw_plan_dft_2d(lx, lz, in, out, FFTW_FORWARD, FFTW_ESTIMATE); // dimension order (lx, lz) for transposed data
+    fftw_execute(p); // runs the forward transform from in to out
 
-	setFFTResults(out, timeStep);
+    setFFTResults(out, timeStep); // appends the coefficients to fftResultsRe / fftResultsIm
 
-	fftw_destroy_plan(p);
-	fftw_free(in);
-	fftw_free(out);
+    fftw_destroy_plan(p); // release the plan and both buffers before returning
+    fftw_free(in);
+    fftw_free(out);
 }
 
-std::vector<std::vector<double> > FFTCalculator::transpose(std::vector<std::vector<double> > dataToTranspose)
+std::vector<std::vector<double>> FFTCalculator::transpose(std::vector<std::vector<double>> dataToTranspose) // swaps the x and z axes of every flattened time step
 {
-	std::vector<std::vector<std::vector<double> >> dataInLx;
-	dataInLx.resize(dataToTranspose.size());
-	for (int i = 0; i < dataInLx.size(); i++) {
-		dataInLx.at(i).resize(lz);
-		for(int j = 0; j < dataInLx.at(i).size(); j++)
-			dataInLx.at(i).at(j).resize(lx);
-	}
-	for (int timeStep = 0; timeStep < dataInLx.size(); timeStep++) {
-		for (int posInLZ = 0; posInLZ < lz; posInLZ++)
-			for (int posInLX = 0; posInLX < lx; posInLX++)
-				dataInLx.at(timeStep).at(posInLZ).at(posInLX) = dataToTranspose.at(timeStep).at(posInLX + posInLZ*lx);
-	}
-
-	std::vector<std::vector<std::vector<double> >> dataInLz;
-	dataInLz.resize(dataToTranspose.size());
-	for (int i = 0; i < dataInLx.size(); i++) {
-		dataInLz.at(i).resize(lx);
-		for (int j = 0; j < dataInLz.at(i).size(); j++)
-			dataInLz.at(i).at(j).resize(lz);
-	}
-
-	for (int timeStep = 0; timeStep < dataInLz.size(); timeStep++) {
-		for (int posInLX = 0; posInLX < lx; posInLX++)
-			for (int posInLZ = 0; posInLZ < lz; posInLZ++)
-				dataInLz.at(timeStep).at(posInLX).at(posInLZ) = dataInLx.at(timeStep).at(posInLZ).at(posInLX);
-	}
-
-	std::vector<std::vector<double> > result;
-	result.resize(dataToTranspose.size());
-
-	for (int timeStep = 0; timeStep < dataInLz.size(); timeStep++) {
-		result.at(timeStep).resize(0);
-		for (int posInLX = 0; posInLX < lx; posInLX++)
-			for (int posInLZ = 0; posInLZ < lz; posInLZ++)
-				result.at(timeStep).push_back(dataInLz.at(timeStep).at(posInLX).at(posInLZ));
-	}
-	return result;
+    std::vector<std::vector<std::vector<double>>> dataInLx; // indexed [timeStep][z][x]
+    dataInLx.resize(dataToTranspose.size());
+    for (int i = 0; i < dataInLx.size(); i++) {
+        dataInLx.at(i).resize(lz);
+        for (int j = 0; j < dataInLx.at(i).size(); j++)
+            dataInLx.at(i).at(j).resize(lx);
+    }
+    for (int timeStep = 0; timeStep < dataInLx.size(); timeStep++) {
+        for (int posInLZ = 0; posInLZ < lz; posInLZ++)
+            for (int posInLX = 0; posInLX < lx; posInLX++)
+                dataInLx.at(timeStep).at(posInLZ).at(posInLX) = dataToTranspose.at(timeStep).at(posInLX + posInLZ * lx); // unflatten: x runs fastest in the input
+    }
+
+    std::vector<std::vector<std::vector<double>>> dataInLz; // indexed [timeStep][x][z]
+    dataInLz.resize(dataToTranspose.size());
+    for (int i = 0; i < dataInLx.size(); i++) {
+        dataInLz.at(i).resize(lx);
+        for (int j = 0; j < dataInLz.at(i).size(); j++)
+            dataInLz.at(i).at(j).resize(lz);
+    }
+
+    for (int timeStep = 0; timeStep < dataInLz.size(); timeStep++) {
+        for (int posInLX = 0; posInLX < lx; posInLX++)
+            for (int posInLZ = 0; posInLZ < lz; posInLZ++)
+                dataInLz.at(timeStep).at(posInLX).at(posInLZ) = dataInLx.at(timeStep).at(posInLZ).at(posInLX); // swap the two spatial indices
+    }
+
+    std::vector<std::vector<double>> result; // one flattened vector per time step again
+    result.resize(dataToTranspose.size());
+
+    for (int timeStep = 0; timeStep < dataInLz.size(); timeStep++) {
+        result.at(timeStep).resize(0);
+        for (int posInLX = 0; posInLX < lx; posInLX++)
+            for (int posInLZ = 0; posInLZ < lz; posInLZ++)
+                result.at(timeStep).push_back(dataInLz.at(timeStep).at(posInLX).at(posInLZ)); // flatten: z runs fastest in the output
+    }
+    return result;
 }
 
-void FFTCalculator::initDataForFFT(fftw_complex * input, unsigned int step)
+void FFTCalculator::initDataForFFT(fftw_complex *input, unsigned int step) // fills the FFT input buffer from data[step]
 {
-	for (int i = 0; i < data.at(step).size(); i++)
-	{
-		input[i][0] = data.at(step).at(i);
-		input[i][1] = 0;
-	}
+    for (int i = 0; i < data.at(step).size(); i++) { // NOTE(review): input must hold at least data[step].size() entries - confirm against the lx * lz allocation in calcFFT2D
+        input[i][0] = data.at(step).at(i); // component 0 receives the sample value
+        input[i][1] = 0;                   // component 1 is zeroed
+    }
 }
 
-void FFTCalculator::setFFTResults(fftw_complex * result, unsigned int step)
+void FFTCalculator::setFFTResults(fftw_complex *result, unsigned int step)
 {
-	std::vector<double> fftRe, fftIm;
-	fftRe.resize(data.at(step).size());
-	fftIm.resize(data.at(step).size());
-
-	for (int i = 0; i < data.at(step).size(); i++)
-	{
-		fftRe.at(i) = result[i][0];
-		fftIm.at(i) = result[i][1];
-	}
-	fftResultsIm.push_back(fftIm);
-	fftResultsRe.push_back(fftRe);
+    const auto count = data.at(step).size(); // as many coefficients are copied as data[step] has samples
+    std::vector<double> fftRe(count);
+    std::vector<double> fftIm(count);
+
+    for (int i = 0; i < count; i++) {
+        fftRe.at(i) = result[i][0];
+        fftIm.at(i) = result[i][1];
+    }
+    fftResultsIm.push_back(fftIm);
+    fftResultsRe.push_back(fftRe);
 }
\ No newline at end of file
diff --git a/apps/gpu/tests/NumericalTests/Utilities/ConfigFileReaderNT/ConfigFileReaderNT.cpp b/apps/gpu/tests/NumericalTests/Utilities/ConfigFileReaderNT/ConfigFileReaderNT.cpp
index 37734bc4dfbdd00df73a79947db2c07edb04ad18..26d4045fe4b14cf9ffc5dea16815d70c716de000 100644
--- a/apps/gpu/tests/NumericalTests/Utilities/ConfigFileReaderNT/ConfigFileReaderNT.cpp
+++ b/apps/gpu/tests/NumericalTests/Utilities/ConfigFileReaderNT/ConfigFileReaderNT.cpp
@@ -1,76 +1,20 @@
 #include "ConfigFileReaderNT.h"
 
-#include "Core/Input/Input.h"
+#include <basics/config/ConfigurationFile.h>
 #include "Core/StringUtilities/StringUtil.h"
 
+#include <memory>
 #include <fstream>
 #include <string>
 
 #define VAL(str) #str
 #define TOSTRING(str) VAL(str)
 
-std::shared_ptr<ConfigFileReader> ConfigFileReader::getNewInstance(const std::string aFilePath)
-{
-    return std::shared_ptr<ConfigFileReader>(new ConfigFileReader(aFilePath));
-}
-
-ConfigFileReader::ConfigFileReader(const std::string aFilePath) : myFilePath(aFilePath)
-{
-    // If PATH_NUMERICAL_TESTS is not defined, the grid definitions for the tests needs to be placed in the project root
-    // directories.
-#ifdef PATH_NUMERICAL_TESTS
-    pathNumericalTests = TOSTRING(PATH_NUMERICAL_TESTS) + std::string("/");
-#else
-    pathNumericalTests = TOSTRING(SOURCE_ROOT) + std::string("/");
-#endif
-    std::cout << pathNumericalTests << "\n";
-}
-
-void ConfigFileReader::readConfigFile()
-{
-    configData           = std::shared_ptr<ConfigDataStruct>(new ConfigDataStruct);
-    std::ifstream stream = openConfigFile(myFilePath);
-
-    std::shared_ptr<input::Input> input = input::Input::makeInput(stream, "config");
-
-    if (!checkConfigFile(input))
-        exit(1);
-
-    configData->viscosity            = StringUtil::toDoubleVector(input->getValue("Viscosity"));
-    configData->kernelsToTest        = readKernelList(input);
-    configData->writeAnalyticalToVTK = StringUtil::toBool(input->getValue("WriteAnalyResultsToVTK"));
-    configData->ySliceForCalculation = StringUtil::toInt(input->getValue("ySliceForCalculation"));
-    ;
-    configData->logFilePath         = pathNumericalTests + input->getValue("FolderLogFile");
-    configData->numberOfSimulations = calcNumberOfSimulations(input);
-
-    std::shared_ptr<BasicSimulationParameterStruct> basicSimPara = makeBasicSimulationParameter(input);
-
-    configData->taylorGreenVortexUxParameter       = makeTaylorGreenVortexUxParameter(input, basicSimPara);
-    configData->taylorGreenVortexUxGridInformation = makeGridInformation(input, "TaylorGreenVortexUx");
-    ;
-
-    configData->taylorGreenVortexUzParameter       = makeTaylorGreenVortexUzParameter(input, basicSimPara);
-    configData->taylorGreenVortexUzGridInformation = makeGridInformation(input, "TaylorGreenVortexUz");
-    ;
+using ConfigFilePtr = std::shared_ptr<vf::basics::ConfigurationFile>;
+using ConfigDataPtr = std::shared_ptr<ConfigDataStruct>;
 
-    configData->shearWaveParameter       = makeShearWaveParameter(input, basicSimPara);
-    configData->shearWaveGridInformation = makeGridInformation(input, "ShearWave");
-    ;
 
-    configData->phiTestParameter                  = makePhiTestParameter(input);
-    configData->nyTestParameter                   = makeNyTestParameter(input);
-    configData->l2NormTestParameter               = makeL2NormTestParameter(input);
-    configData->l2NormTestBetweenKernelsParameter = makeL2NormTestBetweenKernelsParameter(input);
-
-    configData->vectorWriterInfo = makeVectorWriterInformationStruct(input);
-
-    configData->logFilePara = makeLogFilePara(input);
-
-    stream.close();
-}
-
-std::ifstream ConfigFileReader::openConfigFile(const std::string aFilePath)
+std::ifstream openConfigFile(const std::string aFilePath)
 {
     std::ifstream stream;
     stream.open(aFilePath.c_str(), std::ios::in);
@@ -80,21 +24,19 @@ std::ifstream ConfigFileReader::openConfigFile(const std::string aFilePath)
     return stream;
 }
 
-std::shared_ptr<ConfigDataStruct> ConfigFileReader::getConfigData() { return configData; }
-
-bool ConfigFileReader::checkConfigFile(std::shared_ptr<input::Input> input)
+bool checkConfigFile(ConfigFilePtr input)
 {
-    std::vector<double> u0TGVux               = StringUtil::toDoubleVector(input->getValue("ux_TGV_Ux"));
-    std::vector<double> amplitudeTGVux        = StringUtil::toDoubleVector(input->getValue("Amplitude_TGV_Ux"));
-    std::vector<int> basisTimeStepLengthTGVux = StringUtil::toIntVector(input->getValue("BasisTimeStepLength_TGV_Ux"));
+    std::vector<double> u0TGVux               = StringUtil::toDoubleVector(input->getValue<std::string>("ux_TGV_Ux"));
+    std::vector<double> amplitudeTGVux        = StringUtil::toDoubleVector(input->getValue<std::string>("Amplitude_TGV_Ux"));
+    std::vector<int> basisTimeStepLengthTGVux = StringUtil::toIntVector(input->getValue<std::string>("BasisTimeStepLength_TGV_Ux"));
 
-    std::vector<double> v0TGVuz               = StringUtil::toDoubleVector(input->getValue("uz_TGV_Uz"));
-    std::vector<double> amplitudeTGVuz        = StringUtil::toDoubleVector(input->getValue("Amplitude_TGV_Uz"));
-    std::vector<int> basisTimeStepLengthTGVuz = StringUtil::toIntVector(input->getValue("BasisTimeStepLength_TGV_Uz"));
+    std::vector<double> v0TGVuz               = StringUtil::toDoubleVector(input->getValue<std::string>("uz_TGV_Uz"));
+    std::vector<double> amplitudeTGVuz        = StringUtil::toDoubleVector(input->getValue<std::string>("Amplitude_TGV_Uz"));
+    std::vector<int> basisTimeStepLengthTGVuz = StringUtil::toIntVector(input->getValue<std::string>("BasisTimeStepLength_TGV_Uz"));
 
-    std::vector<double> v0SW               = StringUtil::toDoubleVector(input->getValue("v0_SW"));
-    std::vector<double> u0SW               = StringUtil::toDoubleVector(input->getValue("u0_SW"));
-    std::vector<int> basisTimeStepLengthSW = StringUtil::toIntVector(input->getValue("BasisTimeStepLength_SW"));
+    std::vector<double> v0SW               = StringUtil::toDoubleVector(input->getValue<std::string>("v0_SW"));
+    std::vector<double> u0SW               = StringUtil::toDoubleVector(input->getValue<std::string>("u0_SW"));
+    std::vector<int> basisTimeStepLengthSW = StringUtil::toIntVector(input->getValue<std::string>("BasisTimeStepLength_SW"));
 
     if (u0TGVux.size() != amplitudeTGVux.size() || u0TGVux.size() != basisTimeStepLengthTGVux.size()) {
         std::cout << "Length u0_TGV_U0 is unequal to Lenght Amplitude_TGV_U0 or BasisTimeStepLength_TGV_U0!"
@@ -115,24 +57,25 @@ bool ConfigFileReader::checkConfigFile(std::shared_ptr<input::Input> input)
 }
 
 std::shared_ptr<BasicSimulationParameterStruct>
-ConfigFileReader::makeBasicSimulationParameter(std::shared_ptr<input::Input> input)
+makeBasicSimulationParameter(ConfigFilePtr input) // builds the parameter struct from the NumberOfTimeSteps and Devices entries
 {
     std::shared_ptr<BasicSimulationParameterStruct> basicSimPara =
         std::shared_ptr<BasicSimulationParameterStruct>(new BasicSimulationParameterStruct);
 
-    basicSimPara->numberOfTimeSteps = StringUtil::toInt(input->getValue("NumberOfTimeSteps"));
-    basicSimPara->devices           = StringUtil::toUintVector(input->getValue("Devices"));
+    basicSimPara->numberOfTimeSteps = StringUtil::toInt(input->getValue<std::string>("NumberOfTimeSteps")); // value is fetched as a string and converted by StringUtil
+    basicSimPara->devices           = StringUtil::toUintVector(input->getValue<std::string>("Devices")); // value is fetched as a string and converted by StringUtil
     return basicSimPara;
 }
 
 std::vector<std::shared_ptr<TaylorGreenVortexUxParameterStruct>>
-ConfigFileReader::makeTaylorGreenVortexUxParameter(std::shared_ptr<input::Input> input,
-                                                   std::shared_ptr<BasicSimulationParameterStruct> basicSimParameter)
+makeTaylorGreenVortexUxParameter(const std::string pathNumericalTests, 
+                                 ConfigFilePtr input,
+                                 std::shared_ptr<BasicSimulationParameterStruct> basicSimParameter)
 {
-    std::vector<int> basisTimeStepLength = StringUtil::toIntVector(input->getValue("BasisTimeStepLength_TGV_Ux"));
-    std::vector<double> amplitude        = StringUtil::toDoubleVector(input->getValue("Amplitude_TGV_Ux"));
-    std::vector<double> u0               = StringUtil::toDoubleVector(input->getValue("ux_TGV_Ux"));
-    int l0                               = StringUtil::toInt(input->getValue("l0_TGV_Ux"));
+    std::vector<int> basisTimeStepLength = StringUtil::toIntVector(input->getValue<std::string>("BasisTimeStepLength_TGV_Ux"));
+    std::vector<double> amplitude        = StringUtil::toDoubleVector(input->getValue<std::string>("Amplitude_TGV_Ux"));
+    std::vector<double> u0               = StringUtil::toDoubleVector(input->getValue<std::string>("ux_TGV_Ux"));
+    int l0                               = StringUtil::toInt(input->getValue<std::string>("l0_TGV_Ux"));
     basicSimParameter->l0                = l0;
 
     std::vector<std::shared_ptr<TaylorGreenVortexUxParameterStruct>> parameter;
@@ -145,22 +88,23 @@ ConfigFileReader::makeTaylorGreenVortexUxParameter(std::shared_ptr<input::Input>
         aParameter->amplitude           = amplitude.at(i);
         aParameter->basicTimeStepLength = basisTimeStepLength.at(i);
         aParameter->l0                  = l0;
-        aParameter->rho0                = StringUtil::toDouble(input->getValue("Rho0"));
-        aParameter->vtkFilePath         = pathNumericalTests + input->getValue("FolderForVTKFileWriting");
-        aParameter->dataToCalcTests     = StringUtil::toStringVector(input->getValue("DataToCalcTests_TGV_Ux"));
+        aParameter->rho0                = StringUtil::toDouble(input->getValue<std::string>("Rho0"));
+        aParameter->vtkFilePath         = pathNumericalTests + input->getValue<std::string>("FolderForVTKFileWriting");
+        aParameter->dataToCalcTests     = StringUtil::toStringVector(input->getValue<std::string>("DataToCalcTests_TGV_Ux"));
         parameter.push_back(aParameter);
     }
     return parameter;
 }
 
 std::vector<std::shared_ptr<TaylorGreenVortexUzParameterStruct>>
-ConfigFileReader::makeTaylorGreenVortexUzParameter(std::shared_ptr<input::Input> input,
-                                                   std::shared_ptr<BasicSimulationParameterStruct> basicSimParameter)
+makeTaylorGreenVortexUzParameter(const std::string pathNumericalTests,
+                                 ConfigFilePtr input,
+                                 std::shared_ptr<BasicSimulationParameterStruct> basicSimParameter)
 {
-    std::vector<int> basisTimeStepLength = StringUtil::toIntVector(input->getValue("BasisTimeStepLength_TGV_Uz"));
-    std::vector<double> amplitude        = StringUtil::toDoubleVector(input->getValue("Amplitude_TGV_Uz"));
-    std::vector<double> uz               = StringUtil::toDoubleVector(input->getValue("uz_TGV_Uz"));
-    int l0                               = StringUtil::toInt(input->getValue("l0_TGV_Uz"));
+    std::vector<int> basisTimeStepLength = StringUtil::toIntVector(input->getValue<std::string>("BasisTimeStepLength_TGV_Uz"));
+    std::vector<double> amplitude        = StringUtil::toDoubleVector(input->getValue<std::string>("Amplitude_TGV_Uz"));
+    std::vector<double> uz               = StringUtil::toDoubleVector(input->getValue<std::string>("uz_TGV_Uz"));
+    int l0                               = StringUtil::toInt(input->getValue<std::string>("l0_TGV_Uz"));
     basicSimParameter->l0                = l0;
 
     std::vector<std::shared_ptr<TaylorGreenVortexUzParameterStruct>> parameter;
@@ -172,21 +116,22 @@ ConfigFileReader::makeTaylorGreenVortexUzParameter(std::shared_ptr<input::Input>
         aParameter->amplitude                = amplitude.at(i);
         aParameter->basicTimeStepLength      = basisTimeStepLength.at(i);
         aParameter->l0                       = l0;
-        aParameter->rho0                     = StringUtil::toDouble(input->getValue("Rho0"));
-        aParameter->vtkFilePath              = pathNumericalTests + input->getValue("FolderForVTKFileWriting");
-        aParameter->dataToCalcTests          = StringUtil::toStringVector(input->getValue("DataToCalcTests_TGV_Uz"));
+        aParameter->rho0                     = StringUtil::toDouble(input->getValue<std::string>("Rho0"));
+        aParameter->vtkFilePath              = pathNumericalTests + input->getValue<std::string>("FolderForVTKFileWriting");
+        aParameter->dataToCalcTests          = StringUtil::toStringVector(input->getValue<std::string>("DataToCalcTests_TGV_Uz"));
         parameter.push_back(aParameter);
     }
     return parameter;
 }
 std::vector<std::shared_ptr<ShearWaveParameterStruct>>
-ConfigFileReader::makeShearWaveParameter(std::shared_ptr<input::Input> input,
-                                         std::shared_ptr<BasicSimulationParameterStruct> basicSimParameter)
+makeShearWaveParameter(const std::string pathNumericalTests,
+                       ConfigFilePtr input,
+                       std::shared_ptr<BasicSimulationParameterStruct> basicSimParameter)
 {
-    std::vector<int> basisTimeStepLength = StringUtil::toIntVector(input->getValue("BasisTimeStepLength_SW"));
-    std::vector<double> uz               = StringUtil::toDoubleVector(input->getValue("v0_SW"));
-    std::vector<double> ux               = StringUtil::toDoubleVector(input->getValue("u0_SW"));
-    int l0                               = StringUtil::toInt(input->getValue("l0_SW"));
+    std::vector<int> basisTimeStepLength = StringUtil::toIntVector(input->getValue<std::string>("BasisTimeStepLength_SW"));
+    std::vector<double> uz               = StringUtil::toDoubleVector(input->getValue<std::string>("v0_SW"));
+    std::vector<double> ux               = StringUtil::toDoubleVector(input->getValue<std::string>("u0_SW"));
+    int l0                               = StringUtil::toInt(input->getValue<std::string>("l0_SW"));
     basicSimParameter->l0                = l0;
 
     std::vector<std::shared_ptr<ShearWaveParameterStruct>> parameter;
@@ -198,82 +143,116 @@ ConfigFileReader::makeShearWaveParameter(std::shared_ptr<input::Input> input,
         aParameter->ux                       = ux.at(i);
         aParameter->basicTimeStepLength      = basisTimeStepLength.at(i);
         aParameter->l0                       = l0;
-        aParameter->rho0                     = StringUtil::toDouble(input->getValue("Rho0"));
-        aParameter->vtkFilePath              = pathNumericalTests + input->getValue("FolderForVTKFileWriting");
-        aParameter->dataToCalcTests          = StringUtil::toStringVector(input->getValue("DataToCalcTests_SW"));
+        aParameter->rho0                     = StringUtil::toDouble(input->getValue<std::string>("Rho0"));
+        aParameter->vtkFilePath              = pathNumericalTests + input->getValue<std::string>("FolderForVTKFileWriting");
+        aParameter->dataToCalcTests          = StringUtil::toStringVector(input->getValue<std::string>("DataToCalcTests_SW"));
         parameter.push_back(aParameter);
     }
     return parameter;
 }
 
-std::shared_ptr<NyTestParameterStruct> ConfigFileReader::makeNyTestParameter(std::shared_ptr<input::Input> input)
+std::shared_ptr<NyTestParameterStruct> makeNyTestParameter(ConfigFilePtr input)
 {
     std::shared_ptr<BasicTestParameterStruct> basicTestParameter =
         std::shared_ptr<BasicTestParameterStruct>(new BasicTestParameterStruct);
-    basicTestParameter->runTest              = StringUtil::toBool(input->getValue("NyTest"));
-    basicTestParameter->ySliceForCalculation = StringUtil::toInt(input->getValue("ySliceForCalculation"));
+    basicTestParameter->runTest              = StringUtil::toBool(input->getValue<std::string>("NyTest"));
+    basicTestParameter->ySliceForCalculation = StringUtil::toInt(input->getValue<std::string>("ySliceForCalculation"));
 
     std::shared_ptr<NyTestParameterStruct> testParameter =
         std::shared_ptr<NyTestParameterStruct>(new NyTestParameterStruct);
     testParameter->basicTestParameter       = basicTestParameter;
-    testParameter->endTimeStepCalculation   = StringUtil::toInt(input->getValue("EndTimeStepCalculation_Ny"));
-    testParameter->minOrderOfAccuracy       = StringUtil::toDouble(input->getValue("MinOrderOfAccuracy_Ny"));
-    testParameter->startTimeStepCalculation = StringUtil::toInt(input->getValue("StartTimeStepCalculation_Ny"));
+    testParameter->endTimeStepCalculation   = StringUtil::toInt(input->getValue<std::string>("EndTimeStepCalculation_Ny"));
+    testParameter->minOrderOfAccuracy       = StringUtil::toDouble(input->getValue<std::string>("MinOrderOfAccuracy_Ny"));
+    testParameter->startTimeStepCalculation = StringUtil::toInt(input->getValue<std::string>("StartTimeStepCalculation_Ny"));
 
     return testParameter;
 }
 
-std::shared_ptr<PhiTestParameterStruct> ConfigFileReader::makePhiTestParameter(std::shared_ptr<input::Input> input)
+std::shared_ptr<PhiTestParameterStruct> makePhiTestParameter(ConfigFilePtr input)
 {
     std::shared_ptr<BasicTestParameterStruct> basicTestParameter =
         std::shared_ptr<BasicTestParameterStruct>(new BasicTestParameterStruct);
-    basicTestParameter->runTest              = StringUtil::toBool(input->getValue("PhiTest"));
-    basicTestParameter->ySliceForCalculation = StringUtil::toInt(input->getValue("ySliceForCalculation"));
+    basicTestParameter->runTest              = StringUtil::toBool(input->getValue<std::string>("PhiTest"));
+    basicTestParameter->ySliceForCalculation = StringUtil::toInt(input->getValue<std::string>("ySliceForCalculation"));
 
     std::shared_ptr<PhiTestParameterStruct> testParameter =
         std::shared_ptr<PhiTestParameterStruct>(new PhiTestParameterStruct);
     testParameter->basicTestParameter       = basicTestParameter;
-    testParameter->endTimeStepCalculation   = StringUtil::toInt(input->getValue("EndTimeStepCalculation_Phi"));
-    testParameter->minOrderOfAccuracy       = StringUtil::toDouble(input->getValue("MinOrderOfAccuracy_Phi"));
-    testParameter->startTimeStepCalculation = StringUtil::toInt(input->getValue("StartTimeStepCalculation_Phi"));
+    testParameter->endTimeStepCalculation   = StringUtil::toInt(input->getValue<std::string>("EndTimeStepCalculation_Phi"));
+    testParameter->minOrderOfAccuracy       = StringUtil::toDouble(input->getValue<std::string>("MinOrderOfAccuracy_Phi"));
+    testParameter->startTimeStepCalculation = StringUtil::toInt(input->getValue<std::string>("StartTimeStepCalculation_Phi"));
 
     return testParameter;
 }
 
 std::shared_ptr<L2NormTestParameterStruct>
-ConfigFileReader::makeL2NormTestParameter(std::shared_ptr<input::Input> input)
+makeL2NormTestParameter(ConfigFilePtr input)
 {
     std::shared_ptr<BasicTestParameterStruct> basicTestParameter =
         std::shared_ptr<BasicTestParameterStruct>(new BasicTestParameterStruct);
-    basicTestParameter->runTest              = StringUtil::toBool(input->getValue("L2NormTest"));
-    basicTestParameter->ySliceForCalculation = StringUtil::toInt(input->getValue("ySliceForCalculation"));
+    basicTestParameter->runTest              = StringUtil::toBool(input->getValue<std::string>("L2NormTest"));
+    basicTestParameter->ySliceForCalculation = StringUtil::toInt(input->getValue<std::string>("ySliceForCalculation"));
 
     std::shared_ptr<L2NormTestParameterStruct> testParameter =
         std::shared_ptr<L2NormTestParameterStruct>(new L2NormTestParameterStruct);
     testParameter->basicTestParameter = basicTestParameter;
-    testParameter->basicTimeStep      = StringUtil::toInt(input->getValue("BasicTimeStep_L2"));
-    testParameter->divergentTimeStep  = StringUtil::toInt(input->getValue("DivergentTimeStep_L2"));
-    testParameter->normalizeData      = StringUtil::toStringVector(input->getValue("NormalizeData_L2Norm"));
-    testParameter->maxDiff            = StringUtil::toDoubleVector(input->getValue("MaxL2NormDiff"));
+    testParameter->basicTimeStep      = StringUtil::toInt(input->getValue<std::string>("BasicTimeStep_L2"));
+    testParameter->divergentTimeStep  = StringUtil::toInt(input->getValue<std::string>("DivergentTimeStep_L2"));
+    testParameter->normalizeData      = StringUtil::toStringVector(input->getValue<std::string>("NormalizeData_L2Norm"));
+    testParameter->maxDiff            = StringUtil::toDoubleVector(input->getValue<std::string>("MaxL2NormDiff"));
 
     return testParameter;
 }
 
+std::vector<std::string> readKernelList(ConfigFilePtr input)
+{
+    if (StringUtil::toBool(input->getValue<std::string>("L2NormBetweenKernelsTest"))) {
+        std::vector<std::string> kernelList = StringUtil::toStringVector(input->getValue<std::string>("KernelsToTest"));
+        std::string beginKernel             = input->getValue<std::string>("BasicKernel_L2NormBetweenKernels");
+        bool basicKernelInKernelList        = false;
+        for (int i = 0; i < kernelList.size(); i++) {
+            if (kernelList.at(i) == beginKernel)
+                basicKernelInKernelList = true;
+        }
+        if (!basicKernelInKernelList)
+            kernelList.push_back(beginKernel);
+
+        std::vector<std::string> kernelNames = kernelList;
+
+        while (kernelNames.at(0) != beginKernel) {
+            kernelNames.push_back(kernelNames.at(0));
+            std::vector<std::string>::iterator it = kernelNames.begin();
+            kernelNames.erase(it);
+        }
+        std::vector<std::string> kernels;
+        for (int i = 0; i < kernelNames.size(); i++)
+            kernels.push_back(kernelNames.at(i));
+        return kernels;
+    } else {
+        std::vector<std::string> kernelList = StringUtil::toStringVector(input->getValue<std::string>("KernelsToTest"));
+        std::vector<std::string> kernels;
+        for (int i = 0; i < kernelList.size(); i++)
+            kernels.push_back(kernelList.at(i));
+
+        return kernels;
+    }
+}
+
 std::shared_ptr<L2NormTestBetweenKernelsParameterStruct>
-ConfigFileReader::makeL2NormTestBetweenKernelsParameter(std::shared_ptr<input::Input> input)
+makeL2NormTestBetweenKernelsParameter(ConfigFilePtr input)
 {
     std::shared_ptr<BasicTestParameterStruct> basicTestParameter =
         std::shared_ptr<BasicTestParameterStruct>(new BasicTestParameterStruct);
-    basicTestParameter->runTest              = StringUtil::toBool(input->getValue("L2NormBetweenKernelsTest"));
-    basicTestParameter->ySliceForCalculation = StringUtil::toInt(input->getValue("ySliceForCalculation"));
+    basicTestParameter->runTest              = StringUtil::toBool(input->getValue<std::string>("L2NormBetweenKernelsTest"));
+    basicTestParameter->ySliceForCalculation = StringUtil::toInt(input->getValue<std::string>("ySliceForCalculation"));
 
     std::shared_ptr<L2NormTestBetweenKernelsParameterStruct> testParameter =
         std::shared_ptr<L2NormTestBetweenKernelsParameterStruct>(new L2NormTestBetweenKernelsParameterStruct);
     testParameter->basicTestParameter = basicTestParameter;
-    testParameter->basicKernel        = input->getValue("BasicKernel_L2NormBetweenKernels");
+    testParameter->basicKernel        = input->getValue<std::string>("BasicKernel_L2NormBetweenKernels");
     testParameter->kernelsToTest      = readKernelList(input);
-    testParameter->timeSteps          = StringUtil::toIntVector(input->getValue("Timesteps_L2NormBetweenKernels"));
-    testParameter->normalizeData      = StringUtil::toStringVector(input->getValue("NormalizeData_L2Norm"));
+    testParameter->timeSteps          = StringUtil::toIntVector(input->getValue<std::string>("Timesteps_L2NormBetweenKernels"));
+    testParameter->normalizeData      = StringUtil::toStringVector(input->getValue<std::string>("NormalizeData_L2Norm"));
 
     bool correct = false;
     for (int i = 0; i < testParameter->normalizeData.size(); i++)
@@ -292,7 +271,7 @@ ConfigFileReader::makeL2NormTestBetweenKernelsParameter(std::shared_ptr<input::I
 }
 
 std::vector<std::shared_ptr<GridInformationStruct>>
-ConfigFileReader::makeGridInformation(std::shared_ptr<input::Input> input, std::string simName)
+makeGridInformation(const std::string pathNumericalTests, ConfigFilePtr input, std::string simName)
 {
     int number = 32;
     std::vector<std::string> valueNames;
@@ -314,10 +293,10 @@ ConfigFileReader::makeGridInformation(std::shared_ptr<input::Input> input, std::
     double nextNumber = 32.0;
 
     for (int i = 0; i < valueNames.size(); i++) {
-        if (StringUtil::toBool(input->getValue(valueNames.at(i)))) {
+        if (StringUtil::toBool(input->getValue<std::string>(valueNames.at(i)))) {
             lx.push_back(nextNumber);
             lz.push_back(nextNumber * 3.0 / 2.0);
-            gridPath.push_back(pathNumericalTests + input->getValue(gridPaths.at(i)));
+            gridPath.push_back(pathNumericalTests + input->getValue<std::string>(gridPaths.at(i)));
             nextNumber *= 2;
         }
     }
@@ -326,7 +305,7 @@ ConfigFileReader::makeGridInformation(std::shared_ptr<input::Input> input, std::
     for (int i = 0; i < lx.size(); i++) {
         std::shared_ptr<GridInformationStruct> aGridInformation =
             std::shared_ptr<GridInformationStruct>(new GridInformationStruct);
-        aGridInformation->numberOfGridLevels = StringUtil::toInt(input->getValue("NumberOfGridLevels"));
+        aGridInformation->numberOfGridLevels = StringUtil::toInt(input->getValue<std::string>("NumberOfGridLevels"));
         aGridInformation->maxLevel           = aGridInformation->numberOfGridLevels - 1;
         aGridInformation->gridPath           = gridPath.at(i);
         aGridInformation->lx                 = lx.at(i);
@@ -336,112 +315,127 @@ ConfigFileReader::makeGridInformation(std::shared_ptr<input::Input> input, std::
     return gridInformation;
 }
 
+unsigned int calcStartStepForToVectorWriter(ConfigFilePtr input)
+{
+    std::vector<unsigned int> startStepsTests;
+    startStepsTests.push_back(StringUtil::toInt(input->getValue<std::string>("BasicTimeStep_L2")));
+    startStepsTests.push_back(StringUtil::toInt(input->getValue<std::string>("StartTimeStepCalculation_Ny")));
+    startStepsTests.push_back(StringUtil::toInt(input->getValue<std::string>("StartTimeStepCalculation_Phi")));
+    std::sort(startStepsTests.begin(), startStepsTests.end());
+
+    return startStepsTests.at(0);
+}
+
 std::shared_ptr<VectorWriterInformationStruct>
-ConfigFileReader::makeVectorWriterInformationStruct(std::shared_ptr<input::Input> input)
+makeVectorWriterInformationStruct(ConfigFilePtr input)
 {
     std::shared_ptr<VectorWriterInformationStruct> vectorWriter =
         std::shared_ptr<VectorWriterInformationStruct>(new VectorWriterInformationStruct);
     vectorWriter->startTimeVectorWriter  = calcStartStepForToVectorWriter(input);
-    vectorWriter->startTimeVTKDataWriter = StringUtil::toInt(input->getValue("StartStepFileWriter"));
-    vectorWriter->writeVTKFiles          = StringUtil::toBool(input->getValue("WriteVTKFiles"));
+    vectorWriter->startTimeVTKDataWriter = StringUtil::toInt(input->getValue<std::string>("StartStepFileWriter"));
+    vectorWriter->writeVTKFiles          = StringUtil::toBool(input->getValue<std::string>("WriteVTKFiles"));
 
     return vectorWriter;
 }
 
-std::shared_ptr<LogFileParameterStruct> ConfigFileReader::makeLogFilePara(std::shared_ptr<input::Input> input)
+std::shared_ptr<LogFileParameterStruct> makeLogFilePara(ConfigFilePtr input)
 {
     std::shared_ptr<LogFileParameterStruct> logFilePara =
         std::shared_ptr<LogFileParameterStruct>(new LogFileParameterStruct);
-    logFilePara->devices              = StringUtil::toIntVector(input->getValue("Devices"));
-    logFilePara->numberOfTimeSteps    = StringUtil::toInt(input->getValue("NumberOfTimeSteps"));
-    logFilePara->writeAnalyticalToVTK = StringUtil::toBool(input->getValue("WriteAnalyResultsToVTK"));
+    logFilePara->devices              = StringUtil::toIntVector(input->getValue<std::string>("Devices"));
+    logFilePara->numberOfTimeSteps    = StringUtil::toInt(input->getValue<std::string>("NumberOfTimeSteps"));
+    logFilePara->writeAnalyticalToVTK = StringUtil::toBool(input->getValue<std::string>("WriteAnalyResultsToVTK"));
 
     return logFilePara;
 }
 
-std::vector<std::string> ConfigFileReader::readKernelList(std::shared_ptr<input::Input> input)
+int calcNumberOfSimulationGroup(ConfigFilePtr input, std::string simName)
 {
-    if (StringUtil::toBool(input->getValue("L2NormBetweenKernelsTest"))) {
-        std::vector<std::string> kernelList = StringUtil::toStringVector(input->getValue("KernelsToTest"));
-        std::string beginnKernel            = input->getValue("BasicKernel_L2NormBetweenKernels");
-        bool basicKernelInKernelList        = false;
-        for (int i = 0; i < kernelList.size(); i++) {
-            if (kernelList.at(i) == beginnKernel)
-                basicKernelInKernelList = true;
-        }
-        if (!basicKernelInKernelList)
-            kernelList.push_back(beginnKernel);
-
-        std::vector<std::string> kernelNames = kernelList;
-
-        while (kernelNames.at(0) != beginnKernel) {
-            kernelNames.push_back(kernelNames.at(0));
-            std::vector<std::string>::iterator it = kernelNames.begin();
-            kernelNames.erase(it);
-        }
-        std::vector<std::string> kernels;
-        for (int i = 0; i < kernelNames.size(); i++)
-            kernels.push_back(kernelNames.at(i));
-        return kernels;
-    } else {
-        std::vector<std::string> kernelList = StringUtil::toStringVector(input->getValue("KernelsToTest"));
-        std::vector<std::string> kernels;
-        for (int i = 0; i < kernelList.size(); i++)
-            kernels.push_back(kernelList.at(i));
-
-        return kernels;
+    int counter = 0;
+    int number  = 32;
+    std::vector<std::string> valueNames;
+    for (int i = 1; i <= 5; i++) {
+        std::string aValueName = simName;
+        aValueName += std::to_string(number);
+        valueNames.push_back(aValueName);
+        number *= 2;
     }
+    for (int i = 0; i < valueNames.size(); i++) {
+        if (StringUtil::toBool(input->getValue<std::string>(valueNames.at(i))))
+            counter++;
+    }
+    return counter;
 }
 
-unsigned int ConfigFileReader::calcStartStepForToVectorWriter(std::shared_ptr<input::Input> input)
-{
-    std::vector<unsigned int> startStepsTests;
-    startStepsTests.push_back(StringUtil::toInt(input->getValue("BasicTimeStep_L2")));
-    startStepsTests.push_back(StringUtil::toInt(input->getValue("StartTimeStepCalculation_Ny")));
-    startStepsTests.push_back(StringUtil::toInt(input->getValue("StartTimeStepCalculation_Phi")));
-    std::sort(startStepsTests.begin(), startStepsTests.end());
-
-    return startStepsTests.at(0);
-}
-
-int ConfigFileReader::calcNumberOfSimulations(std::shared_ptr<input::Input> input)
+int calcNumberOfSimulations(ConfigFilePtr input, ConfigDataPtr configData)
 {
     int counter = 0;
 
     int tgvCounterU0 = calcNumberOfSimulationGroup(input, "TaylorGreenVortexUx");
-    tgvCounterU0 *= int(StringUtil::toDoubleVector(input->getValue("ux_TGV_Ux")).size());
+    tgvCounterU0 *= int(StringUtil::toDoubleVector(input->getValue<std::string>("ux_TGV_Ux")).size());
     counter += tgvCounterU0;
 
     int tgvCounterV0 = calcNumberOfSimulationGroup(input, "TaylorGreenVortexUz");
     ;
-    tgvCounterV0 *= int(StringUtil::toDoubleVector(input->getValue("uz_TGV_Uz")).size());
+    tgvCounterV0 *= int(StringUtil::toDoubleVector(input->getValue<std::string>("uz_TGV_Uz")).size());
     counter += tgvCounterV0;
 
     int swCounter = calcNumberOfSimulationGroup(input, "ShearWave");
     ;
-    swCounter *= int(StringUtil::toDoubleVector(input->getValue("u0_SW")).size());
+    swCounter *= int(StringUtil::toDoubleVector(input->getValue<std::string>("u0_SW")).size());
     counter += swCounter;
 
-    counter *= int(StringUtil::toDoubleVector(input->getValue("Viscosity")).size());
+    counter *= int(StringUtil::toDoubleVector(input->getValue<std::string>("Viscosity")).size());
     counter *= int(configData->kernelsToTest.size());
 
     return counter;
 }
 
-int ConfigFileReader::calcNumberOfSimulationGroup(std::shared_ptr<input::Input> input, std::string simName)
+ConfigDataPtr vf::gpu::tests::readConfigFile(const std::string aFilePath)
 {
-    int counter = 0;
-    int number  = 32;
-    std::vector<std::string> valueNames;
-    for (int i = 1; i <= 5; i++) {
-        std::string aValueName = simName;
-        aValueName += std::to_string(number);
-        valueNames.push_back(aValueName);
-        number *= 2;
-    }
-    for (int i = 0; i < valueNames.size(); i++) {
-        if (StringUtil::toBool(input->getValue(valueNames.at(i))))
-            counter++;
-    }
-    return counter;
+    // If PATH_NUMERICAL_TESTS is not defined, the grid definitions for the tests need to be placed in the project root
+    // directory.
+#ifdef PATH_NUMERICAL_TESTS
+    auto pathNumericalTests = TOSTRING(PATH_NUMERICAL_TESTS) + std::string("/");
+#else
+    auto pathNumericalTests = TOSTRING(SOURCE_ROOT) + std::string("/");
+#endif
+    std::cout << pathNumericalTests << "\n";
+
+    auto configData = std::make_shared<ConfigDataStruct>();
+    auto input      = std::make_shared<vf::basics::ConfigurationFile>();
+    input->load(aFilePath);
+
+    if (!checkConfigFile(input))
+        exit(1);
+
+    configData->viscosity            = StringUtil::toDoubleVector(input->getValue<std::string>("Viscosity"));
+    configData->kernelsToTest        = readKernelList(input);
+    configData->writeAnalyticalToVTK = StringUtil::toBool(input->getValue<std::string>("WriteAnalyResultsToVTK"));
+    configData->ySliceForCalculation = StringUtil::toInt(input->getValue<std::string>("ySliceForCalculation"));
+
+    configData->logFilePath         = pathNumericalTests + input->getValue<std::string>("FolderLogFile");
+    configData->numberOfSimulations = calcNumberOfSimulations(input, configData);
+
+    auto basicSimPara = makeBasicSimulationParameter(input);
+
+    configData->taylorGreenVortexUxParameter       = makeTaylorGreenVortexUxParameter(pathNumericalTests, input, basicSimPara);
+    configData->taylorGreenVortexUxGridInformation = makeGridInformation(pathNumericalTests, input, "TaylorGreenVortexUx");
+
+    configData->taylorGreenVortexUzParameter       = makeTaylorGreenVortexUzParameter(pathNumericalTests, input, basicSimPara);
+    configData->taylorGreenVortexUzGridInformation = makeGridInformation(pathNumericalTests, input, "TaylorGreenVortexUz");
+
+    configData->shearWaveParameter       = makeShearWaveParameter(pathNumericalTests, input, basicSimPara);
+    configData->shearWaveGridInformation = makeGridInformation(pathNumericalTests, input, "ShearWave");
+
+    configData->phiTestParameter                  = makePhiTestParameter(input);
+    configData->nyTestParameter                   = makeNyTestParameter(input);
+    configData->l2NormTestParameter               = makeL2NormTestParameter(input);
+    configData->l2NormTestBetweenKernelsParameter = makeL2NormTestBetweenKernelsParameter(input);
+
+    configData->vectorWriterInfo = makeVectorWriterInformationStruct(input);
+
+    configData->logFilePara = makeLogFilePara(input);
+
+    return configData;
 }
\ No newline at end of file
diff --git a/apps/gpu/tests/NumericalTests/Utilities/ConfigFileReaderNT/ConfigFileReaderNT.h b/apps/gpu/tests/NumericalTests/Utilities/ConfigFileReaderNT/ConfigFileReaderNT.h
index 92f58890bcd61fd47ff27160c62a708d3967f74f..cd4ad56b4c23a8a973385839aa1b0736e1d2fda1 100644
--- a/apps/gpu/tests/NumericalTests/Utilities/ConfigFileReaderNT/ConfigFileReaderNT.h
+++ b/apps/gpu/tests/NumericalTests/Utilities/ConfigFileReaderNT/ConfigFileReaderNT.h
@@ -1,60 +1,19 @@
 #ifndef CONFIG_FILE_READER_H
 #define CONFIG_FILE_READER_H
 
-#include "Core/Input/Input.h"
 #include "Utilities/Structs/ConfigDataStruct.h"
 
 #include <memory>
 #include <string>
 #include <vector>
 
-class ConfigFileReader
+namespace vf::basics 
 {
-public:
-    static std::shared_ptr<ConfigFileReader> getNewInstance(const std::string aFilePath);
-    std::shared_ptr<ConfigDataStruct> getConfigData();
-    void readConfigFile();
+class ConfigurationFile;
+}
 
-private:
-    ConfigFileReader(){};
-    ConfigFileReader(const std::string aFilePath);
-
-    std::ifstream openConfigFile(const std::string aFilePath);
-    bool checkConfigFile(std::shared_ptr<input::Input> input);
-    std::vector<std::string> readKernelList(std::shared_ptr<input::Input> input);
-
-    int calcNumberOfSimulations(std::shared_ptr<input::Input> input);
-    int calcNumberOfSimulationGroup(std::shared_ptr<input::Input> input, std::string simName);
-    unsigned int calcStartStepForToVectorWriter(std::shared_ptr<input::Input> input);
-
-    std::vector<std::shared_ptr<TaylorGreenVortexUxParameterStruct>>
-    makeTaylorGreenVortexUxParameter(std::shared_ptr<input::Input> input,
-                                     std::shared_ptr<BasicSimulationParameterStruct> basicSimParameter);
-    std::vector<std::shared_ptr<TaylorGreenVortexUzParameterStruct>>
-    makeTaylorGreenVortexUzParameter(std::shared_ptr<input::Input> input,
-                                     std::shared_ptr<BasicSimulationParameterStruct> basicSimParameter);
-    std::vector<std::shared_ptr<ShearWaveParameterStruct>>
-    makeShearWaveParameter(std::shared_ptr<input::Input> input,
-                           std::shared_ptr<BasicSimulationParameterStruct> basicSimParameter);
-
-    std::shared_ptr<NyTestParameterStruct> makeNyTestParameter(std::shared_ptr<input::Input> input);
-    std::shared_ptr<PhiTestParameterStruct> makePhiTestParameter(std::shared_ptr<input::Input> input);
-    std::shared_ptr<L2NormTestParameterStruct> makeL2NormTestParameter(std::shared_ptr<input::Input> input);
-    std::shared_ptr<L2NormTestBetweenKernelsParameterStruct>
-    makeL2NormTestBetweenKernelsParameter(std::shared_ptr<input::Input> input);
-
-    std::shared_ptr<BasicSimulationParameterStruct> makeBasicSimulationParameter(std::shared_ptr<input::Input> input);
-    std::vector<std::shared_ptr<GridInformationStruct>> makeGridInformation(std::shared_ptr<input::Input> input,
-                                                                            std::string simName);
-
-    std::shared_ptr<VectorWriterInformationStruct>
-    makeVectorWriterInformationStruct(std::shared_ptr<input::Input> input);
-    std::shared_ptr<LogFileParameterStruct> makeLogFilePara(std::shared_ptr<input::Input> input);
-
-    std::string pathNumericalTests;
-
-    const std::string myFilePath;
-    std::shared_ptr<ConfigDataStruct> configData;
-    //std::shared_ptr<KernelMapper> myKernelMapper;
-};
+namespace vf::gpu::tests
+{
+    std::shared_ptr<ConfigDataStruct> readConfigFile(const std::string aFilePath);
+}
 #endif
\ No newline at end of file
diff --git a/apps/gpu/tests/NumericalTests/Utilities/DataWriter/AnalyticalResults2DToVTKWriter/AnalyticalResults2DToVTKWriterImp.cpp b/apps/gpu/tests/NumericalTests/Utilities/DataWriter/AnalyticalResults2DToVTKWriter/AnalyticalResults2DToVTKWriterImp.cpp
index a53f552702c50aed8640ac1ef47ed69b3d2f1d50..b9b4dd3a8d13ddcce87c555312c0d0a422de5a05 100644
--- a/apps/gpu/tests/NumericalTests/Utilities/DataWriter/AnalyticalResults2DToVTKWriter/AnalyticalResults2DToVTKWriterImp.cpp
+++ b/apps/gpu/tests/NumericalTests/Utilities/DataWriter/AnalyticalResults2DToVTKWriter/AnalyticalResults2DToVTKWriterImp.cpp
@@ -1,9 +1,9 @@
 #include "AnalyticalResults2DToVTKWriterImp.h"
 
-#include <stdio.h>
+#include <cmath>
 #include <fstream>
 #include <sstream>
-#include <cmath>
+#include <stdio.h>
 
 #include <Core/StringUtilities/StringUtil.h>
 
@@ -17,50 +17,57 @@
 #include "Utilities/Results/AnalyticalResults/AnalyticalResult.h"
 #include <mpi.h>
 
-
-std::shared_ptr<AnalyticalResults2DToVTKWriterImp> AnalyticalResults2DToVTKWriterImp::getInstance(bool writeAnalyticalResults)
+std::shared_ptr<AnalyticalResults2DToVTKWriterImp>
+AnalyticalResults2DToVTKWriterImp::getInstance(bool writeAnalyticalResults)
 {
-	static std::shared_ptr<AnalyticalResults2DToVTKWriterImp> uniqueInstance;
-	if (!uniqueInstance)
-		uniqueInstance = std::shared_ptr<AnalyticalResults2DToVTKWriterImp>(new AnalyticalResults2DToVTKWriterImp(writeAnalyticalResults));
-	return uniqueInstance;
+    static std::shared_ptr<AnalyticalResults2DToVTKWriterImp> uniqueInstance;
+    if (!uniqueInstance)
+        uniqueInstance = std::shared_ptr<AnalyticalResults2DToVTKWriterImp>(
+            new AnalyticalResults2DToVTKWriterImp(writeAnalyticalResults));
+    return uniqueInstance;
 }
 
-AnalyticalResults2DToVTKWriterImp::AnalyticalResults2DToVTKWriterImp(bool writeAnalyticalResults) : writeAnalyticalResults(writeAnalyticalResults)
+AnalyticalResults2DToVTKWriterImp::AnalyticalResults2DToVTKWriterImp(bool writeAnalyticalResults)
+    : writeAnalyticalResults(writeAnalyticalResults)
 {
-
 }
 
-void AnalyticalResults2DToVTKWriterImp::writeAnalyticalResult(std::shared_ptr<Parameter> para, std::shared_ptr<AnalyticalResults> analyticalResult)
+void AnalyticalResults2DToVTKWriterImp::writeAnalyticalResult(std::shared_ptr<Parameter> para,
+                                                              std::shared_ptr<AnalyticalResults> analyticalResult)
 {
-	if (writeAnalyticalResults) {
-		std::cout << "Write Analytical Result To VTK-Files" << std::endl;
-		for (int level = para->getCoarse(); level <= para->getFine(); level++) {
+    if (writeAnalyticalResults) {
+        std::cout << "Write Analytical Result To VTK-Files" << std::endl;
+        for (int level = para->getCoarse(); level <= para->getFine(); level++) {
 #pragma omp parallel for
-			for (int timeStep = 0; timeStep < analyticalResult->getNumberOfTimeSteps(); timeStep++) {
-				const unsigned int numberOfParts = para->getParH(level)->size_Mat_SP / para->getlimitOfNodesForVTK() + 1;
-				std::vector<std::string> fname;
-				unsigned int time = analyticalResult->getTimeSteps().at(timeStep)*analyticalResult->getTimeStepLength();
-				for (int j = 1; j <= numberOfParts; j++) {
-					std::string filePath = para->getFName();
-					filePath.resize(filePath.size() - 5);
-					fname.push_back(filePath + "AnalyticalResult/Analytical_cells_bin_lev_" + StringUtil::toString<int>(level) + "_ID_" + StringUtil::toString<int>(para->getMyID()) + "_Part_" + StringUtil::toString<int>(j) + "_t_" + StringUtil::toString<int>(time) + ".vtk");
-				}
-				std::cout << "\t Write TimeStep=" << timeStep << " t=" << time << "...";
-				writeTimeStep(para, analyticalResult, level, fname, timeStep);
-				std::cout << "done." << std::endl;
-			}
-		}
-		std::cout << std::endl;
-	}
+            for (int timeStep = 0; timeStep < analyticalResult->getNumberOfTimeSteps(); timeStep++) {
+                const unsigned int numberOfParts = para->getParH(level)->size_Mat / para->getlimitOfNodesForVTK() + 1;
+                std::vector<std::string> fname;
+                unsigned int time =
+                    analyticalResult->getTimeSteps().at(timeStep) * analyticalResult->getTimeStepLength();
+                for (int j = 1; j <= numberOfParts; j++) {
+                    std::string filePath = para->getFName();
+                    filePath.resize(filePath.size() - 5);
+                    fname.push_back(filePath + "AnalyticalResult/Analytical_cells_bin_lev_" +
+                                    StringUtil::toString<int>(level) + "_ID_" +
+                                    StringUtil::toString<int>(para->getMyProcessID()) + "_Part_" +
+                                    StringUtil::toString<int>(j) + "_t_" + StringUtil::toString<int>(time) + ".vtk");
+                }
+                std::cout << "\t Write TimeStep=" << timeStep << " t=" << time << "...";
+                writeTimeStep(para, analyticalResult, level, fname, timeStep);
+                std::cout << "done." << std::endl;
+            }
+        }
+        std::cout << std::endl;
+    }
 }
 
-
-void AnalyticalResults2DToVTKWriterImp::writeTimeStep(std::shared_ptr<Parameter> para, std::shared_ptr<AnalyticalResults> analyticalResult, int level, std::vector<std::string> & fname, int timeStep)
+void AnalyticalResults2DToVTKWriterImp::writeTimeStep(std::shared_ptr<Parameter> para,
+                                                      std::shared_ptr<AnalyticalResults> analyticalResult, int level,
+                                                      std::vector<std::string> &fname, int timeStep)
 {
-	std::vector<UbTupleFloat3 > nodes;
-    std::vector<UbTupleUInt8 > cells;
-    std::vector<std::string > nodedatanames;
+    std::vector<UbTupleFloat3> nodes;
+    std::vector<UbTupleUInt8> cells;
+    std::vector<std::string> nodedatanames;
     nodedatanames.push_back("press");
     nodedatanames.push_back("rho");
     nodedatanames.push_back("vx1");
@@ -73,21 +80,21 @@ void AnalyticalResults2DToVTKWriterImp::writeTimeStep(std::shared_ptr<Parameter>
     unsigned int startpos = 0;
     unsigned int endpos = 0;
     unsigned int sizeOfNodes = 0;
-    std::vector<std::vector<double > > nodedata(nodedatanames.size());
+    std::vector<std::vector<double>> nodedata(nodedatanames.size()); // one value vector per entry of nodedatanames
 
-	maxX = para->getGridX().at(level);
-	maxY = para->getGridY().at(level);
-	maxZ = para->getGridZ().at(level);
+    maxX = para->getGridX().at(level); // maxX is read later by CoordResults2DTo1D
+    maxY = para->getGridY().at(level);
+    maxZ = para->getGridZ().at(level);
 
-	std::vector<double> press = analyticalResult->getPress()[timeStep];
-	std::vector<double> rho = analyticalResult->getRho()[timeStep];
-	std::vector<double> vx = analyticalResult->getVx()[timeStep];
-	std::vector<double> vy = analyticalResult->getVy()[timeStep];
-	std::vector<double> vz = analyticalResult->getVz()[timeStep];
+    std::vector<double> press = analyticalResult->getPress()[timeStep]; // analytical fields stored at index timeStep
+    std::vector<double> rho = analyticalResult->getRho()[timeStep];
+    std::vector<double> vx = analyticalResult->getVx()[timeStep];
+    std::vector<double> vy = analyticalResult->getVy()[timeStep];
+    std::vector<double> vz = analyticalResult->getVz()[timeStep];
 
-    for (unsigned int part = 0; part < fname.size(); part++){
-        if (((part + 1)*para->getlimitOfNodesForVTK()) > para->getParH(level)->size_Mat_SP)
-            sizeOfNodes = para->getParH(level)->size_Mat_SP - (part * para->getlimitOfNodesForVTK());
+    for (unsigned int part = 0; part < fname.size(); part++) { // one pass per file name in fname
+        if (((part + 1) * para->getlimitOfNodesForVTK()) > para->getParH(level)->size_Mat)
+            sizeOfNodes = para->getParH(level)->size_Mat - (part * para->getlimitOfNodesForVTK()); // last part: only the remaining nodes
         else
             sizeOfNodes = para->getlimitOfNodesForVTK();
 
@@ -104,14 +111,12 @@ void AnalyticalResults2DToVTKWriterImp::writeTimeStep(std::shared_ptr<Parameter>
         nodedata[4].resize(sizeOfNodes);
         nodedata[5].resize(sizeOfNodes);
         //////////////////////////////////////////////////////////////////////////
-        for (unsigned int pos = startpos; pos < endpos; pos++)
-        {
-            if (para->getParH(level)->geoSP[pos] == GEO_FLUID)
-            {
+        for (unsigned int pos = startpos; pos < endpos; pos++) {
+            if (para->getParH(level)->typeOfGridNode[pos] == GEO_FLUID) { // only fluid nodes get node data and cells
                 //////////////////////////////////////////////////////////////////////////
-                double x1 = para->getParH(level)->coordX_SP[pos];
-                double x2 = para->getParH(level)->coordY_SP[pos];
-                double x3 = para->getParH(level)->coordZ_SP[pos];
+                double x1 = para->getParH(level)->coordinateX[pos];
+                double x2 = para->getParH(level)->coordinateY[pos];
+                double x3 = para->getParH(level)->coordinateZ[pos];
                 //////////////////////////////////////////////////////////////////////////
                 number1 = pos;
                 dn1 = pos - startpos;
@@ -119,37 +125,34 @@ void AnalyticalResults2DToVTKWriterImp::writeTimeStep(std::shared_ptr<Parameter>
                 //////////////////////////////////////////////////////////////////////////
                 nodes[dn1] = (makeUbTuple((float)(x1), (float)(x2), (float)(x3)));
 
-				int numberInResults = CoordResults2DTo1D(x1 - 1.0, x3 - 1.0);
+                int numberInResults = CoordResults2DTo1D(x1 - 1.0, x3 - 1.0); // analytical result index from the shifted x/z coordinates
                 nodedata[0][dn1] = press[numberInResults];
-				nodedata[1][dn1] = rho[numberInResults];
+                nodedata[1][dn1] = rho[numberInResults];
                 nodedata[2][dn1] = vx[numberInResults];
                 nodedata[3][dn1] = vy[numberInResults];
                 nodedata[4][dn1] = vz[numberInResults];
-                nodedata[5][dn1] = (double)para->getParH(level)->geoSP[pos];
+                nodedata[5][dn1] = (double)para->getParH(level)->typeOfGridNode[pos];
                 //////////////////////////////////////////////////////////////////////////
-                number2 = para->getParH(level)->neighborX_SP[number1];
-                number3 = para->getParH(level)->neighborY_SP[number2];
-                number4 = para->getParH(level)->neighborY_SP[number1];
-                number5 = para->getParH(level)->neighborZ_SP[number1];
-                number6 = para->getParH(level)->neighborZ_SP[number2];
-                number7 = para->getParH(level)->neighborZ_SP[number3];
-                number8 = para->getParH(level)->neighborZ_SP[number4];
+                number2 = para->getParH(level)->neighborX[number1]; // number2..number8: nodes reached by following the X/Y/Z neighbor links
+                number3 = para->getParH(level)->neighborY[number2];
+                number4 = para->getParH(level)->neighborY[number1];
+                number5 = para->getParH(level)->neighborZ[number1];
+                number6 = para->getParH(level)->neighborZ[number2];
+                number7 = para->getParH(level)->neighborZ[number3];
+                number8 = para->getParH(level)->neighborZ[number4];
                 //////////////////////////////////////////////////////////////////////////
-                if (para->getParH(level)->geoSP[number2] != GEO_FLUID ||
-                    para->getParH(level)->geoSP[number3] != GEO_FLUID ||
-                    para->getParH(level)->geoSP[number4] != GEO_FLUID ||
-                    para->getParH(level)->geoSP[number5] != GEO_FLUID ||
-                    para->getParH(level)->geoSP[number6] != GEO_FLUID ||
-                    para->getParH(level)->geoSP[number7] != GEO_FLUID ||
-                    para->getParH(level)->geoSP[number8] != GEO_FLUID)  neighborsAreFluid = false;
+                if (para->getParH(level)->typeOfGridNode[number2] != GEO_FLUID || // NOTE(review): neighbor indices are dereferenced before the endpos check below - confirm they are always valid
+                    para->getParH(level)->typeOfGridNode[number3] != GEO_FLUID ||
+                    para->getParH(level)->typeOfGridNode[number4] != GEO_FLUID ||
+                    para->getParH(level)->typeOfGridNode[number5] != GEO_FLUID ||
+                    para->getParH(level)->typeOfGridNode[number6] != GEO_FLUID ||
+                    para->getParH(level)->typeOfGridNode[number7] != GEO_FLUID ||
+                    para->getParH(level)->typeOfGridNode[number8] != GEO_FLUID)
+                    neighborsAreFluid = false;
                 //////////////////////////////////////////////////////////////////////////
-                if (number2 > endpos ||
-                    number3 > endpos ||
-                    number4 > endpos ||
-                    number5 > endpos ||
-                    number6 > endpos ||
-                    number7 > endpos ||
-                    number8 > endpos)  neighborsAreFluid = false;
+                if (number2 > endpos || number3 > endpos || number4 > endpos || number5 > endpos || number6 > endpos ||
+                    number7 > endpos || number8 > endpos)
+                    neighborsAreFluid = false; // a neighbor index past endpos also disables the cell
                 //////////////////////////////////////////////////////////////////////////
                 dn2 = number2 - startpos;
                 dn3 = number3 - startpos;
@@ -169,5 +195,5 @@ void AnalyticalResults2DToVTKWriterImp::writeTimeStep(std::shared_ptr<Parameter>
 
 int AnalyticalResults2DToVTKWriterImp::CoordResults2DTo1D(int x, int z)
 {
-	return z * (maxX - 1) + x;
+    return z * (maxX - 1) + x; // linear index with rows of (maxX - 1) entries, x running fastest
 }
diff --git a/apps/gpu/tests/NumericalTests/Utilities/DataWriter/Y2dSliceToResults/Y2dSliceToResults.cpp b/apps/gpu/tests/NumericalTests/Utilities/DataWriter/Y2dSliceToResults/Y2dSliceToResults.cpp
index d507d92b6a790c06981852251db087039062759e..c13f6f748463287fc4e7401d56d7c7ab40cc1a77 100644
--- a/apps/gpu/tests/NumericalTests/Utilities/DataWriter/Y2dSliceToResults/Y2dSliceToResults.cpp
+++ b/apps/gpu/tests/NumericalTests/Utilities/DataWriter/Y2dSliceToResults/Y2dSliceToResults.cpp
@@ -1,66 +1,76 @@
 #include "Y2dSliceToResults.h"
 
-#include "VirtualFluids_GPU/Parameter/Parameter.h"
 #include "Utilities/Results/SimulationResults/SimulationResults.h"
+#include "VirtualFluids_GPU/Parameter/Parameter.h"
 
+#include <cmath>
+#include <sstream>
 
-std::shared_ptr<Y2dSliceToResults> Y2dSliceToResults::getNewInstance(std::shared_ptr<VectorWriterInformationStruct> vectorWriterInfo, unsigned int timeStepLength, std::shared_ptr<SimulationResults> simResults, unsigned int ySliceForCalculation)
+std::shared_ptr<Y2dSliceToResults>
+Y2dSliceToResults::getNewInstance(std::shared_ptr<VectorWriterInformationStruct> vectorWriterInfo,
+                                  unsigned int timeStepLength, std::shared_ptr<SimulationResults> simResults,
+                                  unsigned int ySliceForCalculation)
 {
-	return std::shared_ptr<Y2dSliceToResults>(new Y2dSliceToResults(vectorWriterInfo, timeStepLength, simResults, ySliceForCalculation));
+    return std::shared_ptr<Y2dSliceToResults>(
+        new Y2dSliceToResults(vectorWriterInfo, timeStepLength, simResults, ySliceForCalculation)); // factory: forwards every argument to the constructor
 }
 
 Y2dSliceToResults::Y2dSliceToResults()
 {
-
 }
 
-Y2dSliceToResults::Y2dSliceToResults(std::shared_ptr<VectorWriterInformationStruct> vectorWriterInfo, unsigned int timeStepLength, std::shared_ptr<SimulationResults> simResults, unsigned int ySliceForCalculation)
-	: ToVectorWriter(vectorWriterInfo, timeStepLength)
+Y2dSliceToResults::Y2dSliceToResults(std::shared_ptr<VectorWriterInformationStruct> vectorWriterInfo,
+                                     unsigned int timeStepLength, std::shared_ptr<SimulationResults> simResults,
+                                     unsigned int ySliceForCalculation)
+    : ToVectorWriter(vectorWriterInfo, timeStepLength) // base class receives the writer info and the time step length
 {
-	this->simResults = simResults;
-	this->ySliceForCalculation = ySliceForCalculation;
+    this->simResults = simResults; // results container filled by writeTimestep
+    this->ySliceForCalculation = ySliceForCalculation;
 }
 
 void Y2dSliceToResults::writeTimestep(std::shared_ptr<Parameter> para, unsigned int t, int level)
 {
-	int timestep = t / timeStepLength;
-	maxX = para->getGridX().at(level);
-	maxY = para->getGridY().at(level);
-	maxZ = para->getGridZ().at(level);
+    int timestep = t / timeStepLength; // number of whole timeStepLength intervals contained in t
+    maxX = para->getGridX().at(level);
+    maxY = para->getGridY().at(level);
+    maxZ = para->getGridZ().at(level);
 
-	int numberNodes = (maxX - 1) * (maxZ - 1);
-	std::vector<double> x(numberNodes), y(numberNodes), z(numberNodes);
-	std::vector<double> vx(numberNodes), vy(numberNodes), vz(numberNodes);
-	std::vector<double> press(numberNodes), rho(numberNodes);
-	std::vector<unsigned int> levels(numberNodes);
+    int numberNodes = (maxX - 1) * (maxZ - 1); // one entry per (posX, posZ) pair visited below
+    std::vector<double> x(numberNodes), y(numberNodes), z(numberNodes);
+    std::vector<double> vx(numberNodes), vy(numberNodes), vz(numberNodes);
+    std::vector<double> press(numberNodes), rho(numberNodes);
+    std::vector<unsigned int> levels(numberNodes);
 
-	for (int posZ = 0; posZ < maxZ - 1; posZ++)
-	{
-		for (int posX = 0; posX < maxX - 1; posX++)
-		{
-			int posResults = CoordResults2DTo1D(posX, posZ);
-			int posPara = CoordPara3DTo1D(posX, ySliceForCalculation, posZ);
+    ySliceForCalculation = maxY / 2; // NOTE(review): overwrites the value handed to the constructor - confirm this is intended
+    for (int posZ = 0; posZ < maxZ - 1; posZ++) {
+        for (int posX = 0; posX < maxX - 1; posX++) {
+            int posResults = CoordResults2DTo1D(posX, posZ); // index into the local result vectors
+            int posPara = CoordPara3DTo1D(posX, ySliceForCalculation, posZ); // index into the host parameter arrays
 
-			x.at(posResults) = (double)para->getParH(level)->coordX_SP[posPara] - (double)1.0;
-			y.at(posResults) = (double)para->getParH(level)->coordY_SP[posPara] - (double)1.0;
-			z.at(posResults) = (double)para->getParH(level)->coordZ_SP[posPara] - (double)1.0;
-			vx.at(posResults) = (double)para->getParH(level)->vx_SP[posPara] * (double)para->getVelocityRatio();
-			vy.at(posResults) = (double)para->getParH(level)->vy_SP[posPara] * (double)para->getVelocityRatio();
-			vz.at(posResults) = (double)para->getParH(level)->vz_SP[posPara] * (double)para->getVelocityRatio();
-			press.at(posResults) = (double)para->getParH(level)->press_SP[posPara] / (double)3.0 * (double)para->getDensityRatio() * (double)para->getVelocityRatio() * (double)para->getVelocityRatio();
-			rho.at(posResults) = (double)para->getParH(level)->rho_SP[posPara] / (double)3.0 * (double)para->getDensityRatio() * (double)para->getVelocityRatio() * (double)para->getVelocityRatio();
-			levels.at(posResults) = level;
-		}
-	}
-	simResults->addTimeStep(timestep, t, levels, x, y, z, vx, vy, vz, press, rho);
+            x.at(posResults) = (double)para->getParH(level)->coordinateX[posPara] - (double)1.0;
+            y.at(posResults) = (double)para->getParH(level)->coordinateY[posPara] - (double)1.0;
+            z.at(posResults) = (double)para->getParH(level)->coordinateZ[posPara] - (double)1.0;
+            vx.at(posResults) = (double)para->getParH(level)->velocityX[posPara] * (double)para->getVelocityRatio();
+            vy.at(posResults) = (double)para->getParH(level)->velocityY[posPara] * (double)para->getVelocityRatio();
+            vz.at(posResults) = (double)para->getParH(level)->velocityZ[posPara] * (double)para->getVelocityRatio();
+            press.at(posResults) = (double)para->getParH(level)->pressure[posPara] / (double)3.0 * // scaled by densityRatio * velocityRatio^2 / 3
+                                   (double)para->getDensityRatio() * (double)para->getVelocityRatio() *
+                                   (double)para->getVelocityRatio();
+            rho.at(posResults) = (double)para->getParH(level)->rho[posPara] / (double)3.0 * // same scaling as press
+                                 (double)para->getDensityRatio() * (double)para->getVelocityRatio() *
+                                 (double)para->getVelocityRatio();
+            levels.at(posResults) = level;
+        }
+    }
+    simResults->addTimeStep(timestep, t, levels, x, y, z, vx, vy, vz, press, rho); // hand the collected slice to the results container
 }
 
 int Y2dSliceToResults::CoordPara3DTo1D(int x, int y, int z)
 {
-	return z*maxY*maxX + y*maxX + x + 1;
+    return z * maxY * maxX + y * maxX + x + 1; // x runs fastest, then y, then z; NOTE(review): the +1 presumably skips a reserved entry 0 - confirm
 }
 
 int Y2dSliceToResults::CoordResults2DTo1D(int x, int z)
 {
-	return z * (maxX - 1) + x;
+    return z * (maxX - 1) + x; // linear index with rows of (maxX - 1) entries, x running fastest
 }
diff --git a/apps/gpu/tests/NumericalTests/Utilities/InitialCondition/InitialConditionImp.cpp b/apps/gpu/tests/NumericalTests/Utilities/InitialCondition/InitialConditionImp.cpp
index c31f8cbea82441eb41d2e39e96e502504d5fa5dd..72045afbe7a68ba9c8595ee5c736dd5b2da89ec7 100644
--- a/apps/gpu/tests/NumericalTests/Utilities/InitialCondition/InitialConditionImp.cpp
+++ b/apps/gpu/tests/NumericalTests/Utilities/InitialCondition/InitialConditionImp.cpp
@@ -16,15 +16,15 @@ void InitialConditionImp::init(const int level)
 
 real InitialConditionImp::getXCoord(int i, int level)
 {
-	return (real)(para->getParH(level)->coordX_SP[i] - 1.0);
+	return static_cast<real>(para->getParH(level)->coordinateX[i] - 1.0); // the subtraction is evaluated in double; narrow to real explicitly
 }
 
 real InitialConditionImp::getYCoord(int i, int level)
 {
-	return (real)(para->getParH(level)->coordY_SP[i] - 1.0);
+	return static_cast<real>(para->getParH(level)->coordinateY[i] - 1.0); // the subtraction is evaluated in double; narrow to real explicitly
 }
 
 real InitialConditionImp::getZCoord(int i, int level)
 {
-	return (real)(para->getParH(level)->coordZ_SP[i] - 1.0);
+	return static_cast<real>(para->getParH(level)->coordinateZ[i] - 1.0); // the subtraction is evaluated in double; narrow to real explicitly
 }
\ No newline at end of file
diff --git a/apps/gpu/tests/NumericalTests/Utilities/NumericalTestFactory/NumericalTestFactoryImp.cpp b/apps/gpu/tests/NumericalTests/Utilities/NumericalTestFactory/NumericalTestFactoryImp.cpp
index ce939318899d34995db77a1cd818e07539a2c318..95e8331882282d86f25848e87df1327b77748085 100644
--- a/apps/gpu/tests/NumericalTests/Utilities/NumericalTestFactory/NumericalTestFactoryImp.cpp
+++ b/apps/gpu/tests/NumericalTests/Utilities/NumericalTestFactory/NumericalTestFactoryImp.cpp
@@ -6,6 +6,7 @@
 #include "Utilities/Structs/SimulationDataStruct.h"
 #include "Utilities/Structs/TestStruct.h"
 #include "Utilities/Structs/TestSimulationDataStruct.h"
+#include "Utilities/VirtualFluidSimulationFactory/VirtualFluidSimulationFactory.h"
 
 #include "Simulations/TaylorGreenVortexUx/AnalyticalResults/AnalyticalResultsTaylorGreenVortexUx.h"
 #include "Simulations/TaylorGreenVortexUx/InitialConditions/InitialConditionTaylorGreenVortexUx.h"
@@ -58,6 +59,7 @@
 
 #include <algorithm>
 
+
 std::shared_ptr<NumericalTestFactoryImp> NumericalTestFactoryImp::getNewInstance(std::shared_ptr<ConfigDataStruct> configFileData)
 {
 	return std::shared_ptr<NumericalTestFactoryImp>(new NumericalTestFactoryImp(configFileData));
@@ -94,9 +96,9 @@ void NumericalTestFactoryImp::init(std::shared_ptr<ConfigDataStruct> configFileD
 	simID = 1;
 	numberOfSimulations = configFileData->numberOfSimulations;
 
-	for (int i = 0; i < configFileData->kernelsToTest.size(); i++) {
-		for (int j = 0; j < configFileData->viscosity.size(); j++) {
-			for (int k = 0; k < configFileData->taylorGreenVortexUxParameter.size(); k++) {
+	for (size_t i = 0; i < configFileData->kernelsToTest.size(); i++) {
+		for (size_t j = 0; j < configFileData->viscosity.size(); j++) {
+			for (size_t k = 0; k < configFileData->taylorGreenVortexUxParameter.size(); k++) {
 				std::shared_ptr<SimulationDataStruct> simDataStruct = makeTaylorGreenUxSimulationData(configFileData->kernelsToTest.at(i), configFileData->viscosity.at(j), configFileData->taylorGreenVortexUxParameter.at(k), configFileData->taylorGreenVortexUxGridInformation);
 				if (simDataStruct->simGroupRun) {
 					std::shared_ptr<NumericalTestStruct> numericalTestStruct = makeNumericalTestStruct(configFileData, simDataStruct, configFileData->kernelsToTest.at(i), configFileData->viscosity.at(j), configFileData->taylorGreenVortexUxParameter.at(k)->basicTimeStepLength);
@@ -104,7 +106,7 @@ void NumericalTestFactoryImp::init(std::shared_ptr<ConfigDataStruct> configFileD
 				}
 			}
 
-			for (int k = 0; k < configFileData->taylorGreenVortexUzParameter.size(); k++) {
+			for (size_t k = 0; k < configFileData->taylorGreenVortexUzParameter.size(); k++) {
 				std::shared_ptr<SimulationDataStruct> simDataStruct = makeTaylorGreenUzSimulationData(configFileData->kernelsToTest.at(i), configFileData->viscosity.at(j), configFileData->taylorGreenVortexUzParameter.at(k), configFileData->taylorGreenVortexUzGridInformation);
 				if (simDataStruct->simGroupRun) {
 					std::shared_ptr<NumericalTestStruct> numericalTestStruct = makeNumericalTestStruct(configFileData, simDataStruct, configFileData->kernelsToTest.at(i), configFileData->viscosity.at(j), configFileData->taylorGreenVortexUzParameter.at(k)->basicTimeStepLength);
@@ -112,7 +114,7 @@ void NumericalTestFactoryImp::init(std::shared_ptr<ConfigDataStruct> configFileD
 				}
 			}
 
-			for (int k = 0; k < configFileData->shearWaveParameter.size(); k++) {
+			for (size_t k = 0; k < configFileData->shearWaveParameter.size(); k++) {
 				std::shared_ptr<SimulationDataStruct> simDataStruct = makeShearWaveSimulationData(configFileData->kernelsToTest.at(i), configFileData->viscosity.at(j), configFileData->shearWaveParameter.at(k), configFileData->shearWaveGridInformation);
 				if (simDataStruct->simGroupRun) {
 					std::shared_ptr<NumericalTestStruct> numericalTestStruct = makeNumericalTestStruct(configFileData, simDataStruct, configFileData->kernelsToTest.at(i), configFileData->viscosity.at(j), configFileData->shearWaveParameter.at(k)->basicTimeStepLength);
@@ -147,7 +149,7 @@ std::shared_ptr<NumericalTestStruct> NumericalTestFactoryImp::makeNumericalTestS
 	initTestStruct(l2NormTestBetweenKernelStruct, numTestStruct, testLogFileInfo, basicTestLogFileInfo);
 
 	std::vector<std::shared_ptr<SimulationInfo> > simInfo;
-	for (int i = 0; i < simDataStruct->testSimData.size(); i++)
+	for (size_t i = 0; i < simDataStruct->testSimData.size(); i++)
 		simInfo.push_back(simDataStruct->testSimData.at(i)->simInformation);
 
 	std::shared_ptr<LogFileWriter> logFileWriter = makeLogFileWriter(testLogFileInfo, simDataStruct->logFileInformation, simInfo, kernel, viscosity, basicTimeStepLength, configFileData->logFilePara, basicTestLogFileInfo);
@@ -158,10 +160,10 @@ std::shared_ptr<NumericalTestStruct> NumericalTestFactoryImp::makeNumericalTestS
 
 void NumericalTestFactoryImp::addNumericalTestStruct(std::shared_ptr<NumericalTestStruct> numericalTestStruct)
 {
-	for (int i = 0; i < numericalTestStruct->testSimulations.size(); i++)
+	for (size_t i = 0; i < numericalTestStruct->testSimulations.size(); i++)
 		myTestSimulations.push_back(numericalTestStruct->testSimulations.at(i));
 
-	for (int i = 0; i < numericalTestStruct->tests.size(); i++)
+	for (size_t i = 0; i < numericalTestStruct->tests.size(); i++)
 		myTestQueue->addTest(numericalTestStruct->tests.at(i));
 
 	myLogFileWriterQueue->addLogFileWriter(numericalTestStruct->logFileWriter);
@@ -172,7 +174,7 @@ std::shared_ptr<SimulationDataStruct> NumericalTestFactoryImp::makeTaylorGreenUx
 	std::shared_ptr<SimulationDataStruct> simDataStruct = std::shared_ptr<SimulationDataStruct>(new SimulationDataStruct);
 
 	if (gridInfoStruct.size() > 0) {
-		for (int i = 0; i < gridInfoStruct.size(); i++) {
+		for (size_t i = 0; i < gridInfoStruct.size(); i++) {
 			std::shared_ptr<TestSimulationDataStruct> aTestSimData = std::shared_ptr<TestSimulationDataStruct>(new TestSimulationDataStruct);
 			aTestSimData->simParameter = SimulationParameterTaylorGreenUx::getNewInstance(kernel, viscosity, simParaStruct, gridInfoStruct.at(i));
 			aTestSimData->initialCondition = InitialConditionTaylorGreenUx::getNewInstance(simParaStruct, gridInfoStruct.at(i));
@@ -194,7 +196,7 @@ std::shared_ptr<SimulationDataStruct> NumericalTestFactoryImp::makeTaylorGreenUz
 {
 	std::shared_ptr<SimulationDataStruct> simDataStruct = std::shared_ptr<SimulationDataStruct>(new SimulationDataStruct);
 	if (gridInfoStruct.size() > 0) {
-		for (int i = 0; i < gridInfoStruct.size(); i++) {
+		for (size_t i = 0; i < gridInfoStruct.size(); i++) {
 			std::shared_ptr<TestSimulationDataStruct> aTestSimData = std::shared_ptr<TestSimulationDataStruct>(new TestSimulationDataStruct);
 			aTestSimData->simParameter = SimulationParameterTaylorGreenUz::getNewInstance(kernel, viscosity, simParaStruct, gridInfoStruct.at(i));
 			aTestSimData->initialCondition = InitialConditionTaylorGreenUz::getNewInstance(simParaStruct, gridInfoStruct.at(i));
@@ -216,7 +218,7 @@ std::shared_ptr<SimulationDataStruct> NumericalTestFactoryImp::makeShearWaveSimu
 {
 	std::shared_ptr<SimulationDataStruct> simDataStruct = std::shared_ptr<SimulationDataStruct>(new SimulationDataStruct);
 	if (gridInfoStruct.size() > 0) {
-		for (int i = 0; i < gridInfoStruct.size(); i++) {
+		for (size_t i = 0; i < gridInfoStruct.size(); i++) {
 			std::shared_ptr<TestSimulationDataStruct> aTestSimData = std::shared_ptr<TestSimulationDataStruct>(new TestSimulationDataStruct);
 			aTestSimData->simParameter = ShearWaveSimulationParameter::getNewInstance(kernel, viscosity, simParaStruct, gridInfoStruct.at(i));
 			aTestSimData->initialCondition = InitialConditionShearWave::getNewInstance(simParaStruct, gridInfoStruct.at(i));
@@ -236,17 +238,26 @@ std::shared_ptr<SimulationDataStruct> NumericalTestFactoryImp::makeShearWaveSimu
 
 std::vector<std::shared_ptr<TestSimulationImp> > NumericalTestFactoryImp::makeTestSimulations(std::vector<std::shared_ptr<TestSimulationDataStruct> > testSimDataStruct, std::shared_ptr<VectorWriterInformationStruct> vectorWriterInfo, unsigned int ySliceForCalculation)
 {
-	std::vector<std::shared_ptr<TestSimulationImp> > testSimumlations;
-	for (int i = 0; i < testSimDataStruct.size(); i++) {
+	std::vector<std::shared_ptr<TestSimulationImp> > testSimulations; // one test simulation per entry of testSimDataStruct
+	for (size_t i = 0; i < testSimDataStruct.size(); i++) {
 		std::shared_ptr<TimeImp> time = TimeImp::getNewInstance();
 		testSimDataStruct.at(i)->simInformation->setTimeInfo(time);
 		std::shared_ptr<SimulationResults> simResult = SimulationResults::getNewInstance(testSimDataStruct.at(i)->simParameter);
 		std::shared_ptr<ToVectorWriter> toVectorWriter = Y2dSliceToResults::getNewInstance(vectorWriterInfo, testSimDataStruct.at(i)->simParameter->getTimeStepLength(), simResult, ySliceForCalculation);
 		
-		testSimumlations.push_back(TestSimulationImp::getNewInsance(testSimDataStruct.at(i), simResult, time, toVectorWriter, anaResultWriter, colorOutput));
+
+		auto currentTestSimData = testSimDataStruct.at(i);
+		auto para = vf::gpu::tests::makeParameter(currentTestSimData->simParameter); // Parameter built from this test's simulation parameter
+		currentTestSimData->initialCondition->setParameter(para); // the initial condition is given the same Parameter
+		auto vfsim = vf::gpu::tests::makeVirtualFluidSimulation(para, currentTestSimData->initialCondition, toVectorWriter); // built from the parameter, initial condition and vector writer
+
+		auto testSim = std::make_shared<TestSimulationImp>(vfsim, currentTestSimData, simResult, time, toVectorWriter, anaResultWriter, colorOutput);
+		testSim->setParameter(para); // the test simulation is given the same Parameter instance
+
+		testSimulations.push_back(testSim);
 	}
 
-	return testSimumlations;
+	return testSimulations;
 }
 
 std::shared_ptr<TestStruct> NumericalTestFactoryImp::makePhiTestsStructs(std::shared_ptr<PhiTestParameterStruct> testParameter, std::vector<std::shared_ptr<TestSimulationImp>> testSimumlations, double viscosity)
@@ -257,13 +268,13 @@ std::shared_ptr<TestStruct> NumericalTestFactoryImp::makePhiTestsStructs(std::sh
 		std::shared_ptr<PhiTestLogFileInformation> testLogFileInfo = PhiTestLogFileInformation::getNewInstance(testParameter);
 		
 		std::vector<std::shared_ptr<PhiTestPostProcessingStrategy> > postProcessingStrategies;
-		for (int i = 0; i < testSimumlations.size(); i++)
+		for (size_t i = 0; i < testSimumlations.size(); i++)
 			postProcessingStrategies.push_back(PhiTestPostProcessingStrategy::getNewInstance(testSimumlations.at(i)->getSimulationResults(), testSimumlations.at(i)->getAnalyticalResults(), testParameter, testSimumlations.at(i)->getDataToCalcTests()));
 
-		for (int i = 0; i < testSimumlations.at(0)->getDataToCalcTests().size(); i++) {
+		for (size_t i = 0; i < testSimumlations.at(0)->getDataToCalcTests().size(); i++) {
 			std::vector<std::shared_ptr<PhiTest> > phiTests = makePhiTests(testParameter, testSimumlations, postProcessingStrategies, viscosity, testSimumlations.at(0)->getDataToCalcTests().at(i));
 			testLogFileInfo->addTestGroup(phiTests);
-			for (int j = 0; j < phiTests.size(); j++)
+			for (size_t j = 0; j < phiTests.size(); j++)
 				testStruct->tests.push_back(phiTests.at(j));
 		}
 		testStruct->logFileInfo = testLogFileInfo;
@@ -276,8 +287,8 @@ std::shared_ptr<TestStruct> NumericalTestFactoryImp::makePhiTestsStructs(std::sh
 std::vector<std::shared_ptr<PhiTest>> NumericalTestFactoryImp::makePhiTests(std::shared_ptr<PhiTestParameterStruct> testParameter, std::vector<std::shared_ptr<TestSimulationImp>> testSim, std::vector<std::shared_ptr<PhiTestPostProcessingStrategy>> phiPostProStrategy, double viscosity, std::string dataToCalculate)
 {
 	std::vector<std::shared_ptr<PhiTest> > phiTests;
-	for (int i = 1; i < testSim.size(); i++) {
-		for (int j = 0; j < i; j++) {
+	for (size_t i = 1; i < testSim.size(); i++) {
+		for (size_t j = 0; j < i; j++) {
 			std::shared_ptr<PhiTest> test = PhiTest::getNewInstance(colorOutput, viscosity, testParameter, dataToCalculate);
 			test->addSimulation(testSim.at(j), testSim.at(j)->getSimulationInfo(), phiPostProStrategy.at(j));
 			test->addSimulation(testSim.at(i), testSim.at(i)->getSimulationInfo(), phiPostProStrategy.at(i));
@@ -299,13 +310,13 @@ std::shared_ptr<TestStruct> NumericalTestFactoryImp::makeNyTestsStructs(std::sha
 		std::shared_ptr<NyTestLogFileInformation> testLogFileInfo = NyTestLogFileInformation::getNewInstance(testParameter);
 
 		std::vector<std::shared_ptr<NyTestPostProcessingStrategy> > postProcessingStrategies;
-		for (int i = 0; i < testSimumlations.size(); i++)
+		for (size_t i = 0; i < testSimumlations.size(); i++)
 			postProcessingStrategies.push_back(NyTestPostProcessingStrategy::getNewInstance(testSimumlations.at(i)->getSimulationResults(), testSimumlations.at(i)->getAnalyticalResults(), testParameter, testSimumlations.at(i)->getDataToCalcTests()));
 
-		for (int i = 0; i < testSimumlations.at(0)->getDataToCalcTests().size(); i++) {
+		for (size_t i = 0; i < testSimumlations.at(0)->getDataToCalcTests().size(); i++) {
 			std::vector<std::shared_ptr<NyTest> > nyTests = makeNyTests(testParameter, testSimumlations, postProcessingStrategies, viscosity, testSimumlations.at(0)->getDataToCalcTests().at(i));
 			testLogFileInfo->addTestGroup(nyTests);
-			for (int j = 0; j < nyTests.size(); j++)
+			for (size_t j = 0; j < nyTests.size(); j++)
 				testStruct->tests.push_back(nyTests.at(j));
 		}
 		testStruct->logFileInfo = testLogFileInfo;
@@ -318,8 +329,8 @@ std::shared_ptr<TestStruct> NumericalTestFactoryImp::makeNyTestsStructs(std::sha
 std::vector<std::shared_ptr<NyTest>> NumericalTestFactoryImp::makeNyTests(std::shared_ptr<NyTestParameterStruct> testParameter, std::vector<std::shared_ptr<TestSimulationImp>> testSim, std::vector<std::shared_ptr<NyTestPostProcessingStrategy>> nuPostProStrategy, double viscosity, std::string dataToCalculate)
 {
 	std::vector<std::shared_ptr<NyTest> > nyTests;
-	for (int i = 1; i < testSim.size(); i++) {
-		for (int j = 0; j < i; j++) {
+	for (size_t i = 1; i < testSim.size(); i++) {
+		for (size_t j = 0; j < i; j++) {
 			std::shared_ptr<NyTest> test = NyTest::getNewInstance(colorOutput, viscosity, testParameter, dataToCalculate);
 			test->addSimulation(testSim.at(j), testSim.at(j)->getSimulationInfo(), nuPostProStrategy.at(j));
 			test->addSimulation(testSim.at(i), testSim.at(i)->getSimulationInfo(), nuPostProStrategy.at(i));
@@ -339,13 +350,13 @@ std::shared_ptr<TestStruct> NumericalTestFactoryImp::makeL2NormTestsStructs(std:
 
 	if (testParameter->basicTestParameter->runTest) {
 		std::vector<std::shared_ptr<L2NormPostProcessingStrategy> >  postProcessingStrategies;
-		for (int i = 0; i < testSimumlations.size(); i++)
+		for (size_t i = 0; i < testSimumlations.size(); i++)
 			postProcessingStrategies.push_back(L2NormPostProcessingStrategy::getNewInstance(testSimumlations.at(i)->getSimulationResults(), testSimumlations.at(i)->getAnalyticalResults(), testParameter, l2NormCalculatorFactory, testSimumlations.at(i)->getDataToCalcTests()));
 
 		std::vector<std::shared_ptr<L2NormTest> > tests = makeL2NormTests(testSimumlations, postProcessingStrategies, testParameter);
 		std::shared_ptr<L2NormInformation> testLogFileInfo = L2NormInformation::getNewInstance(tests, testParameter, testSimumlations.at(0)->getDataToCalcTests());
 
-		for(int i = 0; i < tests.size(); i++)
+		for(size_t i = 0; i < tests.size(); i++)
 			testStruct->tests.push_back(tests.at(i));
 		testStruct->logFileInfo = testLogFileInfo;
 		testStruct->testName = "L2NormTest";
@@ -356,9 +367,9 @@ std::shared_ptr<TestStruct> NumericalTestFactoryImp::makeL2NormTestsStructs(std:
 std::vector<std::shared_ptr<L2NormTest> > NumericalTestFactoryImp::makeL2NormTests(std::vector<std::shared_ptr<TestSimulationImp> > testSim, std::vector<std::shared_ptr<L2NormPostProcessingStrategy> > postProStrategy, std::shared_ptr<L2NormTestParameterStruct> testParameter)
 {
 	std::vector<std::shared_ptr<L2NormTest> > l2Tests;
-	for (int k = 0; k < testParameter->normalizeData.size(); k++) {
-		for (int i = 0; i < testSim.size(); i++) {
-			for (int j = 0; j < testSim.at(i)->getDataToCalcTests().size(); j++) {
+	for (size_t k = 0; k < testParameter->normalizeData.size(); k++) {
+		for (size_t i = 0; i < testSim.size(); i++) {
+			for (size_t j = 0; j < testSim.at(i)->getDataToCalcTests().size(); j++) {
 				std::shared_ptr<L2NormTest> test = L2NormTest::getNewInstance(colorOutput, testParameter, testSim.at(i)->getDataToCalcTests().at(j), testParameter->maxDiff.at(k), testParameter->normalizeData.at(k));
 				test->addSimulation(testSim.at(i), testSim.at(i)->getSimulationInfo(), postProStrategy.at(i));
 				testSim.at(i)->registerSimulationObserver(test);
@@ -377,7 +388,7 @@ std::shared_ptr<TestStruct> NumericalTestFactoryImp::makeL2NormTestsBetweenKerne
 	if (testPara->basicTestParameter->runTest) {
 
 		std::vector<std::shared_ptr<L2NormBetweenKernelPostProcessingStrategy> > postProcessingStrategies;
-		for (int i = 0; i < testSim.size(); i++)
+		for (size_t i = 0; i < testSim.size(); i++)
 			postProcessingStrategies.push_back(L2NormBetweenKernelPostProcessingStrategy::getNewInstance(testSim.at(i)->getSimulationResults(), testSim.at(i)->getAnalyticalResults(), testPara, l2NormCalculatorFactory, testSim.at(i)->getDataToCalcTests()));
 
 		if (kernelName == testPara->basicKernel) {
@@ -387,14 +398,14 @@ std::shared_ptr<TestStruct> NumericalTestFactoryImp::makeL2NormTestsBetweenKerne
 				l2NormTestsBetweenKernels = tests;
 			}
 			else {
-				for (int i = 0; i < tests.size(); i++)
-					for (int j = 0; j < tests.at(i).size(); j++)
+				for (size_t i = 0; i < tests.size(); i++)
+					for (size_t j = 0; j < tests.at(i).size(); j++)
 						l2NormTestsBetweenKernels.at(i).push_back(tests.at(i).at(j));
 			}
 
 		}else{
 			std::vector<std::shared_ptr<L2NormTestBetweenKernels> > tests = linkL2NormTestsBetweenKernels(testPara, testSim, postProcessingStrategies);
-			for (int i = 0; i < tests.size(); i++)
+			for (size_t i = 0; i < tests.size(); i++)
 				testStruct->tests.push_back(tests.at(i));
 			testStruct->logFileInfo = L2NormBetweenKernelsInformation::getNewInstance(tests, testPara, testSim.at(0)->getDataToCalcTests());
 		}
@@ -408,11 +419,11 @@ std::vector<std::vector<std::shared_ptr<L2NormTestBetweenKernels> > >  Numerical
 
 	std::vector<std::shared_ptr<L2NormTestBetweenKernels> > testForOneKernel;
 
-	for (int l = 0; l < testPara->kernelsToTest.size() - 1; l++) {
-		for (int k = 0; k < testSim.size(); k++) {
-			for(int j = 0; j < testSim.at(k)->getDataToCalcTests().size(); j++){
-				for (int m = 0; m < testPara->normalizeData.size(); m++) {
-					for (int i = 0; i < testPara->timeSteps.size(); i++) {
+	for (size_t l = 0; l + 1 < testPara->kernelsToTest.size(); l++) { // "l + 1 <" instead of "size() - 1": no unsigned wrap-around if kernelsToTest is empty
+		for (size_t k = 0; k < testSim.size(); k++) {
+			for(size_t j = 0; j < testSim.at(k)->getDataToCalcTests().size(); j++){
+				for (size_t m = 0; m < testPara->normalizeData.size(); m++) {
+					for (size_t i = 0; i < testPara->timeSteps.size(); i++) {
 						std::shared_ptr<L2NormTestBetweenKernels> aTest = L2NormTestBetweenKernels::getNewInstance(colorOutput, testSim.at(k)->getDataToCalcTests().at(j), testPara->timeSteps.at(i), testPara->normalizeData.at(m), l2NormCalculatorFactory);
 						aTest->setBasicSimulation(testSim.at(k), testSim.at(k)->getSimulationInfo(), postProcessingStrategies.at(k));
 						testSim.at(k)->registerSimulationObserver(aTest);
@@ -436,10 +447,10 @@ std::vector<std::shared_ptr<L2NormTestBetweenKernels> > NumericalTestFactoryImp:
 		if (l2NormTestsBetweenKernels.at(0).size() == 0)
 			l2NormTestsBetweenKernels.erase(l2NormTestsBetweenKernels.begin());
 
-	for (int k = 0; k < testSim.size(); k++) {
-		for (int j = 0; j < testSim.at(k)->getDataToCalcTests().size(); j++) {
-			for (int m = 0; m < testPara->normalizeData.size(); m++) {
-				for (int i = 0; i < testPara->timeSteps.size(); i++) {
+	for (size_t k = 0; k < testSim.size(); k++) {
+		for (size_t j = 0; j < testSim.at(k)->getDataToCalcTests().size(); j++) {
+			for (size_t m = 0; m < testPara->normalizeData.size(); m++) {
+				for (size_t i = 0; i < testPara->timeSteps.size(); i++) {
 					std::shared_ptr<L2NormTestBetweenKernels> aTest = l2NormTestsBetweenKernels.at(0).at(0);
 					l2NormTestsBetweenKernels.at(0).erase(l2NormTestsBetweenKernels.at(0).begin());
 					aTest->setDivergentKernelSimulation(testSim.at(k), testSim.at(k)->getSimulationInfo(), postProcessingStrategies.at(k));
@@ -454,7 +465,7 @@ std::vector<std::shared_ptr<L2NormTestBetweenKernels> > NumericalTestFactoryImp:
 
 void NumericalTestFactoryImp::initTestStruct(std::shared_ptr<TestStruct> testStruct, std::shared_ptr<NumericalTestStruct> numericalTestStruct, std::vector<std::shared_ptr<TestLogFileInformation> > &testLogFileInfo, std::shared_ptr<BasicTestLogFileInformation> basicTestLogFileInfo)
 {
-	for (int i = 0; i < testStruct->tests.size(); i++)
+	for (size_t i = 0; i < testStruct->tests.size(); i++)
 		numericalTestStruct->tests.push_back(testStruct->tests.at(i));
 	if (testStruct->tests.size() > 0) {
 		testLogFileInfo.push_back(testStruct->logFileInfo);
diff --git a/apps/gpu/tests/NumericalTests/Utilities/NumericalTestGridReader/NumericalTestGridReader.cpp b/apps/gpu/tests/NumericalTests/Utilities/NumericalTestGridReader/NumericalTestGridReader.cpp
index c7d9fa5b84ab077d60eab3213097c2da1d5b4dad..97ccf92dc72e253d5f38f88353ba564320e8fd65 100644
--- a/apps/gpu/tests/NumericalTests/Utilities/NumericalTestGridReader/NumericalTestGridReader.cpp
+++ b/apps/gpu/tests/NumericalTests/Utilities/NumericalTestGridReader/NumericalTestGridReader.cpp
@@ -17,11 +17,11 @@ void NumericalTestGridReader::setInitalNodeValues(const int numberOfNodes, const
 {
 	initialCondition->init(level);
 	for (int j = 0; j <= numberOfNodes; j++){
-		para->getParH(level)->vx_SP[j] = initialCondition->getInitVX(j, level);
-		para->getParH(level)->vy_SP[j] = initialCondition->getInitVY(j, level);
-		para->getParH(level)->vz_SP[j] = initialCondition->getInitVZ(j, level);
-		para->getParH(level)->rho_SP[j] = initialCondition->getInitROH(j, level);
-		para->getParH(level)->press_SP[j] = initialCondition->getInitPRESS(j, level);
+		para->getParH(level)->velocityX[j] = initialCondition->getInitVX(j, level);
+		para->getParH(level)->velocityY[j] = initialCondition->getInitVY(j, level);
+		para->getParH(level)->velocityZ[j] = initialCondition->getInitVZ(j, level);
+		para->getParH(level)->rho[j] = initialCondition->getInitROH(j, level);
+		para->getParH(level)->pressure[j] = initialCondition->getInitPRESS(j, level);
 	}
 }
 
diff --git a/apps/gpu/tests/NumericalTests/Utilities/NumericalTestSimulation/NumericalTestSimulation.h b/apps/gpu/tests/NumericalTests/Utilities/NumericalTestSimulation/NumericalTestSimulation.h
index c2bd79ef8f6a1a23e10ec4e3c58c9b31329ec89a..b3835c11bf101c3a0e10beddba0095d0f1f0bef3 100644
--- a/apps/gpu/tests/NumericalTests/Utilities/NumericalTestSimulation/NumericalTestSimulation.h
+++ b/apps/gpu/tests/NumericalTests/Utilities/NumericalTestSimulation/NumericalTestSimulation.h
@@ -10,6 +10,7 @@ enum SimulationStatus { initialized , executed, crashed};
 class NumericalTestSimulation
 {
 public:
+	virtual void run() = 0;
 	virtual SimulationStatus getSimulationStatus() = 0;
 	virtual void registerSimulationObserver(std::shared_ptr<SimulationObserver> simObserver) = 0;
 };
diff --git a/apps/gpu/tests/NumericalTests/Utilities/NumericalTestSuite/NumericalTestSuite.h b/apps/gpu/tests/NumericalTests/Utilities/NumericalTestSuite/NumericalTestSuite.h
deleted file mode 100644
index 86e576a6f8f5d460d70869f46b71ec02e49972e5..0000000000000000000000000000000000000000
--- a/apps/gpu/tests/NumericalTests/Utilities/NumericalTestSuite/NumericalTestSuite.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef NUMERICAL_TEST_SUITE_H
-#define NUMERICAL_TEST_SUITE_H
-
-class NumericalTestSuite
-{
-public:
-	virtual void makeSimulationHeadOutput() = 0;
-	virtual void startPostProcessing() = 0;
-};
-#endif
\ No newline at end of file
diff --git a/apps/gpu/tests/NumericalTests/Utilities/Results/ResultsImp.cpp b/apps/gpu/tests/NumericalTests/Utilities/Results/ResultsImp.cpp
index b454f1db95913562963561ef60d834ec0a9c1535..ef7f37dd9b6c5bee8878d4b7c4df076a5ff6b04d 100644
--- a/apps/gpu/tests/NumericalTests/Utilities/Results/ResultsImp.cpp
+++ b/apps/gpu/tests/NumericalTests/Utilities/Results/ResultsImp.cpp
@@ -1,140 +1,129 @@
 #include "ResultsImp.h"
 
+#include <cmath>
 #include <iostream>
 
+void logInvalidSimulationData(const std::string &quantity) // prints the failure report for the result field named by quantity
+{
+    std::cout << "done." << std::endl; // ends the progress line that checkYourData() started without a newline
+    std::cout << "Invalid quantity: " << quantity << std::endl;
+    std::cout << "Simulation Result Data contains failure data." << std::endl;
+    std::cout << "Testing not possible." << std::endl;
+}
+
+bool isValid(const double quantity, const std::string &quantityName) // false (after logging quantityName) if quantity is NaN, true otherwise
+{
+    if (std::isnan(quantity)) { // only NaN is rejected; infinite values pass this check
+        logInvalidSimulationData(quantityName);
+        return false;
+    }
+    return true;
+}
+
 int ResultsImp::getNumberOfTimeSteps()
 {
-	return numberOfTimeSteps;
+    return numberOfTimeSteps;
 }
 
-std::vector<std::vector<double> > ResultsImp::getVx()
+std::vector<std::vector<double>> ResultsImp::getVx()
 {
-	return vx;
+    return vx;
 }
 
-std::vector<std::vector<double> > ResultsImp::getVy()
+std::vector<std::vector<double>> ResultsImp::getVy()
 {
-	return vy;
+    return vy;
 }
 
-std::vector<std::vector<double> > ResultsImp::getVz()
+std::vector<std::vector<double>> ResultsImp::getVz()
 {
-	return vz;
+    return vz;
 }
 
 int ResultsImp::getNumberOfXNodes()
 {
-	return xNodes;
+    return xNodes;
 }
 
 int ResultsImp::getNumberOfYNodes()
 {
-	return yNodes;
+    return yNodes;
 }
 
 int ResultsImp::getNumberOfZNodes()
 {
-	return zNodes;
+    return zNodes;
 }
 
-std::vector<std::vector<double> > ResultsImp::getXNodes()
+std::vector<std::vector<double>> ResultsImp::getXNodes()
 {
-	return x;
+    return x;
 }
 
-std::vector<std::vector<double> > ResultsImp::getYNodes()
+std::vector<std::vector<double>> ResultsImp::getYNodes()
 {
-	return y;
+    return y;
 }
 
-std::vector<std::vector<double> > ResultsImp::getZNodes()
+std::vector<std::vector<double>> ResultsImp::getZNodes()
 {
-	return z;
+    return z;
 }
 
 int ResultsImp::getTimeStepLength()
 {
-	return timeStepLength;
+    return timeStepLength;
 }
 
 std::vector<unsigned int> ResultsImp::getTimeSteps()
 {
-	return timeStep;
+    return timeStep;
 }
 
 std::vector<int> ResultsImp::getTime()
 {
-	return time;
+    return time;
 }
 
-std::vector<std::vector<unsigned int> > ResultsImp::getLevels()
+std::vector<std::vector<unsigned int>> ResultsImp::getLevels()
 {
-	return level;
+    return level;
 }
 
-std::vector<std::vector<double> > ResultsImp::getPress()
+std::vector<std::vector<double>> ResultsImp::getPress()
 {
-	return press;
+    return press;
 }
 
-std::vector<std::vector<double> > ResultsImp::getRho()
+std::vector<std::vector<double>> ResultsImp::getRho()
 {
-	return rho;
+    return rho;
 }
 
 int ResultsImp::getL0()
 {
-	return l0;
+    return l0;
 }
 
 bool ResultsImp::checkYourData()
 {
-	std::cout << "checking Simulation Results Data...";
-	for (int i = 0; i < vx.size(); i++) {
-		for (int j = 0; j < vx.at(i).size(); j++) {
-			if (vx.at(i).at(j) != vx.at(i).at(j)) {
-				std::cout << "done." << std::endl;
-				std::cout << "Simulation Result Data contains failure data." << std::endl;
-				std::cout << "Testing not possible." << std::endl;
-				return false;
-			}
-			if (vy.at(i).at(j) != vy.at(i).at(j)) {
-				std::cout << "done." << std::endl;
-				std::cout << "Simulation Result Data contains failure data." << std::endl;
-				std::cout << "Testing not possible." << std::endl;
-				return false;
-			}
-			if (vz.at(i).at(j) != vz.at(i).at(j)) {
-				std::cout << "done." << std::endl;
-				std::cout << "Simulation Result Data contains failure data." << std::endl;
-				std::cout << "Testing not possible." << std::endl;
-				return false;
-			}
-			if (rho.at(i).at(j) != rho.at(i).at(j)) {
-				std::cout << "done." << std::endl;
-				std::cout << "Simulation Result Data contains failure data." << std::endl;
-				std::cout << "Testing not possible." << std::endl;
-				return false;
-			}
-			if (press.at(i).at(j) != press.at(i).at(j)) {
-				std::cout << "done." << std::endl;
-				std::cout << "Simulation Result Data contains failure data." << std::endl;
-				std::cout << "Testing not possible." << std::endl;
-				return false;
-			}
-		}
-	}
-	std::cout << "done." << std::endl;
-	std::cout << "Simulation Result Data contains no failure data." << std::endl;
-	return true;
-}
+    std::cout << "checking Simulation Results Data..."; // no newline: "done." is appended once the scan finishes or fails
+    for (size_t i = 0; i < vx.size(); i++) { // size_t index: matches the unsigned size() and the rest of this change set
+        for (size_t j = 0; j < vx.at(i).size(); j++) { // vx defines the iteration range; the other fields are read with at()
+            const bool valid = isValid(vx.at(i).at(j), "Vx") && isValid(vy.at(i).at(j), "Vy") &&
+                               isValid(vz.at(i).at(j), "Vz") && isValid(rho.at(i).at(j), "Rho") &&
+                               isValid(press.at(i).at(j), "Pressure"); // && short-circuits: only the first NaN field is logged
 
-ResultsImp::ResultsImp(int l0)
-{
-	this->l0 = l0;
+            if (!valid)
+                return false;
+        }
+    }
+    std::cout << "done." << std::endl;
+    std::cout << "Simulation Result Data contains no failure data." << std::endl;
+    return true;
 }
 
-ResultsImp::ResultsImp()
+ResultsImp::ResultsImp(int l0)
 {
-
+    this->l0 = l0;
 }
diff --git a/apps/gpu/tests/NumericalTests/Utilities/Results/ResultsImp.h b/apps/gpu/tests/NumericalTests/Utilities/Results/ResultsImp.h
index a9129dc88e461c29d330cadad0f43a5017c1c110..4e4cc80c9e3fefc86b174859bdc813b6dba22c99 100644
--- a/apps/gpu/tests/NumericalTests/Utilities/Results/ResultsImp.h
+++ b/apps/gpu/tests/NumericalTests/Utilities/Results/ResultsImp.h
@@ -28,7 +28,7 @@ public:
 
 protected:
 	ResultsImp(int l0);
-	ResultsImp();
+	ResultsImp() = default;
 
 	unsigned int numberOfTimeSteps;
 	unsigned int timeStepLength;
@@ -46,6 +46,5 @@ protected:
 	int l0;
 
 private:
-
 };
 #endif
\ No newline at end of file
diff --git a/apps/gpu/tests/NumericalTests/Utilities/Results/SimulationResults/SimulationResults.h b/apps/gpu/tests/NumericalTests/Utilities/Results/SimulationResults/SimulationResults.h
index 80bcadd3e2655233309a7e4001409dd9e83c60d7..86d5c89b1747639bdc983627509d456601e836f3 100644
--- a/apps/gpu/tests/NumericalTests/Utilities/Results/SimulationResults/SimulationResults.h
+++ b/apps/gpu/tests/NumericalTests/Utilities/Results/SimulationResults/SimulationResults.h
@@ -9,11 +9,12 @@ class SimulationParameter;
 class SimulationResults : public ResultsImp
 {
 public:
-	static std::shared_ptr<SimulationResults> getNewInstance(std::shared_ptr<SimulationParameter> simPara);
-	void addTimeStep(unsigned int timeStep, unsigned int time, std::vector<unsigned int> level, std::vector<double> x, std::vector<double> y, std::vector<double> z, std::vector<double> vx, std::vector<double> vy, std::vector<double> vz, std::vector<double> press, std::vector<double> rho);
+    static std::shared_ptr<SimulationResults> getNewInstance(std::shared_ptr<SimulationParameter> simPara);
+    void addTimeStep(unsigned int timeStep, unsigned int time, std::vector<unsigned int> level, std::vector<double> x,
+                     std::vector<double> y, std::vector<double> z, std::vector<double> vx, std::vector<double> vy,
+                     std::vector<double> vz, std::vector<double> press, std::vector<double> rho);
 
 private:
-	SimulationResults(std::shared_ptr<SimulationParameter> simPara);
-
+    SimulationResults(std::shared_ptr<SimulationParameter> simPara);
 };
 #endif
\ No newline at end of file
diff --git a/apps/gpu/tests/NumericalTests/Utilities/Test/Test.h b/apps/gpu/tests/NumericalTests/Utilities/Test/Test.h
index af180adb96e8d8f31ea746bb8ea55d16d170622c..c5809671330d2ac5a32f13f3db0d6d3690f06b6f 100644
--- a/apps/gpu/tests/NumericalTests/Utilities/Test/Test.h
+++ b/apps/gpu/tests/NumericalTests/Utilities/Test/Test.h
@@ -10,6 +10,7 @@
 class Test : public SimulationObserver 
 {
 public:
+	virtual void run() = 0;
 	virtual void update() = 0;
 
 	virtual TestStatus getTestStatus() = 0;
diff --git a/apps/gpu/tests/NumericalTests/Utilities/Test/TestImp.cpp b/apps/gpu/tests/NumericalTests/Utilities/Test/TestImp.cpp
index 1da57e18bf418901163ee3c636b4274bbf74f4cc..f893d1f174b949d371e193d23e7c137e79460008 100644
--- a/apps/gpu/tests/NumericalTests/Utilities/Test/TestImp.cpp
+++ b/apps/gpu/tests/NumericalTests/Utilities/Test/TestImp.cpp
@@ -1,94 +1,108 @@
 #include "TestImp.h"
+#include <algorithm>
 
 #include "Utilities/ColorConsoleOutput/ColorConsoleOutput.h"
-#include "Utilities/PostProcessingStrategy/PostProcessingStrategy.h"
 #include "Utilities/NumericalTestSimulation/NumericalTestSimulation.h"
+#include "Utilities/PostProcessingStrategy/PostProcessingStrategy.h"
+#include "Utilities/SimulationInfo/SimulationInfo.h"
+
+// Runs every simulation registered with this test that is not yet marked as run.
+void TestImp::run()
+{
+    for (size_t i = 0; i < simulations.size(); i++) {
+        // NOTE: Simulations can be in this vector multiple times
+        // Therefore, we skip the simulation if it has been run already
+        if (simulationRun.at(i))
+            continue;
+        // keep our own shared_ptr copy for the duration of the call
+        const auto sim = simulations.at(i);
+        sim->run();
+    }
+}
 
 void TestImp::update()
 {
-	for (int i = 0; i < simulations.size(); i++){
-		if (simulationRun.at(i) == false){
-			switch (simulations.at(i)->getSimulationStatus())
-			{
-			case executed: 
-				simulationRun.at(i) = true;
-				postProStrategies.at(i)->evaluate();
-				break;
-			case crashed:
-				simulationRun.at(i) = true;
-				testStatus = simulationCrashed;
-				break;
-			case initialized:
-				simulationRun.at(i) = false;
-				break;
-			default:
-				break;
-			}
-		}
-	}
-	if (CheckAllSimulationRun()) {
-		if (testStatus != simulationCrashed)
-			evaluate();
-		else
-			makeConsoleOutput();
-	}
-	
+    for (size_t i = 0; i < simulations.size(); i++) {
+        if (simulationRun.at(i) == false) {
+            switch (simulations.at(i)->getSimulationStatus()) {
+                case executed:
+                    simulationRun.at(i) = true;
+                    postProStrategies.at(i)->evaluate();
+                    break;
+                case crashed:
+                    simulationRun.at(i) = true;
+                    testStatus = simulationCrashed;
+                    break;
+                case initialized:
+                    simulationRun.at(i) = false;
+                    break;
+                default:
+                    break;
+            }
+        }
+    }
+    if (CheckAllSimulationRun()) {
+        if (testStatus != simulationCrashed)
+            evaluate();
+        else
+            makeConsoleOutput();
+    }
 }
 
-void TestImp::addSimulation(std::shared_ptr<NumericalTestSimulation> sim, std::shared_ptr<SimulationInfo> simInfo, std::shared_ptr<PostProcessingStrategy> postProStrategy)
+void TestImp::addSimulation(std::shared_ptr<NumericalTestSimulation> sim, std::shared_ptr<SimulationInfo> simInfo,
+                            std::shared_ptr<PostProcessingStrategy> postProStrategy)
 {
-	simulations.push_back(sim);
-	simInfos.push_back(simInfo);
-	postProStrategies.push_back(postProStrategy);
-	simulationRun.push_back(false);
+    simulations.push_back(sim);
+    simInfos.push_back(simInfo);
+    postProStrategies.push_back(postProStrategy);
+    simulationRun.push_back(false);
 }
 
 TestStatus TestImp::getTestStatus()
 {
-	return testStatus;
+    return testStatus;
 }
 
 void TestImp::makeConsoleOutput()
 {
-	switch (testStatus)
-	{
-	case passed: colorOutput->makeTestOutput(buildTestOutput(), testStatus);
-		break;
-	case failed: colorOutput->makeTestOutput(buildTestOutput(), testStatus);
-		break;
-	case test_error: colorOutput->makeTestOutput(buildErrorTestOutput(), testStatus);
-		break;
-	case simulationCrashed: colorOutput->makeTestOutput(buildSimulationFailedTestOutput(), testStatus);
-		break;
-	default:
-		break;
-	}
+    switch (testStatus) {
+        case passed:
+            colorOutput->makeTestOutput(buildTestOutput(), testStatus);
+            break;
+        case failed:
+            colorOutput->makeTestOutput(buildTestOutput(), testStatus);
+            break;
+        case test_error:
+            colorOutput->makeTestOutput(buildErrorTestOutput(), testStatus);
+            break;
+        case simulationCrashed:
+            colorOutput->makeTestOutput(buildSimulationFailedTestOutput(), testStatus);
+            break;
+        default:
+            break;
+    }
 }
 
 TestImp::TestImp(std::shared_ptr<ColorConsoleOutput> colorOutput) : colorOutput(colorOutput)
 {
-	simulationRun.resize(0);
-	simulations.resize(0);
-	simInfos.resize(0);
+    simulationRun.resize(0);
+    simulations.resize(0);
+    simInfos.resize(0);
 }
 
 bool TestImp::CheckAllSimulationRun()
 {
-	for(int i=0; i< simulationRun.size(); i++)
-		if(simulationRun.at(i)==false)
-			return false;
-	
-	return true;
+    return std::all_of(simulationRun.begin(), simulationRun.end(), [](bool run) { return run; });
 }
 
 std::vector<std::string> TestImp::buildSimulationFailedTestOutput()
 {
-	std::vector<std::string> output = buildBasicTestOutput();
-	std::ostringstream oss;
+    std::vector<std::string> output = buildBasicTestOutput();
+    std::ostringstream oss;
 
-	oss << "Simulation crashed!";
-	output.push_back(oss.str());
-	oss.str(std::string());
+    oss << "Simulation crashed!";
+    output.push_back(oss.str());
+    oss.str(std::string());
 
-	return output;
+    return output;
 }
diff --git a/apps/gpu/tests/NumericalTests/Utilities/Test/TestImp.h b/apps/gpu/tests/NumericalTests/Utilities/Test/TestImp.h
index 9e18ef3116d155520b1c02d6d7616c926c32c315..f4fc758ec75f3c0e12b79faf2efb74a2e749e00e 100644
--- a/apps/gpu/tests/NumericalTests/Utilities/Test/TestImp.h
+++ b/apps/gpu/tests/NumericalTests/Utilities/Test/TestImp.h
@@ -15,6 +15,7 @@ class PostProcessingStrategy;
 class TestImp : public Test
 {
 public:
+	void run() override;
 	void update();
 	TestStatus getTestStatus();
 	virtual void makeConsoleOutput();
diff --git a/apps/gpu/tests/NumericalTests/Utilities/TestQueue/TestQueue.h b/apps/gpu/tests/NumericalTests/Utilities/TestQueue/TestQueue.h
index bb1bf9579504b888f6f8d5297d9096b742c70a1c..dcc0ece599e45539fd1416b3cd06bb67576a915a 100644
--- a/apps/gpu/tests/NumericalTests/Utilities/TestQueue/TestQueue.h
+++ b/apps/gpu/tests/NumericalTests/Utilities/TestQueue/TestQueue.h
@@ -1,10 +1,13 @@
 #ifndef TEST_QUEUE_H
 #define TEST_QUEUE_H
 
+enum TestSuiteResult { PASSED, FAILED }; // overall outcome returned by TestQueue::run()
+
 class TestQueue
 {
 public:
-	virtual void makeFinalOutput() = 0;
+    virtual TestSuiteResult run() = 0;
+    virtual void makeFinalOutput() = 0;
     virtual int getNumberOfFailedTests() const noexcept = 0;
 };
 #endif
\ No newline at end of file
diff --git a/apps/gpu/tests/NumericalTests/Utilities/TestQueue/TestQueueImp.cpp b/apps/gpu/tests/NumericalTests/Utilities/TestQueue/TestQueueImp.cpp
index 5434c335c14ab6ed4f5a647089912417618db199..c7ceef408bcc7d098810aa0f8fd82498902ee211 100644
--- a/apps/gpu/tests/NumericalTests/Utilities/TestQueue/TestQueueImp.cpp
+++ b/apps/gpu/tests/NumericalTests/Utilities/TestQueue/TestQueueImp.cpp
@@ -1,63 +1,77 @@
 #include "TestQueueImp.h"
+#include <algorithm>
 
 #include "Utilities/ColorConsoleOutput/ColorConsoleOutput.h"
 #include "Utilities/Test/Test.h"
 
+TestSuiteResult TestQueueImp::run() // runs all queued tests, prints the summary, returns FAILED iff at least one test failed
+{
+    for (const auto &test : tests) // by reference: avoids copying a shared_ptr per iteration
+        test->run();
+
+    makeFinalOutput(); // recomputes the counters (calcTestNumbers) before they are read below
+
+    return numberOfFailedTest > 0 ? FAILED : PASSED; // test_error / simulationCrashed results are counted separately and do not affect this
+}
+
 void TestQueueImp::makeFinalOutput()
 {
-	calcTestNumbers();
-	colorOutput->makeFinalTestOutputHead(numberOfTests, numberOfExecutedTest, numberOfPassedTest, numberOfFailedTest, numberOfErrorTest, numberOfNotExecutedTest);
-	for (int i = 0; i < tests.size(); i++)
-		tests.at(i)->makeConsoleOutput();
-	colorOutput->makeFinalTestOutputFoot(numberOfTests, numberOfExecutedTest, numberOfPassedTest, numberOfFailedTest, numberOfErrorTest, numberOfNotExecutedTest);
+    calcTestNumbers(); // refresh all counters before printing the summary head and foot
+    colorOutput->makeFinalTestOutputHead(numberOfTests, numberOfExecutedTest, numberOfPassedTest, numberOfFailedTest,
+                                         numberOfErrorTest, numberOfNotExecutedTest);
+    for (const auto &test : tests) // range-for: no signed/unsigned index comparison
+        test->makeConsoleOutput();
+    colorOutput->makeFinalTestOutputFoot(numberOfTests, numberOfExecutedTest, numberOfPassedTest, numberOfFailedTest,
+                                         numberOfErrorTest, numberOfNotExecutedTest);
 }
 
-int TestQueueImp::getNumberOfFailedTests() const noexcept { return numberOfFailedTest; }
+int TestQueueImp::getNumberOfFailedTests() const noexcept
+{
+    return numberOfFailedTest;
+}
 
 std::shared_ptr<TestQueueImp> TestQueueImp::getNewInstance(std::shared_ptr<ColorConsoleOutput> colorOutput)
 {
-	return std::shared_ptr<TestQueueImp>(new TestQueueImp(colorOutput));
+    return std::shared_ptr<TestQueueImp>(new TestQueueImp(colorOutput));
 }
 
 void TestQueueImp::addTest(std::shared_ptr<Test> test)
 {
-	tests.push_back(test);
+    tests.push_back(test);
 }
 
 TestQueueImp::TestQueueImp(std::shared_ptr<ColorConsoleOutput> colorOutput) : colorOutput(colorOutput)
 {
-	tests.resize(0);
+    tests.resize(0);
 }
 
 void TestQueueImp::calcTestNumbers()
 {
-	numberOfTests = tests.size();
-	numberOfExecutedTest = 0;
-	numberOfPassedTest = 0;
-	numberOfFailedTest = 0;
-	numberOfErrorTest = 0;
-	numberOfNotExecutedTest = 0;
-
-	for (int i = 0; i < tests.size(); i++) {
-		switch (tests.at(i)->getTestStatus())
-		{
-		case passed:
-			numberOfPassedTest++;
-			numberOfExecutedTest++;
-			break;
-		case failed:
-			numberOfFailedTest++;
-			numberOfExecutedTest++;
-			break;
-		case test_error: 
-			numberOfErrorTest++;
-			break;
-		case simulationCrashed:
-			numberOfNotExecutedTest++;
-			break;
-		default:
-			break;
-		}
-	}
-			
+    numberOfTests = tests.size();
+    numberOfExecutedTest = 0;
+    numberOfPassedTest = 0;
+    numberOfFailedTest = 0;
+    numberOfErrorTest = 0;
+    numberOfNotExecutedTest = 0;
+
+    for (const auto &test : tests) { // range-for: no signed/unsigned index comparison
+        switch (test->getTestStatus()) {
+            case passed: // passed and failed tests both count as executed
+                numberOfPassedTest++;
+                numberOfExecutedTest++;
+                break;
+            case failed:
+                numberOfFailedTest++;
+                numberOfExecutedTest++;
+                break;
+            case test_error:
+                numberOfErrorTest++;
+                break;
+            case simulationCrashed: // a crashed simulation is reported as "not executed"
+                numberOfNotExecutedTest++;
+                break;
+            default: // any other status is left out of all counters except numberOfTests
+                break;
+        }
+    }
 }
diff --git a/apps/gpu/tests/NumericalTests/Utilities/TestQueue/TestQueueImp.h b/apps/gpu/tests/NumericalTests/Utilities/TestQueue/TestQueueImp.h
index 27505efc7b3a58bb155f40813d87a7aab78157bc..89952773f810a983d34f9e35e9508e9d0d7e3ce3 100644
--- a/apps/gpu/tests/NumericalTests/Utilities/TestQueue/TestQueueImp.h
+++ b/apps/gpu/tests/NumericalTests/Utilities/TestQueue/TestQueueImp.h
@@ -12,6 +12,7 @@ class ColorConsoleOutput;
 class TestQueueImp : public TestQueue
 {
 public:
+	TestSuiteResult run() override;
 	void makeFinalOutput();
 
 	int getNumberOfFailedTests() const noexcept override;
diff --git a/apps/gpu/tests/NumericalTests/Utilities/TestSimulation/TestSimulation.h b/apps/gpu/tests/NumericalTests/Utilities/TestSimulation/TestSimulation.h
index 80f72ec3dfa82fdc9871e9db32532f204ddd12db..729ca06a3a57bfc1202d4f93728a68b18a57f8db 100644
--- a/apps/gpu/tests/NumericalTests/Utilities/TestSimulation/TestSimulation.h
+++ b/apps/gpu/tests/NumericalTests/Utilities/TestSimulation/TestSimulation.h
@@ -1,8 +1,6 @@
 #ifndef TEST_SIMULATION_H
 #define TEST_SIMULATION_H
 
-#include "Utilities/NumericalTestSuite/NumericalTestSuite.h"
-
 #include <memory>
 #include <string>
 
@@ -14,13 +12,15 @@ class SimulationObserver;
 class TimeTracking;
 class Parameter;
 
-class TestSimulation : public NumericalTestSuite
+class TestSimulation
 {
 public:
-	virtual std::shared_ptr<SimulationParameter> getSimulationParameter() = 0;
-	virtual std::shared_ptr<DataWriter> getDataWriter() = 0;
-	virtual std::shared_ptr<InitialCondition> getInitialCondition() = 0;
-	virtual std::shared_ptr<TimeTracking> getTimeTracking() = 0;
-	virtual void setParameter(std::shared_ptr<Parameter> para) = 0;
+    virtual void run() = 0;
+	virtual void makeSimulationHeadOutput() = 0;
+	virtual void startPostProcessing() = 0;
+
+    virtual std::shared_ptr<SimulationParameter> getSimulationParameter() = 0;
+    virtual std::shared_ptr<TimeTracking> getTimeTracking() = 0;
+    virtual void setParameter(std::shared_ptr<Parameter> para) = 0;
 };
 #endif
\ No newline at end of file
diff --git a/apps/gpu/tests/NumericalTests/Utilities/TestSimulation/TestSimulationImp.cpp b/apps/gpu/tests/NumericalTests/Utilities/TestSimulation/TestSimulationImp.cpp
index d07a40efa19511437392359546f4c51b692d2cd5..dc4aca9b35b0bea92b3d8e3e5b07cb8ed3e054d5 100644
--- a/apps/gpu/tests/NumericalTests/Utilities/TestSimulation/TestSimulationImp.cpp
+++ b/apps/gpu/tests/NumericalTests/Utilities/TestSimulation/TestSimulationImp.cpp
@@ -1,136 +1,139 @@
+#include <functional>
+
 #include "TestSimulationImp.h"
 
-#include "Utilities/DataWriter/ToVectorWriter.h"
-#include "Utilities/SimulationInfo/SimulationInfo.h"
-#include "Utilities/Results/AnalyticalResults/AnalyticalResult.h"
-#include "Utilities/Test/SimulationObserver.h"
 #include "Utilities/ColorConsoleOutput/ColorConsoleOutput.h"
-#include "Utilities/KernelConfiguration/KernelConfiguration.h"
 #include "Utilities/DataWriter/AnalyticalResults2DToVTKWriter/AnalyticalResults2DToVTKWriter.h"
+#include "Utilities/DataWriter/ToVectorWriter.h"
+#include "Utilities/InitialCondition/InitialCondition.h"
+#include "Utilities/KernelConfiguration/KernelConfiguration.h"
+#include "Utilities/Results/AnalyticalResults/AnalyticalResult.h"
+#include "Utilities/Results/SimulationResults/SimulationResults.h"
+#include "Utilities/SimulationInfo/SimulationInfo.h"
 #include "Utilities/Structs/TestSimulationDataStruct.h"
+#include "Utilities/Test/SimulationObserver.h"
 #include "Utilities/Time/TimeTracking.h"
-#include "Utilities/Results/SimulationResults/SimulationResults.h"
-
 
-std::shared_ptr<TestSimulationImp> TestSimulationImp::getNewInsance(std::shared_ptr<TestSimulationDataStruct> testSimData, std::shared_ptr<SimulationResults> simResult, std::shared_ptr<TimeTracking> timeTracking, std::shared_ptr<ToVectorWriter> toVectorWriter, std::shared_ptr<AnalyticalResults2DToVTKWriter> anaResultWriter, std::shared_ptr<ColorConsoleOutput> colorOutput)
+TestSimulationImp::TestSimulationImp(std::function<void()> runSimulation,
+                                     std::shared_ptr<TestSimulationDataStruct> testSimData,
+                                     std::shared_ptr<SimulationResults> simResult,
+                                     std::shared_ptr<TimeTracking> timeTracking,
+                                     std::shared_ptr<ToVectorWriter> toVectorWriter,
+                                     std::shared_ptr<AnalyticalResults2DToVTKWriter> anaResultWriter,
+                                     std::shared_ptr<ColorConsoleOutput> colorOutput)
 {
-	return std::shared_ptr<TestSimulationImp>(new TestSimulationImp(testSimData, simResult, timeTracking, toVectorWriter, anaResultWriter, colorOutput));
-}
+    this->simPara = testSimData->simParameter;
+    this->simInfo = testSimData->simInformation;
+    this->analyticalResult = testSimData->analyticalResult;
+    this->initialCondition = testSimData->initialCondition;
 
-TestSimulationImp::TestSimulationImp(std::shared_ptr<TestSimulationDataStruct> testSimData, std::shared_ptr<SimulationResults> simResult, std::shared_ptr<TimeTracking> timeTracking, std::shared_ptr<ToVectorWriter> toVectorWriter, std::shared_ptr<AnalyticalResults2DToVTKWriter> anaResultWriter, std::shared_ptr<ColorConsoleOutput> colorOutput)
-{
-	this->simPara = testSimData->simParameter;
-	this->simInfo = testSimData->simInformation;
-	this->analyticalResult = testSimData->analyticalResult;
-	this->initialCondition = testSimData->initialCondition;
+    this->timeTracking = timeTracking;
 
-	this->timeTracking = timeTracking;
+    this->simResult = simResult;
+    this->toVectorWriter = toVectorWriter;
 
-	this->simResult = simResult;
-	this->toVectorWriter = toVectorWriter;
+    this->anaResultWriter = anaResultWriter;
+    this->colorOutput = colorOutput;
 
-	this->anaResultWriter = anaResultWriter;
-	this->colorOutput = colorOutput;
-	
-	this->simObserver.resize(0);
-	this->dataToCalcTests = simInfo->getDataToCalcTests();
-	this->status = initialized;
+    this->simObserver.resize(0);
+    this->dataToCalcTests = simInfo->getDataToCalcTests();
+    this->status = initialized;
+    this->runSimulation = runSimulation;
 }
 
-std::shared_ptr<SimulationParameter> TestSimulationImp::getSimulationParameter()
+void TestSimulationImp::run()
 {
-	return simPara;
+    makeSimulationHeadOutput();
+    timeTracking->setSimulationStartTime();
+    runSimulation();
+    timeTracking->setSimulationEndTime();
+    startPostProcessing();
 }
 
-std::shared_ptr<AnalyticalResults> TestSimulationImp::getAnalyticalResults()
-{
-	return analyticalResult;
-}
-
-std::shared_ptr<DataWriter> TestSimulationImp::getDataWriter()
+std::shared_ptr<SimulationParameter> TestSimulationImp::getSimulationParameter()
 {
-	return toVectorWriter;
+    return simPara;
 }
 
-std::shared_ptr<InitialCondition> TestSimulationImp::getInitialCondition()
+std::shared_ptr<AnalyticalResults> TestSimulationImp::getAnalyticalResults()
 {
-	return initialCondition;
+    return analyticalResult;
 }
 
 std::shared_ptr<SimulationInfo> TestSimulationImp::getSimulationInfo()
 {
-	return simInfo;
+    return simInfo;
 }
 
 std::shared_ptr<TimeTracking> TestSimulationImp::getTimeTracking()
 {
-	return timeTracking;
+    return timeTracking;
 }
 
 SimulationStatus TestSimulationImp::getSimulationStatus()
 {
-	return status;
+    return status;
 }
 
 void TestSimulationImp::registerSimulationObserver(std::shared_ptr<SimulationObserver> simObserver)
 {
-	this->simObserver.push_back(simObserver);
+    this->simObserver.push_back(simObserver);
 }
 
 std::vector<std::string> TestSimulationImp::getDataToCalcTests()
 {
-	return dataToCalcTests;
+    return dataToCalcTests;
 }
 
 void TestSimulationImp::notifyObserver()
 {
-	for (int i = 0; i < simObserver.size(); i++)
-		simObserver.at(i)->update();
+    for (const auto &observer : simObserver) // range-for avoids the int vs. size_t index comparison
+        observer->update();
 }
 
 void TestSimulationImp::writeAnalyticalResultsToVTK()
 {
-	if (!analyticalResult->isCalculated())
-		analyticalResult->calc(simResult);
+    if (!analyticalResult->isCalculated())
+        analyticalResult->calc(simResult);
 
-	anaResultWriter->writeAnalyticalResult(para, analyticalResult);
+    anaResultWriter->writeAnalyticalResult(para, analyticalResult);
 }
 
 void TestSimulationImp::checkSimulationResults()
 {
-	bool dataOkay = simResult->checkYourData();
-	if (!dataOkay)
-		status = crashed;
+    bool dataOkay = simResult->checkYourData();
+    if (!dataOkay)
+        status = crashed;
 }
 
 void TestSimulationImp::makeSimulationHeadOutput()
 {
-	colorOutput->makeSimulationHeadOutput(simInfo);
+    colorOutput->makeSimulationHeadOutput(simInfo);
 }
 
 void TestSimulationImp::startPostProcessing()
 {
-	status = executed;
+    status = executed;
 
-	timeTracking->setResultCheckStartTime();
-	checkSimulationResults();
-	timeTracking->setResultCheckEndTime();
+    timeTracking->setResultCheckStartTime();
+    checkSimulationResults();
+    timeTracking->setResultCheckEndTime();
 
-	timeTracking->setTestStartTime();
-	notifyObserver();
-	timeTracking->setTestEndTime();
+    timeTracking->setTestStartTime();
+    notifyObserver();
+    timeTracking->setTestEndTime();
 
-	timeTracking->setAnalyticalResultWriteStartTime();
-	writeAnalyticalResultsToVTK();
-	timeTracking->setAnalyticalResultWriteEndTime();
+    timeTracking->setAnalyticalResultWriteStartTime();
+    writeAnalyticalResultsToVTK();
+    timeTracking->setAnalyticalResultWriteEndTime();
 }
 
 void TestSimulationImp::setParameter(std::shared_ptr<Parameter> para)
 {
-	this->para = para;
+    this->para = para;
 }
 
 std::shared_ptr<SimulationResults> TestSimulationImp::getSimulationResults()
 {
-	return simResult;
+    return simResult;
 }
\ No newline at end of file
diff --git a/apps/gpu/tests/NumericalTests/Utilities/TestSimulation/TestSimulationImp.h b/apps/gpu/tests/NumericalTests/Utilities/TestSimulation/TestSimulationImp.h
index 3117d6c61e619b8d51f625920837dc31656829d8..b876cb079f241226bbb3455f7b4bc97b80039131 100644
--- a/apps/gpu/tests/NumericalTests/Utilities/TestSimulation/TestSimulationImp.h
+++ b/apps/gpu/tests/NumericalTests/Utilities/TestSimulation/TestSimulationImp.h
@@ -4,8 +4,9 @@
 #include "TestSimulation.h"
 #include "Utilities/NumericalTestSimulation/NumericalTestSimulation.h"
 
-#include <vector>
+#include <functional>
 #include <time.h>
+#include <vector>
 
 class ToVectorWriter;
 class ColorConsoleOutput;
@@ -20,48 +21,52 @@ struct TestSimulationDataStruct;
 class TestSimulationImp : public TestSimulation, public NumericalTestSimulation
 {
 public:
-	static std::shared_ptr<TestSimulationImp> getNewInsance(std::shared_ptr<TestSimulationDataStruct> testSimData, std::shared_ptr<SimulationResults> simResult, std::shared_ptr<TimeTracking> timeTracking, std::shared_ptr<ToVectorWriter> toVectorWriter, std::shared_ptr<AnalyticalResults2DToVTKWriter> anaResultWriter, std::shared_ptr<ColorConsoleOutput> colorOutput);
+    TestSimulationImp(std::function<void()> runSimulation, std::shared_ptr<TestSimulationDataStruct> testSimData,
+                      std::shared_ptr<SimulationResults> simResult, std::shared_ptr<TimeTracking> timeTracking,
+                      std::shared_ptr<ToVectorWriter> toVectorWriter,
+                      std::shared_ptr<AnalyticalResults2DToVTKWriter> anaResultWriter,
+                      std::shared_ptr<ColorConsoleOutput> colorOutput);
+    void run() override;
+
+    std::shared_ptr<SimulationParameter> getSimulationParameter() override; // implements TestSimulation
+    std::shared_ptr<SimulationInfo> getSimulationInfo();
+    std::shared_ptr<TimeTracking> getTimeTracking() override; // implements TestSimulation
 
-	std::shared_ptr<SimulationParameter> getSimulationParameter();
-	std::shared_ptr<DataWriter> getDataWriter();
-	std::shared_ptr<InitialCondition> getInitialCondition();
-	std::shared_ptr<SimulationInfo> getSimulationInfo();
-	std::shared_ptr<TimeTracking> getTimeTracking();
+    SimulationStatus getSimulationStatus();
 
-	SimulationStatus getSimulationStatus();
-	
-	void makeSimulationHeadOutput();
-	void startPostProcessing();
+    void makeSimulationHeadOutput() override; // implements TestSimulation
+    void startPostProcessing() override;      // implements TestSimulation
 
-	void setParameter(std::shared_ptr<Parameter> para);
+    void setParameter(std::shared_ptr<Parameter> para) override; // implements TestSimulation
 
-	std::shared_ptr<SimulationResults> getSimulationResults();
-	std::shared_ptr<AnalyticalResults> getAnalyticalResults();
-	void registerSimulationObserver(std::shared_ptr<SimulationObserver> simObserver);
-	std::vector<std::string> getDataToCalcTests();
+    std::shared_ptr<SimulationResults> getSimulationResults();
+    std::shared_ptr<AnalyticalResults> getAnalyticalResults();
+    void registerSimulationObserver(std::shared_ptr<SimulationObserver> simObserver);
+    std::vector<std::string> getDataToCalcTests();
 
 private:
-	TestSimulationImp(std::shared_ptr<TestSimulationDataStruct> testSimData, std::shared_ptr<SimulationResults> simResult, std::shared_ptr<TimeTracking> timeTracking, std::shared_ptr<ToVectorWriter> toVectorWriter, std::shared_ptr<AnalyticalResults2DToVTKWriter> anaResultWriter, std::shared_ptr<ColorConsoleOutput> colorOutput);
-	void notifyObserver();
-
-	void writeAnalyticalResultsToVTK();
-	void checkSimulationResults();
-
-	std::shared_ptr<SimulationParameter> simPara;
-	std::shared_ptr<ToVectorWriter> toVectorWriter;
-	std::shared_ptr<InitialCondition> initialCondition;
-	std::shared_ptr<SimulationInfo> simInfo;
-	std::shared_ptr<SimulationResults> simResult;
-	std::shared_ptr<TimeTracking> timeTracking;
-
-	std::shared_ptr<AnalyticalResults> analyticalResult;
-
-	std::shared_ptr<ColorConsoleOutput> colorOutput;
-	std::shared_ptr<AnalyticalResults2DToVTKWriter> anaResultWriter;
-	std::shared_ptr<Parameter> para;
-	std::vector<std::shared_ptr<SimulationObserver> > simObserver;
-	
-	std::vector<std::string> dataToCalcTests;
-	SimulationStatus status;
+    void notifyObserver();
+
+    void writeAnalyticalResultsToVTK();
+    void checkSimulationResults();
+
+    std::shared_ptr<SimulationParameter> simPara;
+    std::shared_ptr<ToVectorWriter> toVectorWriter;
+    std::shared_ptr<InitialCondition> initialCondition;
+    std::shared_ptr<SimulationInfo> simInfo;
+    std::shared_ptr<SimulationResults> simResult;
+    std::shared_ptr<TimeTracking> timeTracking;
+
+    std::shared_ptr<AnalyticalResults> analyticalResult;
+
+    std::shared_ptr<ColorConsoleOutput> colorOutput;
+    std::shared_ptr<AnalyticalResults2DToVTKWriter> anaResultWriter;
+    std::shared_ptr<Parameter> para;
+    std::vector<std::shared_ptr<SimulationObserver>> simObserver;
+
+    std::vector<std::string> dataToCalcTests;
+    SimulationStatus status;
+
+    std::function<void()> runSimulation;
 };
 #endif
\ No newline at end of file
diff --git a/apps/gpu/tests/NumericalTests/Utilities/VirtualFluidSimulation/VirtualFluidSimulation.h b/apps/gpu/tests/NumericalTests/Utilities/VirtualFluidSimulation/VirtualFluidSimulation.h
deleted file mode 100644
index 36e1eabd9af51a0e415da876faca8d38c691cbad..0000000000000000000000000000000000000000
--- a/apps/gpu/tests/NumericalTests/Utilities/VirtualFluidSimulation/VirtualFluidSimulation.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef VIRTUAL_FLUID_SIMULATION_H
-#define VIRTUAL_FLUID_SIMULATION_H
-
-#include "VirtualFluids_GPU/LBM/LB.h"
-
-class VirtualFluidSimulation
-{
-public:
-	virtual void run() = 0;
-private:
-
-};
-#endif
\ No newline at end of file
diff --git a/apps/gpu/tests/NumericalTests/Utilities/VirtualFluidSimulation/VirtualFluidSimulationImp.cpp b/apps/gpu/tests/NumericalTests/Utilities/VirtualFluidSimulation/VirtualFluidSimulationImp.cpp
deleted file mode 100644
index 35b0369f966fe11dce8581f609a10078180a20bb..0000000000000000000000000000000000000000
--- a/apps/gpu/tests/NumericalTests/Utilities/VirtualFluidSimulation/VirtualFluidSimulationImp.cpp
+++ /dev/null
@@ -1,69 +0,0 @@
-#include "VirtualFluidSimulationImp.h"
-
-#include "Utilities/NumericalTestSuite/NumericalTestSuite.h"
-#include "Utilities/Time/TimeTracking.h"
-
-#include "VirtualFluids_GPU/LBM/Simulation.h"
-
-#include <sstream>
-
-void VirtualFluidSimulationImp::run()
-{
-	numericalTestSuite->makeSimulationHeadOutput();
-	Simulation sim;
-	sim.setFactories(kernelFactory, preProcessorFactory);
-	sim.init(para, grid, dataWriter, cudaManager);
-
-	timeTracking->setSimulationStartTime();
-	sim.run();
-	timeTracking->setSimulationEndTime();
-
-	numericalTestSuite->startPostProcessing();
-
-	sim.free();
-}
-
-void VirtualFluidSimulationImp::setParameter(std::shared_ptr<Parameter> para)
-{
-	this->para = para;
-}
-
-void VirtualFluidSimulationImp::setCudaMemoryManager(std::shared_ptr<CudaMemoryManager> cudaManager)
-{
-	this->cudaManager = cudaManager;
-}
-
-void VirtualFluidSimulationImp::setGridProvider(std::shared_ptr<GridProvider> grid)
-{
-	this->grid = grid;
-}
-
-std::shared_ptr<VirtualFluidSimulationImp> VirtualFluidSimulationImp::getNewInstance()
-{
-	return std::shared_ptr<VirtualFluidSimulationImp>(new VirtualFluidSimulationImp());
-}
-
-void VirtualFluidSimulationImp::setDataWriter(std::shared_ptr<DataWriter> dataWriter)
-{
-	this->dataWriter = dataWriter;
-}
-
-void VirtualFluidSimulationImp::setNumericalTestSuite(std::shared_ptr<NumericalTestSuite> numericalTestSuite)
-{
-	this->numericalTestSuite = numericalTestSuite;
-}
-
-void VirtualFluidSimulationImp::setTimeTracking(std::shared_ptr<TimeTracking> timeTracking)
-{
-	this->timeTracking = timeTracking;
-}
-
-void VirtualFluidSimulationImp::setKernelFactory(std::shared_ptr<KernelFactory> kernelFactory)
-{
-	this->kernelFactory = kernelFactory;
-}
-
-void VirtualFluidSimulationImp::setPreProcessorFactory(std::shared_ptr<PreProcessorFactory> preProcessorFactory)
-{
-	this->preProcessorFactory = preProcessorFactory;
-}
diff --git a/apps/gpu/tests/NumericalTests/Utilities/VirtualFluidSimulation/VirtualFluidSimulationImp.h b/apps/gpu/tests/NumericalTests/Utilities/VirtualFluidSimulation/VirtualFluidSimulationImp.h
deleted file mode 100644
index 492459052e56c36345e5650415b2691b4c1b1ed8..0000000000000000000000000000000000000000
--- a/apps/gpu/tests/NumericalTests/Utilities/VirtualFluidSimulation/VirtualFluidSimulationImp.h
+++ /dev/null
@@ -1,53 +0,0 @@
-#ifndef VIRTUAL_FLUID_SIMULATION_IMP_H
-#define VIRTUAL_FLUID_SIMULATION_IMP_H
-
-#include "VirtualFluidSimulation.h"
-
-#include <string>
-#include <memory>
-
-class CudaMemoryManager;
-class InitialCondition;
-class DataWriter;
-class Parameter;
-class GridProvider;
-class KernelConfiguration;
-class TestSimulation;
-class TimeTracking;
-class NumericalTestSuite;
-class KernelFactory;
-class PreProcessorFactory;
-
-class VirtualFluidSimulationImp : public VirtualFluidSimulation
-{
-public:
-	void run();
-
-	static std::shared_ptr<VirtualFluidSimulationImp> getNewInstance();
-
-	void setParameter(std::shared_ptr<Parameter> para);
-	void setCudaMemoryManager(std::shared_ptr<CudaMemoryManager> cudaManager);
-	void setGridProvider(std::shared_ptr<GridProvider> grid);
-	void setDataWriter(std::shared_ptr<DataWriter> dataWriter);
-	void setNumericalTestSuite(std::shared_ptr<NumericalTestSuite> numericalTestSuite);
-	void setTimeTracking(std::shared_ptr<TimeTracking> timeTracking);
-
-	void setKernelFactory(std::shared_ptr<KernelFactory> kernelFactory);
-	void setPreProcessorFactory(std::shared_ptr<PreProcessorFactory> preProcessorFactory);
-
-protected:
-	VirtualFluidSimulationImp() {};
-		
-private:
-	std::shared_ptr<Parameter> para;
-	std::shared_ptr<CudaMemoryManager> cudaManager;
-	std::shared_ptr<InitialCondition> initialCondition;
-	std::shared_ptr<GridProvider> grid;
-	std::shared_ptr<DataWriter> dataWriter;
-	std::shared_ptr<NumericalTestSuite> numericalTestSuite;
-	std::shared_ptr<TimeTracking> timeTracking;
-
-	std::shared_ptr<KernelFactory> kernelFactory;
-	std::shared_ptr<PreProcessorFactory> preProcessorFactory;
-};
-#endif
\ No newline at end of file
diff --git a/apps/gpu/tests/NumericalTests/Utilities/VirtualFluidSimulationFactory/VirtualFluidSimulationFactory.cpp b/apps/gpu/tests/NumericalTests/Utilities/VirtualFluidSimulationFactory/VirtualFluidSimulationFactory.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..535a3972580726ccdd1536dd4b8011eba98176c1
--- /dev/null
+++ b/apps/gpu/tests/NumericalTests/Utilities/VirtualFluidSimulationFactory/VirtualFluidSimulationFactory.cpp
@@ -0,0 +1,127 @@
+#include "VirtualFluidSimulationFactory.h"
+
+#include "Utilities/InitialCondition/InitialCondition.h"
+#include "Utilities/KernelConfiguration/KernelConfiguration.h"
+#include "Utilities/NumericalTestGridReader/NumericalTestGridReader.h"
+#include "Utilities/SimulationParameter/SimulationParameter.h"
+
+#include "VirtualFluids_GPU/GPU/CudaMemoryManager.h"
+#include "VirtualFluids_GPU/Parameter/Parameter.h"
+
+#include "VirtualFluids_GPU/BoundaryConditions/BoundaryConditionFactory.h"
+#include "VirtualFluids_GPU/Communication/Communicator.h"
+#include "VirtualFluids_GPU/LBM/Simulation.h"
+// Builds the Parameter object for one test run: values come from simPara where available, everything else is fixed here.
+std::shared_ptr<Parameter> vf::gpu::tests::makeParameter(std::shared_ptr<SimulationParameter> simPara)
+{
+    auto para = std::make_shared<Parameter>(1, 0); // NOTE(review): presumably (number of processes, process id) - confirm
+
+    para->setQuadricLimiters(0.01, 0.01, 0.01);
+    // The device list is taken from the test configuration.
+    para->setMaxDev(simPara->getDevices().size());
+    para->setDevices(simPara->getDevices());
+    // gridPath locates the grid files configured further down; output uses the configured file path and the prefix "cells".
+    std::string _prefix = "cells";
+    std::string gridPath = simPara->getGridPath() + "/";
+    para->setOutputPath(simPara->getFilePath());
+    para->setOutputPrefix(_prefix);
+    para->setPrintFiles(true);
+
+    para->setD3Qxx(27);
+    para->setMaxLevel(simPara->getNumberOfGridLevels());
+    // Time step settings from the test configuration.
+    para->setTimestepEnd(simPara->getEndTime());
+    para->setTimestepOut(simPara->getTimeStepLength());
+    para->setTimestepStartOut(1);
+    // Viscosity and velocity come from the test configuration; all ratios are fixed to 1.0.
+    para->setViscosityLB(simPara->getViscosity());
+    para->setVelocityLB(simPara->getMaxVelocity());
+    para->setViscosityRatio(1.0);
+    para->setVelocityRatio(1.0);
+    para->setDensityRatio(1.0);
+    para->setFactorPressBC(100000.0);
+    // Every grid file name below is resolved directly inside gridPath.
+    para->setgeoVec(gridPath + "geoVec.dat");
+    para->setcoordX(gridPath + "coordX.dat");
+    para->setcoordY(gridPath + "coordY.dat");
+    para->setcoordZ(gridPath + "coordZ.dat");
+    para->setneighborX(gridPath + "neighborX.dat");
+    para->setneighborY(gridPath + "neighborY.dat");
+    para->setneighborZ(gridPath + "neighborZ.dat");
+    para->setneighborWSB(gridPath + "neighborWSB.dat");
+    para->setgeomBoundaryBcQs(gridPath + "geomBoundaryQs.dat");
+    para->setgeomBoundaryBcValues(gridPath + "geomBoundaryValues.dat");
+    para->setinletBcQs(gridPath + "inletBoundaryQs.dat");
+    para->setinletBcValues(gridPath + "inletBoundaryValues.dat");
+    para->setoutletBcQs(gridPath + "outletBoundaryQs.dat");
+    para->setoutletBcValues(gridPath + "outletBoundaryValues.dat");
+    para->settopBcQs(gridPath + "topBoundaryQs.dat");
+    para->settopBcValues(gridPath + "topBoundaryValues.dat");
+    para->setbottomBcQs(gridPath + "bottomBoundaryQs.dat");
+    para->setbottomBcValues(gridPath + "bottomBoundaryValues.dat");
+    para->setfrontBcQs(gridPath + "frontBoundaryQs.dat");
+    para->setfrontBcValues(gridPath + "frontBoundaryValues.dat");
+    para->setbackBcQs(gridPath + "backBoundaryQs.dat");
+    para->setbackBcValues(gridPath + "backBoundaryValues.dat");
+    para->setnumberNodes(gridPath + "numberNodes.dat");
+    para->setLBMvsSI(gridPath + "LBMvsSI.dat");
+    para->setscaleCFC(gridPath + "scaleCFC.dat");
+    para->setscaleCFF(gridPath + "scaleCFF.dat");
+    para->setscaleFCC(gridPath + "scaleFCC.dat");
+    para->setscaleFCF(gridPath + "scaleFCF.dat");
+    para->setscaleOffsetCF(gridPath + "offsetVecCF.dat");
+    para->setscaleOffsetFC(gridPath + "offsetVecFC.dat");
+    para->setCalcParticles(false); // this and the following boolean switches are all set to false
+    para->setDiffOn(false);
+    para->setDoCheckPoint(false);
+    para->setDoRestart(false);
+    para->setUseGeometryValues(false);
+    para->setCalc2ndOrderMoments(false);
+    para->setCalc3rdOrderMoments(false);
+    para->setCalcHighOrderMoments(false);
+    para->setReadGeo(false);
+    para->setCalcMedian(false);
+    para->setConcFile(false);
+    para->setUseMeasurePoints(false);
+    para->setUseWale(false);
+    para->setSimulatePorousMedia(false);
+    para->setForcing(0.0, 0.0, 0.0);
+    // setDistX/Y/Z each receive the same one-element vector containing 0.
+    std::vector<int> dist;
+    dist.resize(1);
+    dist[0] = 0;
+    para->setDistX(dist);
+    para->setDistY(dist);
+    para->setDistZ(dist);
+
+    // TODO: Find out if we still need this
+    // para->setNeedInterface(std::vector<bool>{true, true, true, true, true, true});
+    // Kernel selection is copied from the test's KernelConfiguration.
+    para->setMainKernel(simPara->getKernelConfiguration()->getMainKernel());
+    para->setMultiKernelOn(simPara->getKernelConfiguration()->getMultiKernelOn());
+    para->setMultiKernelLevel(simPara->getKernelConfiguration()->getMultiKernelLevel());
+    para->setMultiKernel(simPara->getKernelConfiguration()->getMultiKernel());
+
+    return para;
+}
+
+std::shared_ptr<NumericalTestGridReader> makeGridReader(std::shared_ptr<InitialCondition> initialCondition,
+                                                        std::shared_ptr<Parameter> para,
+                                                        std::shared_ptr<CudaMemoryManager> cudaManager)
+{ // Helper for makeVirtualFluidSimulation. NOTE(review): not declared in the header - consider internal linkage.
+    return NumericalTestGridReader::getNewInstance(para, initialCondition, cudaManager); // factory takes para first
+}
+
+// Constructs a Simulation from para, attaches dataWriter, and returns a callable that invokes Simulation::run().
+const std::function<void()> vf::gpu::tests::makeVirtualFluidSimulation(std::shared_ptr<Parameter> para,
+                                                                       std::shared_ptr<InitialCondition> condition,
+                                                                       std::shared_ptr<DataWriter> dataWriter)
+{
+    auto cudaManager = std::make_shared<CudaMemoryManager>(para);
+    auto gridReader = makeGridReader(condition, para, cudaManager);
+    // NOTE(review): bcFactory and gridReader go out of scope on return; assumes Simulation does not keep them - confirm.
+    BoundaryConditionFactory bcFactory;
+    auto sim = std::make_shared<Simulation>(para, cudaManager, vf::gpu::Communicator::getInstance(), *gridReader, &bcFactory);
+    sim->setDataWriter(dataWriter);
+    return [sim]() { sim->run(); }; // the closure shares ownership of the Simulation
+}
diff --git a/apps/gpu/tests/NumericalTests/Utilities/VirtualFluidSimulationFactory/VirtualFluidSimulationFactory.h b/apps/gpu/tests/NumericalTests/Utilities/VirtualFluidSimulationFactory/VirtualFluidSimulationFactory.h
index 1abda69dc2770d060d3551781df4f0b677fa61b4..0f77d8848e2152b5e9460967cd9de9f67ddd0e08 100644
--- a/apps/gpu/tests/NumericalTests/Utilities/VirtualFluidSimulationFactory/VirtualFluidSimulationFactory.h
+++ b/apps/gpu/tests/NumericalTests/Utilities/VirtualFluidSimulationFactory/VirtualFluidSimulationFactory.h
@@ -1,15 +1,17 @@
-#ifndef VIRTUAL_FLUID_SIMULATION_FACTORY_H
-#define VIRTUAL_FLUID_SIMULATION_FACTORY_H
+#ifndef VIRTUAL_FLUID_SIMULATION_FACTORY_IMP_H
+#define VIRTUAL_FLUID_SIMULATION_FACTORY_IMP_H
 
+#include "Utilities/TestSimulation/TestSimulation.h"
+#include <functional>
 #include <memory>
 #include <vector>
 
-class VirtualFluidSimulation;
-class TestSimulation;
-
-class VirtualFluidSimulationFactory
+namespace vf::gpu::tests
 {
-public:
-	virtual std::vector<std::shared_ptr<VirtualFluidSimulation> > makeVirtualFluidSimulations(std::vector<std::shared_ptr<TestSimulation> > testSim) = 0;
-};
-#endif
+std::shared_ptr<Parameter> makeParameter(std::shared_ptr<SimulationParameter> simPara);
+const std::function<void()> makeVirtualFluidSimulation(std::shared_ptr<Parameter> para,
+                                                       std::shared_ptr<InitialCondition> condition,
+                                                       std::shared_ptr<DataWriter> dataWriter);
+} // namespace vf::gpu::tests
+
+#endif
\ No newline at end of file
diff --git a/apps/gpu/tests/NumericalTests/Utilities/VirtualFluidSimulationFactory/VirtualFluidSimulationFactoryImp.cpp b/apps/gpu/tests/NumericalTests/Utilities/VirtualFluidSimulationFactory/VirtualFluidSimulationFactoryImp.cpp
deleted file mode 100644
index 243521cf6a05899dfda957fd247e9cb6598d36a9..0000000000000000000000000000000000000000
--- a/apps/gpu/tests/NumericalTests/Utilities/VirtualFluidSimulationFactory/VirtualFluidSimulationFactoryImp.cpp
+++ /dev/null
@@ -1,164 +0,0 @@
-#include "VirtualFluidSimulationFactoryImp.h"
-
-#include "Utilities/NumericalTestGridReader/NumericalTestGridReader.h"
-#include "Utilities/InitialCondition/InitialCondition.h"
-#include "Utilities/KernelConfiguration/KernelConfiguration.h"
-#include "Utilities/TestSimulation/TestSimulation.h"
-#include "Utilities/SimulationParameter/SimulationParameter.h"
-#include "Utilities/VirtualFluidSimulation/VirtualFluidSimulationImp.h"
-
-#include "VirtualFluids_GPU/GPU/CudaMemoryManager.h"
-#include "VirtualFluids_GPU/Parameter/Parameter.h"
-#include "VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.h"
-#include "VirtualFluids_GPU/PreProcessor/PreProcessorFactory/PreProcessorFactoryImp.h"
-
-std::shared_ptr<VirtualFluidSimulationFactory> VirtualFluidSimulationFactoryImp::getNewInstance()
-{
-	return std::shared_ptr<VirtualFluidSimulationFactory>(new VirtualFluidSimulationFactoryImp());
-}
-
-VirtualFluidSimulationFactoryImp::VirtualFluidSimulationFactoryImp()
-{
-
-}
-
-std::shared_ptr<Parameter> VirtualFluidSimulationFactoryImp::makeParameter(std::shared_ptr<SimulationParameter> simPara)
-{
-	std::shared_ptr<Parameter> para = Parameter::make();
-
-	para->setMaxDev(simPara->getDevices().size());
-	para->setDevices(simPara->getDevices());
-	para->setNumprocs(1);
-
-	std::string _prefix = "cells";
-	std::string gridPath = simPara->getGridPath() + "/";
-	para->setFName(simPara->getFilePath() + "/" + _prefix);
-	para->setPrintFiles(true);
-
-	para->setD3Qxx(27);
-	para->setMaxLevel(simPara->getNumberOfGridLevels());
-
-	para->setTEnd(simPara->getEndTime());
-	para->setTOut(simPara->getTimeStepLength());
-	para->setTStartOut(1);
-
-	para->setViscosity(simPara->getViscosity());
-	para->setVelocity(simPara->getMaxVelocity());
-	para->setViscosityRatio(1.0);
-	para->setVelocityRatio(1.0);
-	para->setDensityRatio(1.0);
-	para->setFactorPressBC(100000.0);
-
-	para->setgeoVec(gridPath + "geoVec.dat");
-	para->setcoordX(gridPath + "coordX.dat");
-	para->setcoordY(gridPath + "coordY.dat");
-	para->setcoordZ(gridPath + "coordZ.dat");
-	para->setneighborX(gridPath + "neighborX.dat");
-	para->setneighborY(gridPath + "neighborY.dat");
-	para->setneighborZ(gridPath + "neighborZ.dat");
-	para->setneighborWSB(gridPath + "neighborWSB.dat");
-	para->setgeomBoundaryBcQs(gridPath + "geomBoundaryQs.dat");
-	para->setgeomBoundaryBcValues(gridPath + "geomBoundaryValues.dat");
-	para->setinletBcQs(gridPath + "inletBoundaryQs.dat");
-	para->setinletBcValues(gridPath + "inletBoundaryValues.dat");
-	para->setoutletBcQs(gridPath + "outletBoundaryQs.dat");
-	para->setoutletBcValues(gridPath + "outletBoundaryValues.dat");
-	para->settopBcQs(gridPath + "topBoundaryQs.dat");
-	para->settopBcValues(gridPath + "topBoundaryValues.dat");
-	para->setbottomBcQs(gridPath + "bottomBoundaryQs.dat");
-	para->setbottomBcValues(gridPath + "bottomBoundaryValues.dat");
-	para->setfrontBcQs(gridPath + "frontBoundaryQs.dat");
-	para->setfrontBcValues(gridPath + "frontBoundaryValues.dat");
-	para->setbackBcQs(gridPath + "backBoundaryQs.dat");
-	para->setbackBcValues(gridPath + "backBoundaryValues.dat");
-	para->setnumberNodes(gridPath + "numberNodes.dat");
-	para->setLBMvsSI(gridPath + "LBMvsSI.dat");
-	para->setscaleCFC(gridPath + "scaleCFC.dat");
-	para->setscaleCFF(gridPath + "scaleCFF.dat");
-	para->setscaleFCC(gridPath + "scaleFCC.dat");
-	para->setscaleFCF(gridPath + "scaleFCF.dat");
-	para->setscaleOffsetCF(gridPath + "offsetVecCF.dat");
-	para->setscaleOffsetFC(gridPath + "offsetVecFC.dat");
-	para->setCalcParticles(false);
-	para->setDiffOn(false);
-	para->setDoCheckPoint(false);
-	para->setDoRestart(false);
-	para->setGeometryValues(false);
-	para->setCalc2ndOrderMoments(false);
-	para->setCalc3rdOrderMoments(false);
-	para->setCalcHighOrderMoments(false);
-	para->setReadGeo(false);
-	para->setCalcMedian(false);
-	para->setConcFile(false);
-	para->setUseMeasurePoints(false);
-	para->setUseWale(false);
-	para->setSimulatePorousMedia(false);
-	para->setForcing(0.0, 0.0, 0.0);
-
-	std::vector<int> dist;
-	dist.resize(1);
-	dist[0] = 0;
-	para->setDistX(dist);
-	para->setDistY(dist);
-	para->setDistZ(dist);
-
-	para->setNeedInterface(std::vector<bool>{true, true, true, true, true, true});
-
-	para->setMainKernel(simPara->getKernelConfiguration()->getMainKernel());
-	para->setMultiKernelOn(simPara->getKernelConfiguration()->getMultiKernelOn());
-	para->setMultiKernelLevel(simPara->getKernelConfiguration()->getMultiKernelLevel());
-	para->setMultiKernel(simPara->getKernelConfiguration()->getMultiKernel());
-
-	return para;
-}
-
-std::shared_ptr<NumericalTestGridReader> VirtualFluidSimulationFactoryImp::makeGridReader(std::shared_ptr<InitialCondition> initialCondition, std::shared_ptr<Parameter> para, std::shared_ptr<CudaMemoryManager> cudaManager)
-{
-	std::shared_ptr<NumericalTestGridReader> grid = NumericalTestGridReader::getNewInstance(para, initialCondition, cudaManager);
-	return grid;
-}
-
-std::shared_ptr<CudaMemoryManager> VirtualFluidSimulationFactoryImp::makeCudaMemoryManager(std::shared_ptr<Parameter> para)
-{
-	std::shared_ptr<CudaMemoryManager> cudaManager = CudaMemoryManager::make(para);
-	return cudaManager;
-}
-
-void VirtualFluidSimulationFactoryImp::initInitialConditions(std::shared_ptr<InitialCondition> initialCondition, std::shared_ptr<Parameter> para)
-{
-	initialCondition->setParameter(para);
-}
-
-std::vector<std::shared_ptr<VirtualFluidSimulation> > VirtualFluidSimulationFactoryImp::makeVirtualFluidSimulations(std::vector<std::shared_ptr<TestSimulation> > testSim)
-{
-	std::vector<std::shared_ptr<VirtualFluidSimulation> > vfSimulations;
-
-	std::shared_ptr<KernelFactoryImp> kernelFactory = KernelFactoryImp::getInstance();
-	std::shared_ptr<PreProcessorFactoryImp> preProcessorFactory = PreProcessorFactoryImp::getInstance();
-
-	for (int i = 0; i < testSim.size(); i++) {
-		std::shared_ptr<VirtualFluidSimulationImp> vfSim = VirtualFluidSimulationImp::getNewInstance();
-		
-		std::shared_ptr<Parameter> para = makeParameter(testSim.at(i)->getSimulationParameter());
-		vfSim->setParameter(para);
-		testSim.at(i)->setParameter(para);
-
-		std::shared_ptr<CudaMemoryManager> cudaManager = makeCudaMemoryManager(para);
-		vfSim->setCudaMemoryManager(cudaManager);
-
-		initInitialConditions(testSim.at(i)->getInitialCondition(), para);
-		std::shared_ptr<NumericalTestGridReader> grid = makeGridReader(testSim.at(i)->getInitialCondition(), para, cudaManager);
-		
-		vfSim->setGridProvider(grid);
-		vfSim->setDataWriter(testSim.at(i)->getDataWriter());
-		vfSim->setNumericalTestSuite(testSim.at(i));
-		vfSim->setTimeTracking(testSim.at(i)->getTimeTracking());
-
-		vfSim->setKernelFactory(kernelFactory);
-		vfSim->setPreProcessorFactory(preProcessorFactory);
-
-		vfSimulations.push_back(vfSim);		
-	}
-
-	return vfSimulations;
-}
diff --git a/apps/gpu/tests/NumericalTests/Utilities/VirtualFluidSimulationFactory/VirtualFluidSimulationFactoryImp.h b/apps/gpu/tests/NumericalTests/Utilities/VirtualFluidSimulationFactory/VirtualFluidSimulationFactoryImp.h
deleted file mode 100644
index 07e6b6ebb924a6b4ff940befd7ba8e266e9b160a..0000000000000000000000000000000000000000
--- a/apps/gpu/tests/NumericalTests/Utilities/VirtualFluidSimulationFactory/VirtualFluidSimulationFactoryImp.h
+++ /dev/null
@@ -1,29 +0,0 @@
-#ifndef VIRTUAL_FLUID_SIMULATION_FACTORY_IMP_H
-#define VIRTUAL_FLUID_SIMULATION_FACTORY_IMP_H
-
-#include "VirtualFluidSimulationFactory.h"
-
-class CudaMemoryManager;
-class NumericalTestGridReader;
-class InitialCondition;
-class Parameter;
-class SimulationParameter;
-
-class VirtualFluidSimulationFactoryImp: public VirtualFluidSimulationFactory
-{
-public:
-	static std::shared_ptr<VirtualFluidSimulationFactory> getNewInstance();
-	std::vector<std::shared_ptr<VirtualFluidSimulation> > makeVirtualFluidSimulations(std::vector<std::shared_ptr<TestSimulation> > testSim);
-
-protected:
-	VirtualFluidSimulationFactoryImp();
-	
-	std::shared_ptr<Parameter> makeParameter(std::shared_ptr<SimulationParameter> simPara);
-	std::shared_ptr<NumericalTestGridReader> makeGridReader(std::shared_ptr<InitialCondition> initialCondition, std::shared_ptr<Parameter> para, std::shared_ptr<CudaMemoryManager> cudaManager);
-	std::shared_ptr<CudaMemoryManager> makeCudaMemoryManager(std::shared_ptr<Parameter> para);
-	void initInitialConditions(std::shared_ptr<InitialCondition> initialCondition, std::shared_ptr<Parameter> para);
-
-private:
-
-};
-#endif
\ No newline at end of file
diff --git a/apps/gpu/tests/NumericalTests/config.txt b/apps/gpu/tests/NumericalTests/config.txt
index 783e6ebbb6d0550f08e39b363a1550c1fe814567..fb88cd424f710c92d9e9f15a9de5deb98a2d85f7 100644
--- a/apps/gpu/tests/NumericalTests/config.txt
+++ b/apps/gpu/tests/NumericalTests/config.txt
@@ -1,39 +1,39 @@
 ##################################################
 #				 GPU Mapping					 #
 ##################################################
-Devices="0"
+Devices=0
 
 ##################################################
 #	       Basic Simulation Parameter			 #
 ##################################################
-KernelsToTest="CumulantK15Comp"
+KernelsToTest=CumulantK15Comp
 #CumulantK17Comp
 #BGKCompSP27 BGKPlusCompSP27 CumulantAA2016CompSP27 CumulantOneCompSP27 CumulantF3CompSP27 CumulantF32018CompSP27 CumulantAll4CompSP27
 NumberOfTimeSteps=20
-Viscosity="0.001"
+Viscosity=0.001
 Rho0=1.0
 
 ##################################################
 #	    TaylorGreenVortex Parameter				 #
 ##################################################
-BasisTimeStepLength_TGV_Ux="1000"
-ux_TGV_Ux="0.096"
-Amplitude_TGV_Ux="0.001"
+BasisTimeStepLength_TGV_Ux=1000
+ux_TGV_Ux=0.096
+Amplitude_TGV_Ux=0.001
 l0_TGV_Ux=32
-DataToCalcTests_TGV_Ux="Vx Vz"
+DataToCalcTests_TGV_Ux=Vx Vz
 
-BasisTimeStepLength_TGV_Uz="1000"
-uz_TGV_Uz="0.048"
-Amplitude_TGV_Uz="0.001"
+BasisTimeStepLength_TGV_Uz=1000
+uz_TGV_Uz=0.048
+Amplitude_TGV_Uz=0.001
 l0_TGV_Uz=48
-DataToCalcTests_TGV_Uz="Vx Vz"
+DataToCalcTests_TGV_Uz=Vx Vz
 
 ##################################################
 #	       Shear Wave Parameter					 #
 ##################################################
-BasisTimeStepLength_SW="1000"
-u0_SW="0.096"
-v0_SW="0.1"
+BasisTimeStepLength_SW=1000
+u0_SW=0.096
+v0_SW=0.1
 l0_SW=32
 DataToCalcTests_SW="Vy"
 
@@ -65,8 +65,8 @@ EndTimeStepCalculation_Ny=20
 ##################################################
 L2NormTest=false
 
-NormalizeData_L2Norm="Amplitude BasicData"
-MaxL2NormDiff="4.5 0.5"
+NormalizeData_L2Norm=Amplitude BasicData
+MaxL2NormDiff=4.5 0.5
 
 BasicTimeStep_L2=0
 DivergentTimeStep_L2=20
@@ -77,7 +77,7 @@ DivergentTimeStep_L2=20
 L2NormBetweenKernelsTest=false
 
 BasicKernel_L2NormBetweenKernels=BGKCompSP27
-Timesteps_L2NormBetweenKernels="0 10 20"
+Timesteps_L2NormBetweenKernels=0 10 20
 NormalizeWith=amplitude
 
 ##################################################
@@ -115,7 +115,7 @@ GridPath512="E:\temp\numericalTests\grids\gridUni512x4x768"
 ##################################################
 # 			File Writing Information			 #
 ##################################################
-WriteVTKFiles=false
+WriteVTKFiles=true
 PathForVTKFileWriting="E:\temp\numericalTests\Output"
 StartStepFileWriter=0
 
diff --git a/apps/gpu/tests/NumericalTests/configK15_nu10tm2.txt b/apps/gpu/tests/NumericalTests/configK15_nu10tm2.txt
index 085fde0c134fc6005ca82dacf2296479953756d7..435931bf0fac794dc203bf9c0242b2510d80d739 100644
--- a/apps/gpu/tests/NumericalTests/configK15_nu10tm2.txt
+++ b/apps/gpu/tests/NumericalTests/configK15_nu10tm2.txt
@@ -1,41 +1,41 @@
 ##################################################
 #				 GPU Mapping					 #
 ##################################################
-Devices="0"
+Devices=0
 
 ##################################################
 #	       Basic Simulation Parameter			 #
 ##################################################
-KernelsToTest="CumulantK15Comp"
+KernelsToTest=CumulantK15Comp
 #CumulantK17Comp
 #BGKCompSP27 BGKPlusCompSP27 CumulantAA2016CompSP27 CumulantOneCompSP27 CumulantF3CompSP27 CumulantF32018CompSP27 CumulantAll4CompSP27
 NumberOfTimeSteps=20
-Viscosity="0.01"
+Viscosity=0.01
 Rho0=1.0
 
 ##################################################
 #	    TaylorGreenVortex Parameter				 #
 ##################################################
-BasisTimeStepLength_TGV_Ux="1000"
-ux_TGV_Ux="0.096"
-Amplitude_TGV_Ux="0.001"
+BasisTimeStepLength_TGV_Ux=1000
+ux_TGV_Ux=0.096
+Amplitude_TGV_Ux=0.001
 l0_TGV_Ux=32
-DataToCalcTests_TGV_Ux="Vx Vz"
+DataToCalcTests_TGV_Ux=Vx Vz
 
-BasisTimeStepLength_TGV_Uz="1000"
-uz_TGV_Uz="0.048"
-Amplitude_TGV_Uz="0.001"
+BasisTimeStepLength_TGV_Uz=1000
+uz_TGV_Uz=0.048
+Amplitude_TGV_Uz=0.001
 l0_TGV_Uz=48
-DataToCalcTests_TGV_Uz="Vx Vz"
+DataToCalcTests_TGV_Uz=Vx Vz
 
 ##################################################
 #	       Shear Wave Parameter					 #
 ##################################################
-BasisTimeStepLength_SW="1000"
-u0_SW="0.096"
-v0_SW="0.1"
+BasisTimeStepLength_SW=1000
+u0_SW=0.096
+v0_SW=0.1
 l0_SW=32
-DataToCalcTests_SW="Vy"
+DataToCalcTests_SW=Vy
 
 ##################################################
 #			   Basic Test Parameter				 #
@@ -65,8 +65,8 @@ EndTimeStepCalculation_Ny=20
 ##################################################
 L2NormTest=false
 
-NormalizeData_L2Norm="Amplitude BasicData"
-MaxL2NormDiff="4.5 0.5"
+NormalizeData_L2Norm=Amplitude BasicData
+MaxL2NormDiff=4.5 0.5
 
 BasicTimeStep_L2=0
 DivergentTimeStep_L2=20
@@ -77,7 +77,7 @@ DivergentTimeStep_L2=20
 L2NormBetweenKernelsTest=false
 
 BasicKernel_L2NormBetweenKernels=BGKCompSP27
-Timesteps_L2NormBetweenKernels="0 10 20"
+Timesteps_L2NormBetweenKernels=0 10 20
 NormalizeWith=amplitude
 
 ##################################################
@@ -86,7 +86,7 @@ NormalizeWith=amplitude
 TaylorGreenVortexUx32=true
 TaylorGreenVortexUx64=true
 TaylorGreenVortexUx128=true
-TaylorGreenVortexUx256=false
+TaylorGreenVortexUx256=true
 TaylorGreenVortexUx512=false
 
 TaylorGreenVortexUz32=false
@@ -95,9 +95,9 @@ TaylorGreenVortexUz128=false
 TaylorGreenVortexUz256=false
 TaylorGreenVortexUz512=false
 
-ShearWave32=false
-ShearWave64=false
-ShearWave128=false
+ShearWave32=true
+ShearWave64=true
+ShearWave128=true
 ShearWave256=false
 ShearWave512=false
 
@@ -106,19 +106,19 @@ ShearWave512=false
 ##################################################
 NumberOfGridLevels=1
 
-GridPath32="grids/gridUni32x4x48"
-GridPath64="grids/gridUni64x4x96"
-GridPath128="grids/gridUni128x4x192"
-GridPath256="grids/gridUni256x4x384"
-GridPath512="grids/gridUni512x4x768"
+GridPath32=grids/gridUni32x4x48
+GridPath64=grids/gridUni64x4x96
+GridPath128=grids/gridUni128x4x192
+GridPath256=grids/gridUni256x4x384
+GridPath512=grids/gridUni512x4x768
 
 ##################################################
 # 			File Writing Information			 #
 ##################################################
 WriteVTKFiles=false
-FolderForVTKFileWriting="Output"
+FolderForVTKFileWriting=Output
 StartStepFileWriter=0
 
-WriteAnalyResultsToVTK=true
+WriteAnalyResultsToVTK=false
 
-FolderLogFile="logFiles"
\ No newline at end of file
+FolderLogFile=logFiles
\ No newline at end of file
diff --git a/apps/gpu/tests/NumericalTests/configK15_nu10tm3.txt b/apps/gpu/tests/NumericalTests/configK15_nu10tm3.txt
index eb27bc5f4a1f937721d2886b0fb599a5b1c7d03c..401f390ebbe3b2c7044699831bf802296123b6ca 100644
--- a/apps/gpu/tests/NumericalTests/configK15_nu10tm3.txt
+++ b/apps/gpu/tests/NumericalTests/configK15_nu10tm3.txt
@@ -1,41 +1,41 @@
 ##################################################
 #				 GPU Mapping					 #
 ##################################################
-Devices="1"
+Devices=1
 
 ##################################################
 #	       Basic Simulation Parameter			 #
 ##################################################
-KernelsToTest="CumulantK15Comp"
+KernelsToTest=CumulantK15Comp
 #CumulantK17Comp
 #BGKCompSP27 BGKPlusCompSP27 CumulantAA2016CompSP27 CumulantOneCompSP27 CumulantF3CompSP27 CumulantF32018CompSP27 CumulantAll4CompSP27
 NumberOfTimeSteps=20
-Viscosity="0.001"
+Viscosity=0.001
 Rho0=1.0
 
 ##################################################
 #	    TaylorGreenVortex Parameter				 #
 ##################################################
-BasisTimeStepLength_TGV_Ux="1000"
-ux_TGV_Ux="0.096"
-Amplitude_TGV_Ux="0.001"
+BasisTimeStepLength_TGV_Ux=1000
+ux_TGV_Ux=0.096
+Amplitude_TGV_Ux=0.001
 l0_TGV_Ux=32
-DataToCalcTests_TGV_Ux="Vx Vz"
+DataToCalcTests_TGV_Ux=Vx Vz
 
-BasisTimeStepLength_TGV_Uz="1000"
-uz_TGV_Uz="0.048"
-Amplitude_TGV_Uz="0.001"
+BasisTimeStepLength_TGV_Uz=1000
+uz_TGV_Uz=0.048
+Amplitude_TGV_Uz=0.001
 l0_TGV_Uz=48
-DataToCalcTests_TGV_Uz="Vx Vz"
+DataToCalcTests_TGV_Uz=Vx Vz
 
 ##################################################
 #	       Shear Wave Parameter					 #
 ##################################################
-BasisTimeStepLength_SW="1000"
-u0_SW="0.096"
-v0_SW="0.1"
+BasisTimeStepLength_SW=1000
+u0_SW=0.096
+v0_SW=0.1
 l0_SW=32
-DataToCalcTests_SW="Vy"
+DataToCalcTests_SW=Vy
 
 ##################################################
 #			   Basic Test Parameter				 #
@@ -65,8 +65,8 @@ EndTimeStepCalculation_Ny=20
 ##################################################
 L2NormTest=false
 
-NormalizeData_L2Norm="Amplitude BasicData"
-MaxL2NormDiff="4.5 0.5"
+NormalizeData_L2Norm=Amplitude BasicData
+MaxL2NormDiff=4.5 0.5
 
 BasicTimeStep_L2=0
 DivergentTimeStep_L2=20
@@ -77,7 +77,7 @@ DivergentTimeStep_L2=20
 L2NormBetweenKernelsTest=false
 
 BasicKernel_L2NormBetweenKernels=BGKCompSP27
-Timesteps_L2NormBetweenKernels="0 10 20"
+Timesteps_L2NormBetweenKernels=0 10 20
 NormalizeWith=amplitude
 
 ##################################################
@@ -106,19 +106,19 @@ ShearWave512=true
 ##################################################
 NumberOfGridLevels=1
 
-GridPath32="grids/gridUni32x4x48"
-GridPath64="grids/gridUni64x4x96"
-GridPath128="grids/gridUni128x4x192"
-GridPath256="grids/gridUni256x4x384"
-GridPath512="grids/gridUni512x4x768"
+GridPath32=grids/gridUni32x4x48
+GridPath64=grids/gridUni64x4x96
+GridPath128=grids/gridUni128x4x192
+GridPath256=grids/gridUni256x4x384
+GridPath512=grids/gridUni512x4x768
 
 ##################################################
 # 			File Writing Information			 #
 ##################################################
-WriteVTKFiles=false
-FolderForVTKFileWriting="Output"
+WriteVTKFiles=true
+FolderForVTKFileWriting=Output
 StartStepFileWriter=0
 
 WriteAnalyResultsToVTK=true
 
-FolderLogFile="logFiles"
\ No newline at end of file
+FolderLogFile=logFiles
\ No newline at end of file
diff --git a/apps/gpu/tests/NumericalTests/configK15_nu10tm4.txt b/apps/gpu/tests/NumericalTests/configK15_nu10tm4.txt
index d0ceb823b33fcc143f27d2ef9abcaee437daea2b..985040a272642ca6c426cd8e6a00f91c263449e6 100644
--- a/apps/gpu/tests/NumericalTests/configK15_nu10tm4.txt
+++ b/apps/gpu/tests/NumericalTests/configK15_nu10tm4.txt
@@ -1,41 +1,41 @@
 ##################################################
 #				 GPU Mapping					 #
 ##################################################
-Devices="1"
+Devices=1
 
 ##################################################
 #	       Basic Simulation Parameter			 #
 ##################################################
-KernelsToTest="CumulantK15Comp"
+KernelsToTest=CumulantK15Comp
 #CumulantK17Comp
 #BGKCompSP27 BGKPlusCompSP27 CumulantAA2016CompSP27 CumulantOneCompSP27 CumulantF3CompSP27 CumulantF32018CompSP27 CumulantAll4CompSP27
 NumberOfTimeSteps=20
-Viscosity="0.0001"
+Viscosity=0.0001
 Rho0=1.0
 
 ##################################################
 #	    TaylorGreenVortex Parameter				 #
 ##################################################
-BasisTimeStepLength_TGV_Ux="1000"
-ux_TGV_Ux="0.096"
-Amplitude_TGV_Ux="0.001"
+BasisTimeStepLength_TGV_Ux=1000
+ux_TGV_Ux=0.096
+Amplitude_TGV_Ux=0.001
 l0_TGV_Ux=32
-DataToCalcTests_TGV_Ux="Vx Vz"
+DataToCalcTests_TGV_Ux=Vx Vz
 
-BasisTimeStepLength_TGV_Uz="1000"
-uz_TGV_Uz="0.048"
-Amplitude_TGV_Uz="0.001"
+BasisTimeStepLength_TGV_Uz=1000
+uz_TGV_Uz=0.048
+Amplitude_TGV_Uz=0.001
 l0_TGV_Uz=48
-DataToCalcTests_TGV_Uz="Vx Vz"
+DataToCalcTests_TGV_Uz=Vx Vz
 
 ##################################################
 #	       Shear Wave Parameter					 #
 ##################################################
-BasisTimeStepLength_SW="1000"
-u0_SW="0.096"
-v0_SW="0.1"
+BasisTimeStepLength_SW=1000
+u0_SW=0.096
+v0_SW=0.1
 l0_SW=32
-DataToCalcTests_SW="Vy"
+DataToCalcTests_SW=Vy
 
 ##################################################
 #			   Basic Test Parameter				 #
@@ -65,8 +65,8 @@ EndTimeStepCalculation_Ny=20
 ##################################################
 L2NormTest=false
 
-NormalizeData_L2Norm="Amplitude BasicData"
-MaxL2NormDiff="4.5 0.5"
+NormalizeData_L2Norm=Amplitude BasicData
+MaxL2NormDiff=4.5 0.5
 
 BasicTimeStep_L2=0
 DivergentTimeStep_L2=20
@@ -77,7 +77,7 @@ DivergentTimeStep_L2=20
 L2NormBetweenKernelsTest=false
 
 BasicKernel_L2NormBetweenKernels=BGKCompSP27
-Timesteps_L2NormBetweenKernels="0 10 20"
+Timesteps_L2NormBetweenKernels=0 10 20
 NormalizeWith=amplitude
 
 ##################################################
@@ -106,19 +106,19 @@ ShearWave512=true
 ##################################################
 NumberOfGridLevels=1
 
-GridPath32="grids/gridUni32x4x48"
-GridPath64="grids/gridUni64x4x96"
-GridPath128="grids/gridUni128x4x192"
-GridPath256="grids/gridUni256x4x384"
-GridPath512="grids/gridUni512x4x768"
+GridPath32=grids/gridUni32x4x48
+GridPath64=grids/gridUni64x4x96
+GridPath128=grids/gridUni128x4x192
+GridPath256=grids/gridUni256x4x384
+GridPath512=grids/gridUni512x4x768
 
 ##################################################
 # 			File Writing Information			 #
 ##################################################
-WriteVTKFiles=false
-FolderForVTKFileWriting="Output"
+WriteVTKFiles=true
+FolderForVTKFileWriting=Output
 StartStepFileWriter=0
 
 WriteAnalyResultsToVTK=true
 
-FolderLogFile="logFiles"
\ No newline at end of file
+FolderLogFile=logFiles
\ No newline at end of file
diff --git a/apps/gpu/tests/NumericalTests/configK15_nu10tm5.txt b/apps/gpu/tests/NumericalTests/configK15_nu10tm5.txt
index 1b0b7d786efb2f0bec1718359c5b5cb603578bc0..9767e00e53d22c36405bace31479349b3fff5bcc 100644
--- a/apps/gpu/tests/NumericalTests/configK15_nu10tm5.txt
+++ b/apps/gpu/tests/NumericalTests/configK15_nu10tm5.txt
@@ -1,41 +1,41 @@
 ##################################################
 #				 GPU Mapping					 #
 ##################################################
-Devices="1"
+Devices=1
 
 ##################################################
 #	       Basic Simulation Parameter			 #
 ##################################################
-KernelsToTest="CumulantK15Comp"
+KernelsToTest=CumulantK15Comp
 #CumulantK17Comp
 #BGKCompSP27 BGKPlusCompSP27 CumulantAA2016CompSP27 CumulantOneCompSP27 CumulantF3CompSP27 CumulantF32018CompSP27 CumulantAll4CompSP27
 NumberOfTimeSteps=20
-Viscosity="0.00001"
+Viscosity=0.00001
 Rho0=1.0
 
 ##################################################
 #	    TaylorGreenVortex Parameter				 #
 ##################################################
-BasisTimeStepLength_TGV_Ux="1000"
-ux_TGV_Ux="0.096"
-Amplitude_TGV_Ux="0.001"
+BasisTimeStepLength_TGV_Ux=1000
+ux_TGV_Ux=0.096
+Amplitude_TGV_Ux=0.001
 l0_TGV_Ux=32
-DataToCalcTests_TGV_Ux="Vx Vz"
+DataToCalcTests_TGV_Ux=Vx Vz
 
-BasisTimeStepLength_TGV_Uz="1000"
-uz_TGV_Uz="0.048"
-Amplitude_TGV_Uz="0.001"
+BasisTimeStepLength_TGV_Uz=1000
+uz_TGV_Uz=0.048
+Amplitude_TGV_Uz=0.001
 l0_TGV_Uz=48
-DataToCalcTests_TGV_Uz="Vx Vz"
+DataToCalcTests_TGV_Uz=Vx Vz
 
 ##################################################
 #	       Shear Wave Parameter					 #
 ##################################################
-BasisTimeStepLength_SW="1000"
-u0_SW="0.096"
-v0_SW="0.1"
+BasisTimeStepLength_SW=1000
+u0_SW=0.096
+v0_SW=0.1
 l0_SW=32
-DataToCalcTests_SW="Vy"
+DataToCalcTests_SW=Vy
 
 ##################################################
 #			   Basic Test Parameter				 #
@@ -65,8 +65,8 @@ EndTimeStepCalculation_Ny=20
 ##################################################
 L2NormTest=false
 
-NormalizeData_L2Norm="Amplitude BasicData"
-MaxL2NormDiff="4.5 0.5"
+NormalizeData_L2Norm=Amplitude BasicData
+MaxL2NormDiff=4.5 0.5
 
 BasicTimeStep_L2=0
 DivergentTimeStep_L2=20
@@ -77,7 +77,7 @@ DivergentTimeStep_L2=20
 L2NormBetweenKernelsTest=false
 
 BasicKernel_L2NormBetweenKernels=BGKCompSP27
-Timesteps_L2NormBetweenKernels="0 10 20"
+Timesteps_L2NormBetweenKernels=0 10 20
 NormalizeWith=amplitude
 
 ##################################################
@@ -106,19 +106,19 @@ ShearWave512=true
 ##################################################
 NumberOfGridLevels=1
 
-GridPath32="grids/gridUni32x4x48"
-GridPath64="grids/gridUni64x4x96"
-GridPath128="grids/gridUni128x4x192"
-GridPath256="grids/gridUni256x4x384"
-GridPath512="grids/gridUni512x4x768"
+GridPath32=grids/gridUni32x4x48
+GridPath64=grids/gridUni64x4x96
+GridPath128=grids/gridUni128x4x192
+GridPath256=grids/gridUni256x4x384
+GridPath512=grids/gridUni512x4x768
 
 ##################################################
 # 			File Writing Information			 #
 ##################################################
-WriteVTKFiles=false
-FolderForVTKFileWriting="Output"
+WriteVTKFiles=true
+FolderForVTKFileWriting=Output
 StartStepFileWriter=0
 
 WriteAnalyResultsToVTK=true
 
-FolderLogFile="logFiles"
\ No newline at end of file
+FolderLogFile=logFiles
\ No newline at end of file
diff --git a/apps/gpu/tests/NumericalTests/configK17chim_nu10tm3.txt b/apps/gpu/tests/NumericalTests/configK17chim_nu10tm3.txt
index 0cceea9a7f6f6c30e3df6f6802f182c893774522..6ca01d34f0d8fb224f67cdf786160c30dd2bbd97 100644
--- a/apps/gpu/tests/NumericalTests/configK17chim_nu10tm3.txt
+++ b/apps/gpu/tests/NumericalTests/configK17chim_nu10tm3.txt
@@ -1,39 +1,39 @@
 ##################################################
 #				 GPU Mapping					 #
 ##################################################
-Devices="1"
+Devices=0
 
 ##################################################
 #	       Basic Simulation Parameter			 #
 ##################################################
-KernelsToTest="CumulantK17CompChim"
+KernelsToTest=CumulantK17CompChim
 NumberOfTimeSteps=20
-Viscosity="0.001"
+Viscosity=0.001
 Rho0=1.0
 
 ##################################################
 #	    TaylorGreenVortex Parameter				 #
 ##################################################
-BasisTimeStepLength_TGV_Ux="1000"
-ux_TGV_Ux="0.096"
-Amplitude_TGV_Ux="0.001"
+BasisTimeStepLength_TGV_Ux=1000
+ux_TGV_Ux=0.096
+Amplitude_TGV_Ux=0.001
 l0_TGV_Ux=32
-DataToCalcTests_TGV_Ux="Vx Vz"
+DataToCalcTests_TGV_Ux=Vx Vz
 
-BasisTimeStepLength_TGV_Uz="1000"
-uz_TGV_Uz="0.048"
-Amplitude_TGV_Uz="0.001"
+BasisTimeStepLength_TGV_Uz=1000
+uz_TGV_Uz=0.048
+Amplitude_TGV_Uz=0.001
 l0_TGV_Uz=48
-DataToCalcTests_TGV_Uz="Vx Vz"
+DataToCalcTests_TGV_Uz=Vx Vz
 
 ##################################################
 #	       Shear Wave Parameter					 #
 ##################################################
-BasisTimeStepLength_SW="1000"
-u0_SW="0.096"
-v0_SW="0.1"
+BasisTimeStepLength_SW=1000
+u0_SW=0.096
+v0_SW=0.1
 l0_SW=32
-DataToCalcTests_SW="Vy"
+DataToCalcTests_SW=Vy
 
 ##################################################
 #			   Basic Test Parameter				 #
@@ -63,8 +63,8 @@ EndTimeStepCalculation_Ny=20
 ##################################################
 L2NormTest=false
 
-NormalizeData_L2Norm="Amplitude BasicData"
-MaxL2NormDiff="4.5 0.5"
+NormalizeData_L2Norm=Amplitude BasicData
+MaxL2NormDiff=4.5 0.5
 
 BasicTimeStep_L2=0
 DivergentTimeStep_L2=20
@@ -75,7 +75,7 @@ DivergentTimeStep_L2=20
 L2NormBetweenKernelsTest=false
 
 BasicKernel_L2NormBetweenKernels=BGKCompSP27
-Timesteps_L2NormBetweenKernels="0 10 20"
+Timesteps_L2NormBetweenKernels=0 10 20
 NormalizeWith=amplitude
 
 ##################################################
@@ -84,8 +84,8 @@ NormalizeWith=amplitude
 TaylorGreenVortexUx32=true
 TaylorGreenVortexUx64=true
 TaylorGreenVortexUx128=true
-TaylorGreenVortexUx256=true
-TaylorGreenVortexUx512=true
+TaylorGreenVortexUx256=false
+TaylorGreenVortexUx512=false
 
 TaylorGreenVortexUz32=false
 TaylorGreenVortexUz64=false
@@ -93,30 +93,30 @@ TaylorGreenVortexUz128=false
 TaylorGreenVortexUz256=false
 TaylorGreenVortexUz512=false
 
-ShearWave32=true
-ShearWave64=true
-ShearWave128=true
-ShearWave256=true
-ShearWave512=true
+ShearWave32=false
+ShearWave64=false
+ShearWave128=false
+ShearWave256=false
+ShearWave512=false
 
 ##################################################
 #				Grid Information				 #
 ##################################################
 NumberOfGridLevels=1
 
-GridPath32="grids/gridUni32x4x48"
-GridPath64="grids/gridUni64x4x96"
-GridPath128="grids/gridUni128x4x192"
-GridPath256="grids/gridUni256x4x384"
-GridPath512="grids/gridUni512x4x768"
+GridPath32=grids/gridUni32x4x48
+GridPath64=grids/gridUni64x4x96
+GridPath128=grids/gridUni128x4x192
+GridPath256=grids/gridUni256x4x384
+GridPath512=grids/gridUni512x4x768
 
 ##################################################
 # 			File Writing Information			 #
 ##################################################
-WriteVTKFiles=false
-FolderForVTKFileWriting="Output"
+WriteVTKFiles=true
+FolderForVTKFileWriting=Output
 StartStepFileWriter=0
 
-WriteAnalyResultsToVTK=true
+WriteAnalyResultsToVTK=false
 
-FolderLogFile="logFiles"
\ No newline at end of file
+FolderLogFile=logFiles
\ No newline at end of file
diff --git a/apps/gpu/tests/NumericalTests/main.cpp b/apps/gpu/tests/NumericalTests/main.cpp
index 703e8961497f3f1c93cb460fb9937f391e4ea0e9..fda9d14a7b752eab2585b300ee6aef606437913e 100644
--- a/apps/gpu/tests/NumericalTests/main.cpp
+++ b/apps/gpu/tests/NumericalTests/main.cpp
@@ -1,143 +1,44 @@
-#include <mpi.h>
 #include <gmock/gmock.h>
+#include <mpi.h>
 
 #include "Utilities/ConfigFileReaderNT/ConfigFileReaderNT.h"
 #include "Utilities/LogFileQueue/LogFileQueue.h"
 #include "Utilities/NumericalTestFactory/NumericalTestFactoryImp.h"
 #include "Utilities/TestQueue/TestQueue.h"
-#include "Utilities/VirtualFluidSimulation/VirtualFluidSimulation.h"
-#include "Utilities/VirtualFluidSimulationFactory/VirtualFluidSimulationFactoryImp.h"
+#include "Utilities/VirtualFluidSimulationFactory/VirtualFluidSimulationFactory.h"
 
-//validation
+// validation
+#include "Utilities/Calculator/FFTCalculator/FFTCalculator.h"
 #include <fstream>
 #include <iostream>
-#include "Utilities/Calculator/FFTCalculator/FFTCalculator.h"
 
-static void validateTestSuite()
+static TestSuiteResult startNumericalTests(const std::string &configFile)
 {
-	const int timeSteps = 10;
-	const int begin = 11;
-	const int end = 20;
-	const int l0 = 32;
-	const double viscosity = 0.001;
-	std::string kernelName = "Cum";
-
-	std::vector<int> xLength{32,64,128,256,512};
-	std::vector<int> zLength(xLength.size());
-	std::vector<int> timeStepLength(xLength.size());
-	for (int i = 0; i < xLength.size(); i++) {
-		zLength.at(i) = xLength.at(i) * 3 / 2;
-		timeStepLength.at(i) = (int)1000 * xLength.at(i)*xLength.at(i) / l0 / l0;
-	}
-		
-	std::vector<std::vector<std::ostringstream>> filePaths;
-	filePaths.resize(xLength.size());
-	
-	for (int j = 0; j < xLength.size(); j++) {
-		filePaths.at(j).resize(timeSteps);
-		for (int i = begin; i <= end; i++)
-			filePaths.at(j).at(i - begin) << "C:/Users/Timon/Desktop/Auswertung_TGV_10hm3_X_CumD3Q27F3/Auswertung_TGV_10hm3_X_CumD3Q27F3/" << kernelName << "_" << xLength.at(j) << "_3_" << zLength.at(j) << "_AD_X_" << i*timeStepLength.at(j) << ".dat";
-
-	}
-	std::vector<std::vector<std::vector<double>>> dataForOneSimulationGroup;
-	
-	for (int j = 0; j < filePaths.size(); j++) {
-		std::vector<std::vector<double>> dataForOneSimulation;
-		dataForOneSimulation.resize(timeSteps);
-		for (int i = 0; i < filePaths.at(j).size(); i++) {
-			std::ifstream file;
-			file.open(filePaths.at(j).at(i).str());
-
-			if (file.is_open()) {
-				double data = 0.0;
-				while (file >> data)
-					dataForOneSimulation.at(i).push_back(data);
-
-				file.close();
-			}
-			else
-				int stop = 1;
-		}
-		dataForOneSimulationGroup.push_back(dataForOneSimulation);
-	}
-
-	std::shared_ptr<FFTCalculator> calulator = FFTCalculator::getInstance();
-
-	std::vector<double> phiDifForOneSimGroup;
-	std::vector<double> nyDifForOneSimGroup;
-	for (int i = 0; i < dataForOneSimulationGroup.size(); i++) {
-		int timeStepLength = 1000 * xLength.at(i)*xLength.at(i) / l0 / l0;
-
-		double phiDiff = calulator->calcPhiDiff(dataForOneSimulationGroup.at(i), false, xLength.at(i), zLength.at(i), timeStepLength);
-		double ny = calulator->calcNy(dataForOneSimulationGroup.at(i), false, xLength.at(i), zLength.at(i), timeStepLength);
-		double nyDiff = abs(ny - viscosity) / viscosity;
-		phiDifForOneSimGroup.push_back(phiDiff);
-		nyDifForOneSimGroup.push_back(nyDiff);
-	}
-
-
+    auto configData = vf::gpu::tests::readConfigFile(configFile);
 
-	std::fstream dataOutPhi;
-	std::string dataOutFilePathPhi = "C:/Users/Timon/Desktop/Auswertung_TGV_10hm3_X_CumD3Q27F3/NumericalTestAuswertung/" + kernelName + "_PhiDiff.dat";
-	dataOutPhi.open(dataOutFilePathPhi, std::ios::out);
+    std::shared_ptr<NumericalTestFactoryImp> numericalTestFactory = NumericalTestFactoryImp::getNewInstance(configData);
 
-	std::fstream dataOutNy;
-	std::string dataOutFilePathNy = "C:/Users/Timon/Desktop/Auswertung_TGV_10hm3_X_CumD3Q27F3/NumericalTestAuswertung/" + kernelName + "_NyDiff.dat";
-	dataOutNy.open(dataOutFilePathNy, std::ios::out);
+    std::shared_ptr<TestQueue> testQueue = numericalTestFactory->getTestQueue();
+    std::shared_ptr<LogFileQueue> logFileQueue = numericalTestFactory->getLogFileQueue();
 
-	for (int i = 0; i < phiDifForOneSimGroup.size(); i++) {
-		dataOutPhi << std::fixed << std::setprecision(std::numeric_limits<double>::digits10 + 1);
-		dataOutPhi << phiDifForOneSimGroup.at(i);
-		dataOutNy << std::fixed << std::setprecision(std::numeric_limits<double>::digits10 + 1);
-		dataOutNy << nyDifForOneSimGroup.at(i);
+    auto result = testQueue->run();
+    logFileQueue->writeLogFiles();
 
-		if (i < phiDifForOneSimGroup.size() - 1) {
-			dataOutPhi << std::endl;
-			dataOutNy << std::endl;
-		}
-	}
-
-	dataOutPhi.close();
-}
-
-
-static bool startNumericalTests(const std::string &configFile)
-{
-	std::shared_ptr<ConfigFileReader> configReader = ConfigFileReader::getNewInstance(configFile);
-	configReader->readConfigFile();
-
-	std::shared_ptr<NumericalTestFactoryImp> numericalTestFactory = NumericalTestFactoryImp::getNewInstance(configReader->getConfigData());
-
-	std::vector<std::shared_ptr<TestSimulation> > testSim = numericalTestFactory->getTestSimulations();
-	std::shared_ptr<TestQueue> testQueue = numericalTestFactory->getTestQueue();
-	std::shared_ptr<LogFileQueue> logFileQueue = numericalTestFactory->getLogFileQueue();
-
-	std::shared_ptr<VirtualFluidSimulationFactory> factory = VirtualFluidSimulationFactoryImp::getNewInstance();
-	std::vector<std::shared_ptr<VirtualFluidSimulation> > vfSimulations = factory->makeVirtualFluidSimulations(testSim);
-
-	for (int i = 0; i < vfSimulations.size(); i++)
-		vfSimulations.at(i)->run();
-
-	testQueue->makeFinalOutput();
-	logFileQueue->writeLogFiles();
-
-	return testQueue->getNumberOfFailedTests() > 0;
+    return result;
 }
 
 int main(int argc, char **argv)
 {
-	MPI_Init(&argc, &argv);
-
-	//validateTestSuite();
+    MPI_Init(&argc, &argv);
 
-	bool tests_passed{false};
+    auto tests_passed = TestSuiteResult::FAILED;
 
-	if (argc > 1)
+    if (argc > 1)
         tests_passed = startNumericalTests(argv[1]);
-	else
-		std::cout << "Configuration file must be set!: lbmgm <config file>" << std::endl << std::flush;
+    else
+        std::cout << "Configuration file must be set!: lbmgm <config file>" << std::endl << std::flush;
 
     MPI_Finalize();
 
-	return tests_passed;
+    return tests_passed;
 }
diff --git a/gpu.cmake b/gpu.cmake
index 8b98763d8fdb4e1687bcfccbacd722ddd001a53e..5b175ca2a5fe7d289bd948e905ada612413333d2 100644
--- a/gpu.cmake
+++ b/gpu.cmake
@@ -25,24 +25,9 @@ add_subdirectory(src/gpu/GridGenerator)
 IF (BUILD_VF_GPU)
     add_subdirectory(src/gpu/VirtualFluids_GPU)
 
-    #add_subdirectory(targets/apps/LBM/lbmTest)
-    #add_subdirectory(targets/apps/LBM/metisTest)
-    #add_subdirectory(targets/apps/LBM/Basel)
-    #add_subdirectory(targets/apps/LBM/BaselNU)
-    #add_subdirectory(targets/apps/LBM/BaselMultiGPU)
-
     add_subdirectory(apps/gpu/LBM/DrivenCavity)
     add_subdirectory(apps/gpu/LBM/SphereGPU)
-    #add_subdirectory(apps/gpu/LBM/WTG_RUB)
-    #add_subdirectory(apps/gpu/LBM/gridGeneratorTest)
-    #add_subdirectory(apps/gpu/LBM/TGV_3D)
-    #add_subdirectory(apps/gpu/LBM/TGV_3D_MultiGPU)
-    #add_subdirectory(apps/gpu/LBM/SphereScaling)
-    #add_subdirectory(apps/gpu/LBM/DrivenCavityMultiGPU)
-    #add_subdirectory(apps/gpu/LBM/MusselOyster)
-    #add_subdirectory(apps/gpu/LBM/Poiseuille)
-    #add_subdirectory(apps/gpu/LBM/ActuatorLine)
-    #add_subdirectory(apps/gpu/LBM/BoundaryLayer)
+    add_subdirectory(apps/gpu/LBM/BoundaryLayer)
 ELSE()
     MESSAGE( STATUS "exclude Virtual Fluids GPU." )
 ENDIF()
@@ -51,50 +36,13 @@ ENDIF()
 ###                  Virtual Fluids GKS                   ###
 #############################################################
 
-
 IF (BUILD_VF_GKS)
     add_subdirectory(src/gpu/GksMeshAdapter)
     add_subdirectory(src/gpu/GksVtkAdapter)
 
     add_subdirectory(src/gpu/GksGpu)
 
-    #add_subdirectory(targets/apps/GKS/gksTest)
-    #add_subdirectory(targets/apps/GKS/ChannelFlow)
-
-    #add_subdirectory(targets/apps/GKS/ChannelFlowObstacle)
-    #add_subdirectory(targets/apps/GKS/ShearWave)
-
-    #add_subdirectory(targets/apps/GKS/LiFuXu)
-
-    #add_subdirectory(targets/apps/GKS/TaylorGreen3D)
-    #add_subdirectory(targets/apps/GKS/DrivenCavity3D)
-    #add_subdirectory(targets/apps/GKS/ThermalCavity)
-
-    #add_subdirectory(targets/apps/GKS/ThermalCavityMultiGPU)
-    #add_subdirectory(targets/apps/GKS/DrivenCavityMultiGPU)
-    #add_subdirectory(targets/apps/GKS/RayleighBenardMultiGPU)
-
-    #add_subdirectory(targets/apps/GKS/SalinasVazquez)
-    #add_subdirectory(targets/apps/GKS/BoundaryJet)
-
-    #add_subdirectory(targets/apps/GKS/PropaneFlame)
-    #add_subdirectory(targets/apps/GKS/ConfinedCombustion)
-    #add_subdirectory(targets/apps/GKS/MethaneFlame)
-
-    #add_subdirectory(targets/apps/GKS/Room)
-    #add_subdirectory(targets/apps/GKS/RoomMultiGPU)
-    #add_subdirectory(targets/apps/GKS/RoomFire)
-    #add_subdirectory(targets/apps/GKS/RoomFireExtended)
-    #add_subdirectory(targets/apps/GKS/ConcreteHeatFluxBCTest)
-
-    #add_subdirectory(targets/apps/GKS/PoolFire)
     add_subdirectory(apps/gpu/GKS/Flame7cm)
-    #add_subdirectory(targets/apps/GKS/SandiaFlame_1m)
-    #add_subdirectory(targets/apps/GKS/Candle)
-
-    #add_subdirectory(targets/apps/GKS/MultiGPU)
-    #add_subdirectory(targets/apps/GKS/MultiGPU_nD)
-    #add_subdirectory(targets/apps/GKS/SingleGPU)
 ELSE()
     MESSAGE( STATUS "exclude Virtual Fluids GKS." )
 ENDIF()
@@ -130,7 +78,7 @@ if(BUILD_NUMERIC_TESTS)
 endif()
 
 #############################################################
-###					Annas Traffic Sim				      ###
+###                 Annas Traffic Sim                     ###
 #############################################################
 
 if(BUILD_VF_TRAFFIC)
diff --git a/pythonbindings/src/gpu/submodules/parameter.cpp b/pythonbindings/src/gpu/submodules/parameter.cpp
index 0ea87924bd0a2fc2b3a6bc343cad5948febe2ffe..7b4e67f101e3928abbd4262557864ea1d0f45b02 100644
--- a/pythonbindings/src/gpu/submodules/parameter.cpp
+++ b/pythonbindings/src/gpu/submodules/parameter.cpp
@@ -41,6 +41,9 @@ namespace parameter
         .def("set_density_ratio", &Parameter::setDensityRatio)
         .def("set_devices", &Parameter::setDevices)
         .def("set_is_body_force", &Parameter::setIsBodyForce)
+        .def("set_use_AMD", &Parameter::setUseAMD)
+        .def("set_use_Wale", &Parameter::setUseWale)
+        .def("set_SGS_constant", &Parameter::setSGSConstant)
         .def("set_main_kernel", &Parameter::setMainKernel)
         .def("set_AD_kernel", &Parameter::setADKernel)
         .def("set_use_AMD", &Parameter::setUseAMD)
diff --git a/src/cuda/CudaGrid.cpp b/src/cuda/CudaGrid.cpp
index 9590452e107d17e69dd77b7159c20ca009f01a4c..48ac86942d938b223e95759942676f862b614fa1 100644
--- a/src/cuda/CudaGrid.cpp
+++ b/src/cuda/CudaGrid.cpp
@@ -5,20 +5,9 @@
 namespace vf::cuda
 {
 
-CudaGrid::CudaGrid(unsigned int numberOfThreads, unsigned int numberOfEntities)
+CudaGrid::CudaGrid(unsigned int numberOfThreads, unsigned int numberOfEntities): grid {getCudaGrid( numberOfThreads, numberOfEntities)},
+    threads{dim3(numberOfThreads, 1, 1)}
 {
-    unsigned int Grid = (numberOfEntities / numberOfThreads) + 1;
-    unsigned int Grid1, Grid2;
-    if (Grid > 512) {
-        Grid1 = 512;
-        Grid2 = (Grid / Grid1) + 1;
-    } else {
-        Grid1 = 1;
-        Grid2 = Grid;
-    }
-    
-    grid = dim3(Grid1, Grid2);
-    threads = dim3(numberOfThreads, 1, 1);
 }
 
 void CudaGrid::print() const
diff --git a/src/gpu/GridGenerator/CMakeLists.txt b/src/gpu/GridGenerator/CMakeLists.txt
index 7483d276d08bb66b8c6948689f1dbb13f8846cb6..8102ad3a10b53dded2ba6fe489753f20d1d2ed4f 100644
--- a/src/gpu/GridGenerator/CMakeLists.txt
+++ b/src/gpu/GridGenerator/CMakeLists.txt
@@ -5,4 +5,9 @@ vf_add_tests()
 
 if(NOT MSVC) 
    target_compile_options(GridGenerator PRIVATE "-Wno-strict-aliasing")
+endif()
+
+
+if(BUILD_VF_UNIT_TESTS)
+   target_include_directories(GridGeneratorTests PRIVATE "${VF_ROOT_DIR}/src/basics/")
 endif()
\ No newline at end of file
diff --git a/src/gpu/GridGenerator/geometries/BoundingBox/BoundingBoxTest.cpp b/src/gpu/GridGenerator/geometries/BoundingBox/BoundingBoxTest.cpp
index 67ac560b91f0ae712dfbc67d22676d8af8fc9afe..3d94d8fbcadb7d9803184885e67d1cb31e906d86 100644
--- a/src/gpu/GridGenerator/geometries/BoundingBox/BoundingBoxTest.cpp
+++ b/src/gpu/GridGenerator/geometries/BoundingBox/BoundingBoxTest.cpp
@@ -31,15 +31,15 @@ TEST(BoundingBoxExactTest, findMinMaxFromTriangle)
     Vertex normal = Vertex(0.0f, 0.0f, 0.0f);
     Triangle t = Triangle(v1, v2, v3, normal);
 
-	box.setMinMax(t);
-
-	EXPECT_THAT(box.minX, RealEq(minX));
-	EXPECT_THAT(box.minY, RealEq(minY));
-	EXPECT_THAT(box.minZ, RealEq(minZ));
-	
-	EXPECT_THAT(box.maxX, RealEq(maxX));
-	EXPECT_THAT(box.maxY, RealEq(maxY));
-	EXPECT_THAT(box.maxZ, RealEq(maxZ));
+    box.setMinMax(t);
+
+    EXPECT_THAT(box.minX, RealEq(minX));
+    EXPECT_THAT(box.minY, RealEq(minY));
+    EXPECT_THAT(box.minZ, RealEq(minZ));
+    
+    EXPECT_THAT(box.maxX, RealEq(maxX));
+    EXPECT_THAT(box.maxY, RealEq(maxY));
+    EXPECT_THAT(box.maxZ, RealEq(maxZ));
 }
 
 TEST(BoundingBoxTest, isInside_true)
diff --git a/src/gpu/GridGenerator/geometries/TriangularMesh/TriangularMesh.cpp b/src/gpu/GridGenerator/geometries/TriangularMesh/TriangularMesh.cpp
index a11384887074aa6b42bff77dd8b7ee1ade8fc9e0..883ca0deaf34f45e4608c4e59908b4562932db77 100644
--- a/src/gpu/GridGenerator/geometries/TriangularMesh/TriangularMesh.cpp
+++ b/src/gpu/GridGenerator/geometries/TriangularMesh/TriangularMesh.cpp
@@ -58,9 +58,9 @@ TriangularMesh* TriangularMesh::make(const std::string& fileName, const std::vec
 
 TriangularMesh::TriangularMesh(const std::string& input, const BoundingBox& box)
 {
-	this->triangleVec = STLReader::readSTL(box, input);
-	initalizeDataFromTriangles();
-	this->findNeighbors();
+    this->triangleVec = STLReader::readSTL(box, input);
+    initalizeDataFromTriangles();
+    this->findNeighbors();
 }
 
 TriangularMesh::TriangularMesh(const std::string& inputPath, const std::vector<uint> ignorePatches)
@@ -76,12 +76,7 @@ TriangularMesh::TriangularMesh(const std::string& inputPath, const std::vector<u
 
 TriangularMesh::TriangularMesh()
 {
-	this->minmax = BoundingBox::makeInvalidMinMaxBox();  // blame Lenz
-}
-
-TriangularMesh::~TriangularMesh()
-{
-
+    this->minmax = BoundingBox::makeInvalidMinMaxBox();  // blame Lenz
 }
 
 Object* TriangularMesh::clone() const
@@ -100,12 +95,12 @@ uint TriangularMesh::getNumberOfTriangles() const
 
 void TriangularMesh::findNeighbors()
 {
-	*logging::out << logging::Logger::INFO_INTERMEDIATE << "start finding neighbors ...\n";
+    *logging::out << logging::Logger::INFO_INTERMEDIATE << "start finding neighbors ...\n";
 
     auto t = Timer::makeStart();
 
-	TriangleNeighborFinder finder(triangles, size);
-	finder.fillWithNeighborAngles(this);
+    TriangleNeighborFinder finder(triangles, size);
+    finder.fillWithNeighborAngles(this);
 
     t->end();
 
@@ -114,19 +109,19 @@ void TriangularMesh::findNeighbors()
 
 void TriangularMesh::setTriangles(std::vector<Triangle> triangles)
 {
-	this->triangleVec = triangles;
-	initalizeDataFromTriangles();
+    this->triangleVec = triangles;
+    initalizeDataFromTriangles();
 }
 
 void TriangularMesh::setMinMax(BoundingBox minmax)
 {
-	this->minmax = minmax;
+    this->minmax = minmax;
 }
 
 void TriangularMesh::initalizeDataFromTriangles()
 {
-	this->triangles = triangleVec.data();
-	this->size = long(triangleVec.size());
+    this->triangles = triangleVec.data();
+    this->size = long(triangleVec.size());
 
     for (std::size_t i = 0; i < (size_t)this->size; i++) {
         this->minmax.setMinMax(this->triangleVec[i]);
@@ -201,7 +196,7 @@ void TriangularMesh::scale(double offset)
     auto averrageNormals = getAverrageNormalsPerVertex(trianglesPerVertex);
 
 
-    for (std::size_t vertexID = 0; vertexID < this->getNumberOfTriangles() * 3; vertexID++)
+    for (uint vertexID = 0; vertexID < this->getNumberOfTriangles() * 3; vertexID++)
     {
         int coordinatedID = finder.sortedToTriangles[vertexID][IDS::coordinateID];
         Vertex averrageNormal = averrageNormals[coordinatedID];
diff --git a/src/gpu/GridGenerator/geometries/TriangularMesh/TriangularMesh.h b/src/gpu/GridGenerator/geometries/TriangularMesh/TriangularMesh.h
index beb1c3a05b25904446e72a62196bcf6213fb0691..2e876e1d3c50b377ef6df9a8489fe8a189849594 100644
--- a/src/gpu/GridGenerator/geometries/TriangularMesh/TriangularMesh.h
+++ b/src/gpu/GridGenerator/geometries/TriangularMesh/TriangularMesh.h
@@ -33,7 +33,6 @@
 #ifndef TriangularMesh_h
 #define TriangularMesh_h
 
-#include <stdio.h>
 #include <vector>
 #include <string>
 #include <memory>
@@ -55,20 +54,20 @@ class TriangularMesh : public Object
 public:
 
     GRIDGENERATOR_EXPORT static TriangularMesh* make(const std::string& fileName, const std::vector<uint> ignorePatches = std::vector<uint>());
-	GRIDGENERATOR_EXPORT TriangularMesh();
+    GRIDGENERATOR_EXPORT TriangularMesh();
     GRIDGENERATOR_EXPORT TriangularMesh(const std::string& inputPath, const std::vector<uint> ignorePatches = std::vector<uint>());
-	GRIDGENERATOR_EXPORT TriangularMesh(const std::string& inputPath, const BoundingBox &box);
-	GRIDGENERATOR_EXPORT ~TriangularMesh();
+    GRIDGENERATOR_EXPORT TriangularMesh(const std::string& inputPath, const BoundingBox &box);
+    GRIDGENERATOR_EXPORT ~TriangularMesh() override = default;
 
     GRIDGENERATOR_EXPORT uint getNumberOfTriangles() const;
 
-	GRIDGENERATOR_EXPORT void setTriangles(std::vector<Triangle> triangles);
-	GRIDGENERATOR_EXPORT void setMinMax(BoundingBox minmax);
+    GRIDGENERATOR_EXPORT void setTriangles(std::vector<Triangle> triangles);
+    GRIDGENERATOR_EXPORT void setMinMax(BoundingBox minmax);
 
-	std::vector<Triangle> triangleVec;
-	Triangle *triangles;
-	long size;
-	BoundingBox minmax;
+    std::vector<Triangle> triangleVec;
+    Triangle *triangles = nullptr;
+    long size = 0;
+    BoundingBox minmax;
 
     SPtr<GbTriFaceMesh3D> VF_GbTriFaceMesh3D;
 
@@ -81,8 +80,8 @@ public:
     GRIDGENERATOR_EXPORT void generateGbTriFaceMesh3D();
 
 private:
-	
-	void initalizeDataFromTriangles();
+
+    void initalizeDataFromTriangles();
 
     static std::vector<Vertex> getAverrageNormalsPerVertex(std::vector<std::vector<Triangle> > trianglesPerVertex);
     static void eliminateTriangleswithIdenticialNormal(std::vector<Triangle> &triangles);
@@ -110,4 +109,3 @@ public:
 
 
 #endif
-
diff --git a/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.h b/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.h
index 31303f55f2422312155580f14c1c53b02b97f3c1..3ca02d000b59ae879b33a9ae8eafd710f67def8a 100644
--- a/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.h
+++ b/src/gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.h
@@ -36,9 +36,9 @@
 #include <vector>
 #include <functional>
 
-#include "global.h"
+#include "gpu/GridGenerator/global.h"
 
-#include "grid/NodeValues.h"
+#include "gpu/GridGenerator/grid/NodeValues.h"
 
 class Grid;
 
diff --git a/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp b/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp
index 6c7bf8ca1853826d83fb6a713ffe03716bd2cf9a..f99cdcda06f36152c0a3c5861ee35a98ba67ff78 100644
--- a/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp
+++ b/src/gpu/GridGenerator/grid/BoundaryConditions/Side.cpp
@@ -40,6 +40,18 @@
 
 using namespace gg;
 
+std::vector<real> Side::getNormal()
+{
+    std::vector<real> normal;
+    if(this->getCoordinate()==X_INDEX)
+        normal = {(real)this->getDirection(), 0.0, 0.0};
+    if(this->getCoordinate()==Y_INDEX)
+        normal = {0.0, (real)this->getDirection(), 0.0};
+    if(this->getCoordinate()==Z_INDEX)
+        normal = {0.0, 0.0, (real)this->getDirection()};
+    return normal;
+}
+
 void Side::addIndices(SPtr<Grid> grid, SPtr<BoundaryCondition> boundaryCondition, std::string coord, real constant,
                       real startInner, real endInner, real startOuter, real endOuter)
 {
@@ -49,11 +61,19 @@ void Side::addIndices(SPtr<Grid> grid, SPtr<BoundaryCondition> boundaryCondition
         {
             const uint index = getIndex(grid, coord, constant, v1, v2);
 
-            if ((index != INVALID_INDEX) && (  grid->getFieldEntry(index) == vf::gpu::FLUID
-                                            || grid->getFieldEntry(index) == vf::gpu::FLUID_CFC
-                                            || grid->getFieldEntry(index) == vf::gpu::FLUID_CFF
-                                            || grid->getFieldEntry(index) == vf::gpu::FLUID_FCC
-                                            || grid->getFieldEntry(index) == vf::gpu::FLUID_FCF ))
+            if ((index != INVALID_INDEX) && (   grid->getFieldEntry(index) == vf::gpu::FLUID
+                                            ||  grid->getFieldEntry(index) == vf::gpu::FLUID_CFC
+                                            ||  grid->getFieldEntry(index) == vf::gpu::FLUID_CFF
+                                            ||  grid->getFieldEntry(index) == vf::gpu::FLUID_FCC
+                                            ||  grid->getFieldEntry(index) == vf::gpu::FLUID_FCF 
+                                            ||  grid->getFieldEntry(index) == vf::gpu::FLUID_FCF
+                                            
+                                            //! Enforce overlap of BCs on edge nodes
+                                            ||  grid->getFieldEntry(index)  == vf::gpu::BC_PRESSURE
+                                            ||  grid->getFieldEntry(index)  == vf::gpu::BC_VELOCITY 
+                                            ||  grid->getFieldEntry(index)  == vf::gpu::BC_NOSLIP   
+                                            ||  grid->getFieldEntry(index)  == vf::gpu::BC_SLIP     
+                                            ||  grid->getFieldEntry(index)  == vf::gpu::BC_STRESS ))
             {
                 grid->setFieldEntry(index, boundaryCondition->getType());
                 boundaryCondition->indices.push_back(index);
@@ -152,16 +172,21 @@ void Side::setQs(SPtr<Grid> grid, SPtr<BoundaryCondition> boundaryCondition, uin
             else                neighborZ = grid->getLastFluidNode ( coords, 2, grid->getEndZ() );
         }
 
+        //! Only set q's for directions that partially point in the Side-normal direction
+        bool alignedWithNormal = (this->getNormal()[0]*grid->getDirection()[dir * DIMENSION + 0]+
+                                  this->getNormal()[1]*grid->getDirection()[dir * DIMENSION + 1]+
+                                  this->getNormal()[2]*grid->getDirection()[dir * DIMENSION + 2] ) > 0;
+
         uint neighborIndex = grid->transCoordToIndex( neighborX, neighborY, neighborZ );
-        if( grid->getFieldEntry(neighborIndex) == vf::gpu::STOPPER_OUT_OF_GRID_BOUNDARY ||
-            grid->getFieldEntry(neighborIndex) == vf::gpu::STOPPER_OUT_OF_GRID ||
-            grid->getFieldEntry(neighborIndex) == vf::gpu::STOPPER_SOLID )
+        if((grid->getFieldEntry(neighborIndex) == vf::gpu::STOPPER_OUT_OF_GRID_BOUNDARY ||
+            grid->getFieldEntry(neighborIndex) == vf::gpu::STOPPER_OUT_OF_GRID          ||
+            grid->getFieldEntry(neighborIndex) == vf::gpu::STOPPER_SOLID)               &&
+            alignedWithNormal )
             qNode[dir] = 0.5;
         else
             qNode[dir] = -1.0;
-
     }
-
+    
     boundaryCondition->qs.push_back(qNode);
 }
 
@@ -260,7 +285,7 @@ void MY::addIndices(std::vector<SPtr<Grid> > grid, uint level, SPtr<BoundaryCond
     real coordinateNormal = grid[level]->getStartY() + grid[level]->getDelta();
 
     if( coordinateNormal > grid[0]->getStartY() + grid[0]->getDelta() ) return;
-
+    
     Side::addIndices(grid[level], boundaryCondition, "y", coordinateNormal, startInner, endInner, startOuter, endOuter);
 }
 
diff --git a/src/gpu/GridGenerator/grid/BoundaryConditions/Side.h b/src/gpu/GridGenerator/grid/BoundaryConditions/Side.h
index c9ffd40b0aa8fc2b8da8b4d85de60faea6927117..53a763bc562ee978042b28d24856fbcca256c5f9 100644
--- a/src/gpu/GridGenerator/grid/BoundaryConditions/Side.h
+++ b/src/gpu/GridGenerator/grid/BoundaryConditions/Side.h
@@ -36,7 +36,7 @@
 #include <string>
 #include <vector>
 
-#include "global.h"
+#include "gpu/GridGenerator/global.h"
 
 #define X_INDEX 0
 #define Y_INDEX 1
@@ -72,15 +72,17 @@ public:
 
     virtual SideType whoAmI() const = 0;
 
+    std::vector<real> getNormal();
+
 protected:
-    static void addIndices(SPtr<Grid> grid, SPtr<gg::BoundaryCondition> boundaryCondition, std::string coord, real constant,
+    void addIndices(SPtr<Grid> grid, SPtr<gg::BoundaryCondition> boundaryCondition, std::string coord, real constant,
                            real startInner, real endInner, real startOuter, real endOuter);
 
     static void setPressureNeighborIndices(SPtr<gg::BoundaryCondition> boundaryCondition, SPtr<Grid> grid, const uint index);
 
     static void setStressSamplingIndices(SPtr<gg::BoundaryCondition> boundaryCondition, SPtr<Grid> grid, const uint index);
 
-    static void setQs(SPtr<Grid> grid, SPtr<gg::BoundaryCondition> boundaryCondition, uint index);
+    void setQs(SPtr<Grid> grid, SPtr<gg::BoundaryCondition> boundaryCondition, uint index);
 
 private:
     static uint getIndex(SPtr<Grid> grid, std::string coord, real constant, real v1, real v2);
diff --git a/src/gpu/GridGenerator/grid/Field.cpp b/src/gpu/GridGenerator/grid/Field.cpp
index 86985af60e1ca25c247b586dbc2f356c665a8875..8e68bfdf7e324a13a4c9d31d493580e56a48b6cd 100644
--- a/src/gpu/GridGenerator/grid/Field.cpp
+++ b/src/gpu/GridGenerator/grid/Field.cpp
@@ -38,7 +38,6 @@ using namespace vf::gpu;
 
 Field::Field(uint size) : size(size)
 {
-    
 }
 
 void Field::allocateMemory()
@@ -115,7 +114,7 @@ bool Field::isStopperCoarseUnderFine(uint index) const
 
 bool Field::isStopperSolid(uint index) const
 {
-	return field[index] == STOPPER_SOLID;
+    return field[index] == STOPPER_SOLID;
 }
 
 bool Field::isStopper(uint index) const
diff --git a/src/gpu/GridGenerator/grid/Field.h b/src/gpu/GridGenerator/grid/Field.h
index 08fff6da7c5a3f431138dc5039b4d234493ae4b8..bb25b0fc03537b05eaadfa5c3d161f83c1267fae 100644
--- a/src/gpu/GridGenerator/grid/Field.h
+++ b/src/gpu/GridGenerator/grid/Field.h
@@ -51,28 +51,28 @@ public:
     bool is(uint index, char type) const;
     bool isCoarseToFineNode(uint index) const;
     bool isFineToCoarseNode(uint index) const;
-	bool isFluid(uint index) const;
-	bool isInvalidSolid(uint index) const;
+    bool isFluid(uint index) const;
+    bool isInvalidSolid(uint index) const;
     bool isQ(uint index) const;
     bool isBoundaryConditionNode(uint index) const;
     bool isInvalidCoarseUnderFine(uint index) const;
     bool isStopperOutOfGrid(uint index) const;
     bool isStopperCoarseUnderFine(uint index) const;
-	bool isStopperSolid(uint index) const;
-	bool isStopper(uint index) const;
+    bool isStopperSolid(uint index) const;
+    bool isStopper(uint index) const;
     bool isInvalidOutOfGrid(uint index) const;
 
     void setFieldEntry(uint index, char val);
-	void setFieldEntryToFluid(uint index);
-	void setFieldEntryToInvalidSolid(uint index);
+    void setFieldEntryToFluid(uint index);
+    void setFieldEntryToInvalidSolid(uint index);
     void setFieldEntryToStopperOutOfGrid(uint index);
     void setFieldEntryToStopperOutOfGridBoundary(uint index);
     void setFieldEntryToStopperCoarseUnderFine(uint index);
     void setFieldEntryToInvalidCoarseUnderFine(uint index);
     void setFieldEntryToInvalidOutOfGrid(uint index);
 
-private:
-    char *field;
+protected:
+    char *field = nullptr;
     uint size;
 };
 
diff --git a/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h b/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h
index c2ee59324938d8d3aba89cabe052093b60a7e895..7416cb8551665a9539ee864e68451ef84fdc7917 100644
--- a/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h
+++ b/src/gpu/GridGenerator/grid/GridBuilder/GridBuilder.h
@@ -143,6 +143,8 @@ public:
     virtual uint getNumberOfReceiveIndices(int direction, uint level)          = 0;
     virtual void getSendIndices(int *sendIndices, int direction, int level)    = 0;
     virtual void getReceiveIndices(int *sendIndices, int direction, int level) = 0;
+
+    virtual void findFluidNodes(bool splitDomain) = 0;
 };
 
 #endif
\ No newline at end of file
diff --git a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp
index b67e5bbb21080f0a2bbc3f6a16ee7aa55b6c925e..b617e38232f07b39d9b2e2d3adb8d4f025a10b4d 100644
--- a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp
+++ b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.cpp
@@ -78,20 +78,23 @@ std::shared_ptr<LevelGridBuilder> LevelGridBuilder::makeShared()
 
 void LevelGridBuilder::setSlipBoundaryCondition(SideType sideType, real normalX, real normalY, real normalZ)
 {
-    if(sideType == SideType::GEOMETRY){
-        setSlipGeometryBoundaryCondition(normalX, normalY, normalZ);
-    }else{
-        SPtr<SlipBoundaryCondition> slipBoundaryCondition = SlipBoundaryCondition::make(normalX, normalY, normalZ);
+    for (uint level = 0; level < getNumberOfGridLevels(); level++)
+    {
+        if(sideType == SideType::GEOMETRY){
+            setSlipGeometryBoundaryCondition(normalX, normalY, normalZ);
+        }else{
+            SPtr<SlipBoundaryCondition> slipBoundaryCondition = SlipBoundaryCondition::make(normalX, normalY, normalZ);
 
-        auto side = SideFactory::make(sideType);
+            auto side = SideFactory::make(sideType);
 
-        slipBoundaryCondition->side = side;
-        slipBoundaryCondition->side->addIndices(grids, 0, slipBoundaryCondition);
+            slipBoundaryCondition->side = side;
+            slipBoundaryCondition->side->addIndices(grids, level, slipBoundaryCondition);
 
-        slipBoundaryCondition->fillSlipNormalLists();
-        boundaryConditions[0]->slipBoundaryConditions.push_back(slipBoundaryCondition);
+            slipBoundaryCondition->fillSlipNormalLists();
+            boundaryConditions[level]->slipBoundaryConditions.push_back(slipBoundaryCondition);
 
-        *logging::out << logging::Logger::INFO_INTERMEDIATE << "Set Slip BC on level " << 0 << " with " << (int)slipBoundaryCondition->indices.size() << "\n";
+            *logging::out << logging::Logger::INFO_INTERMEDIATE << "Set Slip BC on level " << level << " with " << (int)slipBoundaryCondition->indices.size() << "\n";
+        }
     }
 }
 
@@ -119,21 +122,24 @@ void LevelGridBuilder::setStressBoundaryCondition(  SideType sideType,
                                                     real nomalX, real normalY, real normalZ, 
                                                     uint samplingOffset, real z0)
 {
-    SPtr<StressBoundaryCondition> stressBoundaryCondition = StressBoundaryCondition::make(nomalX, normalY, normalZ, samplingOffset, z0);
+    for (uint level = 0; level < getNumberOfGridLevels(); level++)
+    {
+        SPtr<StressBoundaryCondition> stressBoundaryCondition = StressBoundaryCondition::make(nomalX, normalY, normalZ, samplingOffset, z0);
 
-    auto side = SideFactory::make(sideType);
+        auto side = SideFactory::make(sideType);
 
-    stressBoundaryCondition->side = side;
-    stressBoundaryCondition->side->addIndices(grids, 0, stressBoundaryCondition);
+        stressBoundaryCondition->side = side;
+        stressBoundaryCondition->side->addIndices(grids, level, stressBoundaryCondition);
 
-    stressBoundaryCondition->fillStressNormalLists();
-    stressBoundaryCondition->fillSamplingOffsetLists();
-    stressBoundaryCondition->fillZ0Lists();
-    // stressBoundaryCondition->fillSamplingIndices(grids, 0, samplingOffset); //redundant with Side::setStressSamplingIndices but potentially a better approach for cases with complex geometries
+        stressBoundaryCondition->fillStressNormalLists();
+        stressBoundaryCondition->fillSamplingOffsetLists();
+        stressBoundaryCondition->fillZ0Lists();
+        // stressBoundaryCondition->fillSamplingIndices(grids, 0, samplingOffset); //redundant with Side::setStressSamplingIndices but potentially a better approach for cases with complex geometries
 
-    boundaryConditions[0]->stressBoundaryConditions.push_back(stressBoundaryCondition);
+        boundaryConditions[level]->stressBoundaryConditions.push_back(stressBoundaryCondition);
 
-    *logging::out << logging::Logger::INFO_INTERMEDIATE << "Set Stress BC on level " << 0 << " with " << (int)stressBoundaryCondition->indices.size() << "\n";
+        *logging::out << logging::Logger::INFO_INTERMEDIATE << "Set Stress BC on level " << level << " with " << (int)stressBoundaryCondition->indices.size() << "\n";
+    }
 }
 
 void LevelGridBuilder::setVelocityBoundaryCondition(SideType sideType, real vx, real vy, real vz)
@@ -536,13 +542,13 @@ void LevelGridBuilder::getVelocityValues(real* vx, real* vy, real* vz, int* indi
     int allIndicesCounter = 0;
     for (auto boundaryCondition : boundaryConditions[level]->velocityBoundaryConditions)
     {
-        for(std::size_t i = 0; i < boundaryCondition->indices.size(); i++)
+        for (uint i = 0; i < (uint)boundaryCondition->indices.size(); i++)
         {
             indices[allIndicesCounter] = grids[level]->getSparseIndex(boundaryCondition->indices[i]) +1;  
 
-            vx[allIndicesCounter] = boundaryCondition->getVx((uint)i);
-            vy[allIndicesCounter] = boundaryCondition->getVy((uint)i);
-            vz[allIndicesCounter] = boundaryCondition->getVz((uint)i);
+            vx[allIndicesCounter] = boundaryCondition->getVx(i);
+            vy[allIndicesCounter] = boundaryCondition->getVy(i);
+            vz[allIndicesCounter] = boundaryCondition->getVz(i);
             allIndicesCounter++;
         }
     }
@@ -753,3 +759,11 @@ GRIDGENERATOR_EXPORT SPtr<GeometryBoundaryCondition> LevelGridBuilder::getGeomet
 {
     return this->boundaryConditions[level]->geometryBoundaryCondition;
 }
+
+void LevelGridBuilder::findFluidNodes(bool splitDomain)
+{
+    *logging::out << logging::Logger::INFO_HIGH << "Start findFluidNodes()\n";
+    for (uint i = 0; i < grids.size(); i++)
+        grids[i]->findFluidNodeIndices(splitDomain);
+    *logging::out << logging::Logger::INFO_HIGH << "Done with findFluidNodes()\n";
+}
\ No newline at end of file
diff --git a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h
index 97fc60f84bde89405fc244cf5e9bf40f44145da3..69938c31e61fe3fc966bf9937353c20636926f99 100644
--- a/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h
+++ b/src/gpu/GridGenerator/grid/GridBuilder/LevelGridBuilder.h
@@ -204,6 +204,10 @@ public:
     GRIDGENERATOR_EXPORT uint getNumberOfReceiveIndices(int direction, uint level) override;
     GRIDGENERATOR_EXPORT void getSendIndices(int *sendIndices, int direction, int level) override;
     GRIDGENERATOR_EXPORT void getReceiveIndices(int *sendIndices, int direction, int level) override;
+
+
+    // needed for CUDA Streams MultiGPU (Communication Hiding)
+    void findFluidNodes(bool splitDomain) override;
 };
 
 #endif
diff --git a/src/gpu/GridGenerator/grid/GridBuilder/MultipleGridBuilder.cpp b/src/gpu/GridGenerator/grid/GridBuilder/MultipleGridBuilder.cpp
index 0d903b9a4fdb067155dd6b9ee6c60257a63b3ad0..da18a883181069f089e7232c9cd1b4f19cc9dc35 100644
--- a/src/gpu/GridGenerator/grid/GridBuilder/MultipleGridBuilder.cpp
+++ b/src/gpu/GridGenerator/grid/GridBuilder/MultipleGridBuilder.cpp
@@ -626,14 +626,6 @@ void MultipleGridBuilder::findCommunicationIndices(int direction, LbmOrGks lbmOr
     *logging::out << logging::Logger::INFO_HIGH << "Done with findCommunicationIndices()\n";
 }
 
-void MultipleGridBuilder::findFluidNodes(bool splitDomain)
-{
-    *logging::out << logging::Logger::INFO_HIGH << "Start findFluidNodes()\n";
-    for (uint i = 0; i < grids.size(); i++)
-        grids[i]->findFluidNodeIndices(splitDomain);
-    *logging::out << logging::Logger::INFO_HIGH << "Done with findFluidNodes()\n";
-}
-
 void MultipleGridBuilder::writeGridsToVtk(const std::string& path) const
 {
     for(uint level = 0; level < grids.size(); level++)
diff --git a/src/gpu/GridGenerator/grid/GridBuilder/MultipleGridBuilder.h b/src/gpu/GridGenerator/grid/GridBuilder/MultipleGridBuilder.h
index e28be0087b44d599a792f2f265d3286b650eca63..9627fb0bf7e97a925d4b0ba2c450c507426a48f4 100644
--- a/src/gpu/GridGenerator/grid/GridBuilder/MultipleGridBuilder.h
+++ b/src/gpu/GridGenerator/grid/GridBuilder/MultipleGridBuilder.h
@@ -114,9 +114,6 @@ private:
 
 public:
     GRIDGENERATOR_EXPORT void findCommunicationIndices(int direction, LbmOrGks lbmOrGks);
-
-    // needed for CUDA Streams MultiGPU
-    void findFluidNodes(bool splitDomain);
 };
 
 #endif
diff --git a/src/gpu/GridGenerator/grid/GridImp.cpp b/src/gpu/GridGenerator/grid/GridImp.cpp
index f6afafcd521245222c33972dcb46a9e9b2879826..31bbf3ddc87184846fcb01a3e6631358b6a6f864 100644
--- a/src/gpu/GridGenerator/grid/GridImp.cpp
+++ b/src/gpu/GridGenerator/grid/GridImp.cpp
@@ -32,8 +32,6 @@
 //=======================================================================================
 #include "GridImp.h"
 
-#include <stdio.h>
-#include <time.h>
 #include <iostream>
 #include <omp.h>
 #include <sstream>
@@ -995,13 +993,13 @@ void GridImp::setStopperNeighborCoords(uint index)
     real x, y, z;
     this->transIndexToCoords(index, x, y, z);
 
-    if (vf::Math::lessEqual(x + delta, endX) && !this->field.isInvalidOutOfGrid(this->transCoordToIndex(x + delta, y, z)))
+    if (vf::Math::lessEqual(x + delta, endX + (0.5 * delta)) && !this->field.isInvalidOutOfGrid(this->transCoordToIndex(x + delta, y, z)))
         neighborIndexX[index] = getSparseIndex(x + delta, y, z);
 
-    if (vf::Math::lessEqual(y + delta, endY) && !this->field.isInvalidOutOfGrid(this->transCoordToIndex(x, y + delta, z)))
+    if (vf::Math::lessEqual(y + delta, endY + (0.5 * delta)) && !this->field.isInvalidOutOfGrid(this->transCoordToIndex(x, y + delta, z)))
         neighborIndexY[index] = getSparseIndex(x, y + delta, z);
 
-    if (vf::Math::lessEqual(z + delta, endZ) && !this->field.isInvalidOutOfGrid(this->transCoordToIndex(x, y, z + delta)))
+    if (vf::Math::lessEqual(z + delta, endZ + (0.5 * delta)) && !this->field.isInvalidOutOfGrid(this->transCoordToIndex(x, y, z + delta)))
         neighborIndexZ[index] = getSparseIndex(x, y, z + delta);
 
     if (vf::Math::greaterEqual(x - delta, endX) && 
@@ -1265,7 +1263,7 @@ void GridImp::mesh(Triangle &triangle)
                     continue;
 
                 const Vertex point(x, y, z);
-                const int value = triangle.isUnderFace(point);
+                const char value = triangle.isUnderFace(point);
                 //setDebugPoint(index, value);
 
                 if (value == Q_DEPRECATED)
diff --git a/src/gpu/GridGenerator/grid/GridImp.h b/src/gpu/GridGenerator/grid/GridImp.h
index ee30e2b4aaadd737e1fa096eec3b815768ddd0a0..158cba9e67650d65c20e63aa6a35d45f129e2baa 100644
--- a/src/gpu/GridGenerator/grid/GridImp.h
+++ b/src/gpu/GridGenerator/grid/GridImp.h
@@ -109,11 +109,9 @@ private:
     uint sparseSize;
     bool periodicityX = false, periodicityY = false, periodicityZ = false;
 
-    Field field;
     Object* object;
     GridInterface *gridInterface;
 
-    int *neighborIndexX, *neighborIndexY, *neighborIndexZ, *neighborIndexNegative;
     int *sparseIndices;
 
     std::vector<uint> fluidNodeIndices;
@@ -133,6 +131,10 @@ private:
 
     bool enableFixRefinementIntoTheWall;
 
+protected:
+    Field field;
+    int *neighborIndexX, *neighborIndexY, *neighborIndexZ, *neighborIndexNegative;
+
 public:
     void inital(const SPtr<Grid> fineGrid, uint numberOfLayers) override;
     void setOddStart(bool xOddStart, bool yOddStart, bool zOddStart) override;
@@ -155,11 +157,11 @@ public:
     uint transCoordToIndex(const real &x, const real &y, const real &z) const override;
     void transIndexToCoords(uint index, real &x, real &y, real &z) const override;
 
-    virtual void findGridInterface(SPtr<Grid> grid, LbmOrGks lbmOrGks) override;
+    void findGridInterface(SPtr<Grid> grid, LbmOrGks lbmOrGks) override;
 
     void repairGridInterfaceOnMultiGPU(SPtr<Grid> fineGrid) override;
 
-    virtual void limitToSubDomain(SPtr<BoundingBox> subDomainBox, LbmOrGks lbmOrGks) override;
+    void limitToSubDomain(SPtr<BoundingBox> subDomainBox, LbmOrGks lbmOrGks) override;
 
     void freeMemory() override;
 
@@ -277,15 +279,16 @@ public:
     void setNeighborIndices(uint index);
     real getFirstFluidNode(real coords[3], int direction, real startCoord) const override;
     real getLastFluidNode(real coords[3], int direction, real startCoord) const override;
+protected:
+    virtual void setStopperNeighborCoords(uint index);
 private:
-    void setStopperNeighborCoords(uint index);
     void getNeighborCoords(real &neighborX, real &neighborY, real &neighborZ, real x, real y, real z) const;
     real getNeighborCoord(bool periodicity, real endCoord, real coords[3], int direction) const;
     void getNegativeNeighborCoords(real &neighborX, real &neighborY, real &neighborZ, real x, real y, real z) const;
     real getNegativeNeighborCoord(bool periodicity, real endCoord, real coords[3], int direction) const;
     
 
-    int getSparseIndex(const real &expectedX, const real &expectedY, const real &expectedZ) const;
+    virtual int getSparseIndex(const real &expectedX, const real &expectedY, const real &expectedZ) const;
 
     static real getMinimumOnNodes(const real &minExact, const real &decimalStart, const real &delta);
     static real getMaximumOnNodes(const real &maxExact, const real &decimalStart, const real &delta);
diff --git a/src/gpu/GridGenerator/grid/GridImpTest.cpp b/src/gpu/GridGenerator/grid/GridImpTest.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8f5ddb1b01dd88cca7d750017ec328efe02cd92f
--- /dev/null
+++ b/src/gpu/GridGenerator/grid/GridImpTest.cpp
@@ -0,0 +1,258 @@
+#include <array>
+#include <gmock/gmock-matchers.h>
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+#include <memory>
+#include <ostream>
+
+#include "GridImp.h"
+#include "PointerDefinitions.h"
+#include "grid/Field.h"
+#include "grid/GridBuilder/MultipleGridBuilder.h"
+#include "grid/distributions/Distribution.h"
+
+// This test is commented out because it causes a compiler error in Clang 10 --> The bug is fixed in Clang 14 (https://github.com/google/googletest/issues/2271)
+
+// class FieldDouble : public Field
+// {
+// public:
+//     FieldDouble() : Field(1)
+//     {
+//         this->allocateMemory();
+//     };
+
+//     void setToStopper(uint index)
+//     {
+//         this->field[index] = vf::gpu::STOPPER_SOLID;
+//     }
+// };
+
+// class GridImpDouble : public GridImp
+// {
+// public:
+//     std::array<real, 3> coordsOfTestedNode;
+//     GridImpDouble(Object *object, real startX, real startY, real startZ, real endX, real endY, real endZ, real delta,
+//                   Distribution d, uint level)
+//         : GridImp(object, startX, startY, startZ, endX, endY, endZ, delta, d, level)
+//     {
+//         this->neighborIndexX = new int[5];
+//         this->neighborIndexY = new int[5];
+//         this->neighborIndexZ = new int[5];
+//     }
+
+//     static SPtr<GridImpDouble> makeShared(Object *object, real startX, real startY, real startZ, real endX, real endY,
+//                                           real endZ, real delta, Distribution d, uint level)
+//     {
+//         SPtr<GridImpDouble> grid(new GridImpDouble(object, startX, startY, startZ, endX, endY, endZ, delta, d, level));
+//         return grid;
+//     }
+
+//     void transIndexToCoords(uint, real &x, real &y, real &z) const override
+//     {
+//         x = coordsOfTestedNode[0];
+//         y = coordsOfTestedNode[1];
+//         z = coordsOfTestedNode[2];
+//     }
+
+//     uint transCoordToIndex(const real &, const real &, const real &) const override
+//     {
+//         return 0;
+//     }
+
+//     void setStopperNeighborCoords(uint index) override
+//     {
+//         GridImp::setStopperNeighborCoords(index);
+//     }
+
+//     void setField(Field &field)
+//     {
+//         this->field = field;
+//     }
+
+//     MOCK_METHOD(int, getSparseIndex, (const real &x, const real &y, const real &z), (const, override));
+// };
+
+// // This test is highly dependent on the implementation. Maybe it should be removed :(
+// TEST(GridImp, setStopperNeighborCoords)
+// {
+//     real end = 1.0;
+//     real delta = 0.1;
+
+//     SPtr<GridImpDouble> gridImp =
+//         GridImpDouble::makeShared(nullptr, 0.0, 0.0, 0.0, end, end, end, delta, Distribution(), 0);
+//     FieldDouble field;
+//     field.setToStopper(0);
+//     gridImp->setField(field);
+
+//     gridImp->coordsOfTestedNode = { end - ((real)0.5 * delta), end - ((real)0.5 * delta), end - ((real)0.5 * delta) };
+//     EXPECT_CALL(*gridImp, getSparseIndex).Times(3);
+//     gridImp->setStopperNeighborCoords(0);
+
+//     gridImp->coordsOfTestedNode = { end - ((real)0.51 * delta), end - ((real)0.51 * delta),
+//                                     end - ((real)0.51 * delta) };
+//     EXPECT_CALL(*gridImp, getSparseIndex).Times(3);
+//     gridImp->setStopperNeighborCoords(0);
+//     gridImp->coordsOfTestedNode = { end - ((real)0.99 * delta), end - ((real)0.99 * delta),
+//                                     end - ((real)0.99 * delta) };
+//     EXPECT_CALL(*gridImp, getSparseIndex).Times(3);
+//     gridImp->setStopperNeighborCoords(0);
+
+//     gridImp->coordsOfTestedNode = { end - delta, end - delta, end - delta };
+//     EXPECT_CALL(*gridImp, getSparseIndex).Times(3);
+//     gridImp->setStopperNeighborCoords(0);
+
+//     gridImp->coordsOfTestedNode = { end - ((real)1.01 * delta), end - ((real)1.01 * delta),
+//                                     end - ((real)1.01 * delta) };
+//     EXPECT_CALL(*gridImp, getSparseIndex).Times(3);
+//     gridImp->setStopperNeighborCoords(0);
+
+//     // The grid should not be like this, so this should be fine...
+//     gridImp->coordsOfTestedNode = { end, end, end };
+//     EXPECT_CALL(*gridImp, getSparseIndex).Times(0);
+//     gridImp->setStopperNeighborCoords(0);
+
+//     gridImp->coordsOfTestedNode = { end - ((real)0.25 * delta), end - ((real)0.25 * delta),
+//                                     end - ((real)0.25 * delta) };
+//     EXPECT_CALL(*gridImp, getSparseIndex).Times(0);
+//     gridImp->setStopperNeighborCoords(0);
+// }
+
+std::array<int, 3> countInvalidNeighbors(SPtr<Grid> grid)
+{
+    // Tallies, separately for x, y and z, how many node indices have the neighbor entry -1.
+    // The returned array is ordered { x, y, z }.
+    std::array<int, 3> invalid = { 0, 0, 0 };
+    for (uint node = 0; node < grid->getSize(); ++node) {
+        if (grid->getNeighborsX()[node] == -1)
+            ++invalid[0];
+        if (grid->getNeighborsY()[node] == -1)
+            ++invalid[1];
+        if (grid->getNeighborsZ()[node] == -1)
+            ++invalid[2];
+    }
+    return invalid;
+}
+
+std::array<int, 3> testFluidNodeNeighbors(SPtr<Grid> grid)
+{
+    // For each node whose field entry is FLUID, follow the neighbor indices x -> y -> z and count
+    // the first step that yields -1. Returned order: { failed at x, failed at y, failed at z }.
+    std::array<int, 3> broken = { 0, 0, 0 };
+
+    for (uint node = 0; node < grid->getSize(); ++node) {
+        if (grid->getFieldEntry(node) != vf::gpu::FLUID)
+            continue;
+
+        // step 1: neighbor in x of the node itself
+        const auto viaX = grid->getNeighborsX()[node];
+        if (viaX == -1) {
+            ++broken[0];
+            continue;
+        }
+
+        // step 2: neighbor in y of the x-neighbor
+        const auto viaXY = grid->getNeighborsY()[viaX];
+        if (viaXY == -1) {
+            ++broken[1];
+            continue;
+        }
+
+        // step 3: neighbor in z of the xy-neighbor; nothing follows, so no `continue` is needed
+        if (grid->getNeighborsZ()[viaXY] == -1)
+            ++broken[2];
+    }
+
+    return broken;
+}
+
+class findNeighborsIntegrationTest : public ::testing::Test
+{
+protected:
+    SPtr<MultipleGridBuilder> gridBuilder; // re-created in SetUp()
+    void SetUp() override
+    {
+        auto factory = GridFactory::make();
+        factory->setTriangularMeshDiscretizationMethod(TriangularMeshDiscretizationMethod::POINT_IN_OBJECT);
+        gridBuilder = MultipleGridBuilder::makeShared(factory);
+        // The logger has to be initialized here, otherwise buildGrids ends in a segmentation fault.
+        using Logger = logging::Logger;
+        Logger::addStream(&std::cout);
+        Logger::setDebugLevel(Logger::Level::WARNING);
+        Logger::timeStamp(Logger::ENABLE);
+        Logger::enablePrintedRankNumbers(Logger::ENABLE);
+    }
+};
+
+TEST_F(findNeighborsIntegrationTest, grid1)
+{
+    const real delta = 0.15;
+    gridBuilder->addCoarseGrid(0.0, 0.0, 0.0, 1.0, 1.0, 1.0, delta);
+
+    gridBuilder->buildGrids(LBM, false);
+    const auto coarseGrid = gridBuilder->getGrid(0);
+
+    // The grid is a cube with a side length of 9 nodes, and only its last layer of nodes should
+    // have invalid neighbors -> 9 * 9 = 81 entries of -1 are expected per direction.
+    const int expectedInvalid = 9 * 9;
+    const auto invalidNeighbors = countInvalidNeighbors(coarseGrid);
+    EXPECT_THAT(invalidNeighbors[0], testing::Eq(expectedInvalid));
+    EXPECT_THAT(invalidNeighbors[1], testing::Eq(expectedInvalid));
+    EXPECT_THAT(invalidNeighbors[2], testing::Eq(expectedInvalid));
+
+    // Additionally, no fluid node may have a -1 anywhere along its x -> y -> z neighbor chain.
+    const auto invalidFluidNeighbors = testFluidNodeNeighbors(coarseGrid);
+    EXPECT_THAT(invalidFluidNeighbors[0], testing::Eq(0));
+    EXPECT_THAT(invalidFluidNeighbors[1], testing::Eq(0));
+    EXPECT_THAT(invalidFluidNeighbors[2], testing::Eq(0));
+}
+
+TEST_F(findNeighborsIntegrationTest, grid2)
+{
+    const real delta = 1.0 / 64;
+    gridBuilder->addCoarseGrid(-0.6, -0.6, -0.6, 0.6, 0.6, 0.6, delta);
+
+    gridBuilder->buildGrids(LBM, false);
+    const auto coarseGrid = gridBuilder->getGrid(0);
+
+    // The grid is a cube with a side length of 79 nodes, and only its last layer of nodes should
+    // have invalid neighbors -> 79 * 79 entries of -1 are expected per direction.
+    const int expectedInvalid = 79 * 79;
+    const auto invalidNeighbors = countInvalidNeighbors(coarseGrid);
+    EXPECT_THAT(invalidNeighbors[0], testing::Eq(expectedInvalid));
+    EXPECT_THAT(invalidNeighbors[1], testing::Eq(expectedInvalid));
+    EXPECT_THAT(invalidNeighbors[2], testing::Eq(expectedInvalid));
+
+    // Additionally, no fluid node may have a -1 anywhere along its x -> y -> z neighbor chain.
+    const auto invalidFluidNeighbors = testFluidNodeNeighbors(coarseGrid);
+    EXPECT_THAT(invalidFluidNeighbors[0], testing::Eq(0));
+    EXPECT_THAT(invalidFluidNeighbors[1], testing::Eq(0));
+    EXPECT_THAT(invalidFluidNeighbors[2], testing::Eq(0));
+}
+
+TEST_F(findNeighborsIntegrationTest, validFluidNeighbors1)
+{
+    // With dx = 0.17, testFluidNodeNeighbors must report zero for all three of its counters.
+    const real delta = 0.17;
+    gridBuilder->addCoarseGrid(0.0, 0.0, 0.0, 1.0, 1.0, 1.0, delta);
+    gridBuilder->buildGrids(LBM, false);
+
+    const auto coarseGrid = gridBuilder->getGrid(0);
+    const auto invalidFluidNeighbors = testFluidNodeNeighbors(coarseGrid);
+    EXPECT_THAT(invalidFluidNeighbors[0], testing::Eq(0));
+    EXPECT_THAT(invalidFluidNeighbors[1], testing::Eq(0));
+    EXPECT_THAT(invalidFluidNeighbors[2], testing::Eq(0));
+}
+
+TEST_F(findNeighborsIntegrationTest, validFluidNeighbors2)
+{
+    // With dx = 0.18, testFluidNodeNeighbors must report zero for all three of its counters.
+    const real delta = 0.18;
+    gridBuilder->addCoarseGrid(0.0, 0.0, 0.0, 1.0, 1.0, 1.0, delta);
+    gridBuilder->buildGrids(LBM, false);
+
+    const auto coarseGrid = gridBuilder->getGrid(0);
+    const auto invalidFluidNeighbors = testFluidNodeNeighbors(coarseGrid);
+    EXPECT_THAT(invalidFluidNeighbors[0], testing::Eq(0));
+    EXPECT_THAT(invalidFluidNeighbors[1], testing::Eq(0));
+    EXPECT_THAT(invalidFluidNeighbors[2], testing::Eq(0));
+}
diff --git a/src/gpu/GridGenerator/grid/GridInterface.cpp b/src/gpu/GridGenerator/grid/GridInterface.cpp
index 9194f12824e2a95b4880647586eeba70be9984b1..a373b0eafbdb6a33338f54bf3efdc47a0edca315 100644
--- a/src/gpu/GridGenerator/grid/GridInterface.cpp
+++ b/src/gpu/GridGenerator/grid/GridInterface.cpp
@@ -376,7 +376,7 @@ uint GridInterface::findOffsetCF(const uint& indexOnCoarseGrid, GridImp* coarseG
     Cell cell(x, y, z, coarseGrid->getDelta());
 
     if( coarseGrid->cellContainsOnly( cell, FLUID, FLUID_CFC ) ){
-        this->cf.offset[ interfaceIndex ] = dir::REST;
+        this->cf.offset[ interfaceIndex ] = dir::DIR_000;
         return indexOnCoarseGrid;
     }
 
@@ -411,7 +411,7 @@ uint GridInterface::findOffsetFC(const uint& indexOnFineGrid, GridImp* fineGrid,
     Cell cell(x, y, z, fineGrid->getDelta());
 
     if( fineGrid->cellContainsOnly( cell, FLUID, FLUID_FCF ) ){
-        this->fc.offset[ interfaceIndex ] = dir::REST;
+        this->fc.offset[ interfaceIndex ] = dir::DIR_000;
         return indexOnFineGrid;
     }
 
diff --git a/src/gpu/GridGenerator/grid/GridInterface.h b/src/gpu/GridGenerator/grid/GridInterface.h
index b5f71317e7755a8b6bcfe3da084e0fc9155642f8..713d495d4386e0fe743357a803b84be02c061561 100644
--- a/src/gpu/GridGenerator/grid/GridInterface.h
+++ b/src/gpu/GridGenerator/grid/GridInterface.h
@@ -47,9 +47,9 @@ public:
     void GRIDGENERATOR_EXPORT findBoundaryGridInterfaceCF(const uint& indexOnCoarseGrid, GridImp* coarseGrid, GridImp* fineGrid);
 
 
-	void GRIDGENERATOR_EXPORT findInterfaceCF_GKS(const uint& indexOnCoarseGrid, GridImp* coarseGrid, GridImp* fineGrid);
+    void GRIDGENERATOR_EXPORT findInterfaceCF_GKS(const uint& indexOnCoarseGrid, GridImp* coarseGrid, GridImp* fineGrid);
 
-	void GRIDGENERATOR_EXPORT findInterfaceFC(const uint& indexOnCoarseGrid, GridImp* coarseGrid, GridImp* fineGrid);
+    void GRIDGENERATOR_EXPORT findInterfaceFC(const uint& indexOnCoarseGrid, GridImp* coarseGrid, GridImp* fineGrid);
     void GRIDGENERATOR_EXPORT findOverlapStopper(const uint& indexOnCoarseGrid, GridImp* coarseGrid, GridImp* fineGrid);
     
     void GRIDGENERATOR_EXPORT findInvalidBoundaryNodes(const uint& indexOnCoarseGrid, GridImp* coarseGrid);
@@ -66,7 +66,7 @@ public:
         uint *fine, *coarse;
         uint numberOfEntries = 0;
         uint *offset;
-    } fc, cf;
+    } fc{}, cf{};
 
 
 private:
diff --git a/src/gpu/GridGenerator/grid/NodeValues.h b/src/gpu/GridGenerator/grid/NodeValues.h
index b8312b0673337d11b4bdf0b8052e89d92ce127ef..f1a948cd9a84d1d2454c37c9a23a25639f598e75 100644
--- a/src/gpu/GridGenerator/grid/NodeValues.h
+++ b/src/gpu/GridGenerator/grid/NodeValues.h
@@ -33,9 +33,7 @@
 #ifndef NodeValues_H
 #define NodeValues_H
 
-namespace vf
-{
-namespace gpu
+namespace vf::gpu
 {
 
 static constexpr char FLUID = 0;
@@ -73,7 +71,6 @@ static constexpr char Q_DEPRECATED              = 52;
 
 static constexpr char OVERLAP_TMP = 60;
 
-} // namespace gpu
-} // namespace vf
+}
 
 #endif
diff --git a/src/gpu/GridGenerator/grid/distributions/Distribution.cpp b/src/gpu/GridGenerator/grid/distributions/Distribution.cpp
index 8192c114413dfdf32492717b57870cadd35130cb..a6e11ac909186618d765596f15263035b3b6401a 100644
--- a/src/gpu/GridGenerator/grid/distributions/Distribution.cpp
+++ b/src/gpu/GridGenerator/grid/distributions/Distribution.cpp
@@ -35,8 +35,8 @@
 #include <stdio.h>
 
 #include "grid/distributions/D3Q27.h"
-#include "lbm/constants/D3Q27.h"
 #include "grid/Grid.h"
+#include "lbm/constants/D3Q27.h"
 using namespace vf::lbm::dir;
 
 Distribution DistributionHelper::getDistribution27() 
@@ -49,148 +49,148 @@ Distribution DistributionHelper::getDistribution27()
     d27.dirs = new int[(ENDDIR + 1) * DIMENSION];
 
     d27.directions = new Direction[ENDDIR + 1];
-    d27.directions[E] = Direction(DIR_27_E_X, DIR_27_E_Y, DIR_27_E_Z);
-    d27.directions[W] = Direction(DIR_27_W_X, DIR_27_W_Y, DIR_27_W_Z);
-    d27.directions[N] = Direction(DIR_27_N_X, DIR_27_N_Y, DIR_27_N_Z);
-    d27.directions[S] = Direction(DIR_27_S_X, DIR_27_S_Y, DIR_27_S_Z);
-    d27.directions[T] = Direction(DIR_27_T_X, DIR_27_T_Y, DIR_27_T_Z);
-    d27.directions[B] = Direction(DIR_27_B_X, DIR_27_B_Y, DIR_27_B_Z);
-
-    d27.directions[NE] = Direction(DIR_27_NE_X, DIR_27_NE_Y, DIR_27_NE_Z);
-    d27.directions[SW] = Direction(DIR_27_SW_X, DIR_27_SW_Y, DIR_27_SW_Z);
-    d27.directions[SE] = Direction(DIR_27_SE_X, DIR_27_SE_Y, DIR_27_SE_Z);
-    d27.directions[NW] = Direction(DIR_27_NW_X, DIR_27_NW_Y, DIR_27_NW_Z);
-
-    d27.directions[TE] = Direction(DIR_27_TE_X, DIR_27_TE_Y, DIR_27_TE_Z);
-    d27.directions[BW] = Direction(DIR_27_BW_X, DIR_27_BW_Y, DIR_27_BW_Z);
-    d27.directions[BE] = Direction(DIR_27_BE_X, DIR_27_BE_Y, DIR_27_BE_Z);
-    d27.directions[TW] = Direction(DIR_27_TW_X, DIR_27_TW_Y, DIR_27_TW_Z);
-
-    d27.directions[TN] = Direction(DIR_27_TN_X, DIR_27_TN_Y, DIR_27_TN_Z);
-    d27.directions[BS] = Direction(DIR_27_BS_X, DIR_27_BS_Y, DIR_27_BS_Z);
-    d27.directions[BN] = Direction(DIR_27_BN_X, DIR_27_BN_Y, DIR_27_BN_Z);
-    d27.directions[TS] = Direction(DIR_27_TS_X, DIR_27_TS_Y, DIR_27_TS_Z);
-
-    d27.directions[REST] = Direction(DIR_27_REST_X, DIR_27_REST_Y, DIR_27_REST_Z);
-
-    d27.directions[TNE] = Direction(DIR_27_TNE_X, DIR_27_TNE_Y, DIR_27_TNE_Z);
-    d27.directions[TNW] = Direction(DIR_27_TNW_X, DIR_27_TNW_Y, DIR_27_TNW_Z);
-    d27.directions[TSE] = Direction(DIR_27_TSE_X, DIR_27_TSE_Y, DIR_27_TSE_Z);
-    d27.directions[TSW] = Direction(DIR_27_TSW_X, DIR_27_TSW_Y, DIR_27_TSW_Z);
-
-    d27.directions[BNE] = Direction(DIR_27_BNE_X, DIR_27_BNE_Y, DIR_27_BNE_Z);
-    d27.directions[BNW] = Direction(DIR_27_BNW_X, DIR_27_BNW_Y, DIR_27_BNW_Z);
-    d27.directions[BSE] = Direction(DIR_27_BSE_X, DIR_27_BSE_Y, DIR_27_BSE_Z);
-    d27.directions[BSW] = Direction(DIR_27_BSW_X, DIR_27_BSW_Y, DIR_27_BSW_Z);
-
-
-    d27.dirs[E * 3    ] = DIR_27_E_X;
-    d27.dirs[E * 3 + 1] = DIR_27_E_Y;
-    d27.dirs[E * 3 + 2] = DIR_27_E_Z;
-
-    d27.dirs[W * 3    ] = DIR_27_W_X;
-    d27.dirs[W * 3 + 1] = DIR_27_W_Y;
-    d27.dirs[W * 3 + 2] = DIR_27_W_Z;
+    d27.directions[DIR_P00] = Direction(DIR_27_E_X, DIR_27_E_Y, DIR_27_E_Z);
+    d27.directions[DIR_M00] = Direction(DIR_27_W_X, DIR_27_W_Y, DIR_27_W_Z);
+    d27.directions[DIR_0P0] = Direction(DIR_27_N_X, DIR_27_N_Y, DIR_27_N_Z);
+    d27.directions[DIR_0M0] = Direction(DIR_27_S_X, DIR_27_S_Y, DIR_27_S_Z);
+    d27.directions[DIR_00P] = Direction(DIR_27_T_X, DIR_27_T_Y, DIR_27_T_Z);
+    d27.directions[DIR_00M] = Direction(DIR_27_B_X, DIR_27_B_Y, DIR_27_B_Z);
+
+    d27.directions[DIR_PP0] = Direction(DIR_27_NE_X, DIR_27_NE_Y, DIR_27_NE_Z);
+    d27.directions[DIR_MM0] = Direction(DIR_27_SW_X, DIR_27_SW_Y, DIR_27_SW_Z);
+    d27.directions[DIR_PM0] = Direction(DIR_27_SE_X, DIR_27_SE_Y, DIR_27_SE_Z);
+    d27.directions[DIR_MP0] = Direction(DIR_27_NW_X, DIR_27_NW_Y, DIR_27_NW_Z);
+
+    d27.directions[DIR_P0P] = Direction(DIR_27_TE_X, DIR_27_TE_Y, DIR_27_TE_Z);
+    d27.directions[DIR_M0M] = Direction(DIR_27_BW_X, DIR_27_BW_Y, DIR_27_BW_Z);
+    d27.directions[DIR_P0M] = Direction(DIR_27_BE_X, DIR_27_BE_Y, DIR_27_BE_Z);
+    d27.directions[DIR_M0P] = Direction(DIR_27_TW_X, DIR_27_TW_Y, DIR_27_TW_Z);
+
+    d27.directions[DIR_0PP] = Direction(DIR_27_TN_X, DIR_27_TN_Y, DIR_27_TN_Z);
+    d27.directions[DIR_0MM] = Direction(DIR_27_BS_X, DIR_27_BS_Y, DIR_27_BS_Z);
+    d27.directions[DIR_0PM] = Direction(DIR_27_BN_X, DIR_27_BN_Y, DIR_27_BN_Z);
+    d27.directions[DIR_0MP] = Direction(DIR_27_TS_X, DIR_27_TS_Y, DIR_27_TS_Z);
+
+    d27.directions[DIR_000] = Direction(DIR_27_REST_X, DIR_27_REST_Y, DIR_27_REST_Z);
+
+    d27.directions[DIR_PPP] = Direction(DIR_27_TNE_X, DIR_27_TNE_Y, DIR_27_TNE_Z);
+    d27.directions[DIR_MPP] = Direction(DIR_27_TNW_X, DIR_27_TNW_Y, DIR_27_TNW_Z);
+    d27.directions[DIR_PMP] = Direction(DIR_27_TSE_X, DIR_27_TSE_Y, DIR_27_TSE_Z);
+    d27.directions[DIR_MMP] = Direction(DIR_27_TSW_X, DIR_27_TSW_Y, DIR_27_TSW_Z);
+
+    d27.directions[DIR_PPM] = Direction(DIR_27_BNE_X, DIR_27_BNE_Y, DIR_27_BNE_Z);
+    d27.directions[DIR_MPM]= Direction(DIR_27_BNW_X, DIR_27_BNW_Y, DIR_27_BNW_Z);
+    d27.directions[DIR_PMM]= Direction(DIR_27_BSE_X, DIR_27_BSE_Y, DIR_27_BSE_Z);
+    d27.directions[DIR_MMM] = Direction(DIR_27_BSW_X, DIR_27_BSW_Y, DIR_27_BSW_Z);
+
+
+    d27.dirs[DIR_P00 * 3    ] = DIR_27_E_X;
+    d27.dirs[DIR_P00 * 3 + 1] = DIR_27_E_Y;
+    d27.dirs[DIR_P00 * 3 + 2] = DIR_27_E_Z;
+
+    d27.dirs[DIR_M00 * 3    ] = DIR_27_W_X;
+    d27.dirs[DIR_M00 * 3 + 1] = DIR_27_W_Y;
+    d27.dirs[DIR_M00 * 3 + 2] = DIR_27_W_Z;
     
-    d27.dirs[N * 3    ] = DIR_27_N_X;
-    d27.dirs[N * 3 + 1] = DIR_27_N_Y;
-    d27.dirs[N * 3 + 2] = DIR_27_N_Z;
+    d27.dirs[DIR_0P0 * 3    ] = DIR_27_N_X;
+    d27.dirs[DIR_0P0 * 3 + 1] = DIR_27_N_Y;
+    d27.dirs[DIR_0P0 * 3 + 2] = DIR_27_N_Z;
 
-    d27.dirs[S * 3    ] = DIR_27_S_X;
-    d27.dirs[S * 3 + 1] = DIR_27_S_Y;
-    d27.dirs[S * 3 + 2] = DIR_27_S_Z;
+    d27.dirs[DIR_0M0 * 3    ] = DIR_27_S_X;
+    d27.dirs[DIR_0M0 * 3 + 1] = DIR_27_S_Y;
+    d27.dirs[DIR_0M0 * 3 + 2] = DIR_27_S_Z;
     
-    d27.dirs[T * 3    ] = DIR_27_T_X;
-    d27.dirs[T * 3 + 1] = DIR_27_T_Y;
-    d27.dirs[T * 3 + 2] = DIR_27_T_Z;
+    d27.dirs[DIR_00P * 3    ] = DIR_27_T_X;
+    d27.dirs[DIR_00P * 3 + 1] = DIR_27_T_Y;
+    d27.dirs[DIR_00P * 3 + 2] = DIR_27_T_Z;
     
-    d27.dirs[B * 3    ] = DIR_27_B_X;
-    d27.dirs[B * 3 + 1] = DIR_27_B_Y;
-    d27.dirs[B * 3 + 2] = DIR_27_B_Z;
+    d27.dirs[DIR_00M * 3    ] = DIR_27_B_X;
+    d27.dirs[DIR_00M * 3 + 1] = DIR_27_B_Y;
+    d27.dirs[DIR_00M * 3 + 2] = DIR_27_B_Z;
 
-    d27.dirs[NE * 3    ] = DIR_27_NE_X;
-    d27.dirs[NE * 3 + 1] = DIR_27_NE_Y;
-    d27.dirs[NE * 3 + 2] = DIR_27_NE_Z;
+    d27.dirs[DIR_PP0 * 3    ] = DIR_27_NE_X;
+    d27.dirs[DIR_PP0 * 3 + 1] = DIR_27_NE_Y;
+    d27.dirs[DIR_PP0 * 3 + 2] = DIR_27_NE_Z;
     
-    d27.dirs[SW * 3    ] = DIR_27_SW_X;
-    d27.dirs[SW * 3 + 1] = DIR_27_SW_Y;
-    d27.dirs[SW * 3 + 2] = DIR_27_SW_Z;
+    d27.dirs[DIR_MM0 * 3    ] = DIR_27_SW_X;
+    d27.dirs[DIR_MM0 * 3 + 1] = DIR_27_SW_Y;
+    d27.dirs[DIR_MM0 * 3 + 2] = DIR_27_SW_Z;
 
-    d27.dirs[SE * 3    ] = DIR_27_SE_X;
-    d27.dirs[SE * 3 + 1] = DIR_27_SE_Y;
-    d27.dirs[SE * 3 + 2] = DIR_27_SE_Z;
+    d27.dirs[DIR_PM0 * 3    ] = DIR_27_SE_X;
+    d27.dirs[DIR_PM0 * 3 + 1] = DIR_27_SE_Y;
+    d27.dirs[DIR_PM0 * 3 + 2] = DIR_27_SE_Z;
 
-    d27.dirs[NW * 3    ] = DIR_27_NW_X;
-    d27.dirs[NW * 3 + 1] = DIR_27_NW_Y;
-    d27.dirs[NW * 3 + 2] = DIR_27_NW_Z;
+    d27.dirs[DIR_MP0 * 3    ] = DIR_27_NW_X;
+    d27.dirs[DIR_MP0 * 3 + 1] = DIR_27_NW_Y;
+    d27.dirs[DIR_MP0 * 3 + 2] = DIR_27_NW_Z;
 
-    d27.dirs[TE * 3    ] = DIR_27_TE_X;
-    d27.dirs[TE * 3 + 1] = DIR_27_TE_Y;
-    d27.dirs[TE * 3 + 2] = DIR_27_TE_Z;
+    d27.dirs[DIR_P0P * 3    ] = DIR_27_TE_X;
+    d27.dirs[DIR_P0P * 3 + 1] = DIR_27_TE_Y;
+    d27.dirs[DIR_P0P * 3 + 2] = DIR_27_TE_Z;
 
-    d27.dirs[BW * 3    ] = DIR_27_BW_X;
-    d27.dirs[BW * 3 + 1] = DIR_27_BW_Y;
-    d27.dirs[BW * 3 + 2] = DIR_27_BW_Z;
+    d27.dirs[DIR_M0M * 3    ] = DIR_27_BW_X;
+    d27.dirs[DIR_M0M * 3 + 1] = DIR_27_BW_Y;
+    d27.dirs[DIR_M0M * 3 + 2] = DIR_27_BW_Z;
                               
-    d27.dirs[BE * 3    ] = DIR_27_BE_X;
-    d27.dirs[BE * 3 + 1] = DIR_27_BE_Y;
-    d27.dirs[BE * 3 + 2] = DIR_27_BE_Z;
+    d27.dirs[DIR_P0M * 3    ] = DIR_27_BE_X;
+    d27.dirs[DIR_P0M * 3 + 1] = DIR_27_BE_Y;
+    d27.dirs[DIR_P0M * 3 + 2] = DIR_27_BE_Z;
                               
-    d27.dirs[TW * 3    ] = DIR_27_TW_X;
-    d27.dirs[TW * 3 + 1] = DIR_27_TW_Y;
-    d27.dirs[TW * 3 + 2] = DIR_27_TW_Z;
+    d27.dirs[DIR_M0P * 3    ] = DIR_27_TW_X;
+    d27.dirs[DIR_M0P * 3 + 1] = DIR_27_TW_Y;
+    d27.dirs[DIR_M0P * 3 + 2] = DIR_27_TW_Z;
                               
-    d27.dirs[TN * 3    ] = DIR_27_TN_X;
-    d27.dirs[TN * 3 + 1] = DIR_27_TN_Y;
-    d27.dirs[TN * 3 + 2] = DIR_27_TN_Z;
+    d27.dirs[DIR_0PP * 3    ] = DIR_27_TN_X;
+    d27.dirs[DIR_0PP * 3 + 1] = DIR_27_TN_Y;
+    d27.dirs[DIR_0PP * 3 + 2] = DIR_27_TN_Z;
                               
-    d27.dirs[BS * 3    ] = DIR_27_BS_X;
-    d27.dirs[BS * 3 + 1] = DIR_27_BS_Y;
-    d27.dirs[BS * 3 + 2] = DIR_27_BS_Z;
+    d27.dirs[DIR_0MM * 3    ] = DIR_27_BS_X;
+    d27.dirs[DIR_0MM * 3 + 1] = DIR_27_BS_Y;
+    d27.dirs[DIR_0MM * 3 + 2] = DIR_27_BS_Z;
                               
-    d27.dirs[BN * 3    ] = DIR_27_BN_X;
-    d27.dirs[BN * 3 + 1] = DIR_27_BN_Y;
-    d27.dirs[BN * 3 + 2] = DIR_27_BN_Z;
+    d27.dirs[DIR_0PM * 3    ] = DIR_27_BN_X;
+    d27.dirs[DIR_0PM * 3 + 1] = DIR_27_BN_Y;
+    d27.dirs[DIR_0PM * 3 + 2] = DIR_27_BN_Z;
 
-    d27.dirs[TS * 3    ] = DIR_27_TS_X;
-    d27.dirs[TS * 3 + 1] = DIR_27_TS_Y;
-    d27.dirs[TS * 3 + 2] = DIR_27_TS_Z;
+    d27.dirs[DIR_0MP * 3    ] = DIR_27_TS_X;
+    d27.dirs[DIR_0MP * 3 + 1] = DIR_27_TS_Y;
+    d27.dirs[DIR_0MP * 3 + 2] = DIR_27_TS_Z;
 
-    d27.dirs[REST * 3    ] = DIR_27_REST_X;   //
-    d27.dirs[REST * 3 + 1] = DIR_27_REST_Y;   //  ZERO ELEMENT
-    d27.dirs[REST * 3 + 2] = DIR_27_REST_Z;   //
+    d27.dirs[DIR_000 * 3    ] = DIR_27_REST_X;   //
+    d27.dirs[DIR_000 * 3 + 1] = DIR_27_REST_Y;   //  ZERO ELEMENT
+    d27.dirs[DIR_000 * 3 + 2] = DIR_27_REST_Z;   //
 
-    d27.dirs[TNE * 3    ] = DIR_27_TNE_X;
-    d27.dirs[TNE * 3 + 1] = DIR_27_TNE_Y;
-    d27.dirs[TNE * 3 + 2] = DIR_27_TNE_Z;
+    d27.dirs[DIR_PPP * 3    ] = DIR_27_TNE_X;
+    d27.dirs[DIR_PPP * 3 + 1] = DIR_27_TNE_Y;
+    d27.dirs[DIR_PPP * 3 + 2] = DIR_27_TNE_Z;
 
-    d27.dirs[BNE * 3    ] = DIR_27_BNE_X;
-    d27.dirs[BNE * 3 + 1] = DIR_27_BNE_Y;
-    d27.dirs[BNE * 3 + 2] = DIR_27_BNE_Z;
+    d27.dirs[DIR_PPM * 3    ] = DIR_27_BNE_X;
+    d27.dirs[DIR_PPM * 3 + 1] = DIR_27_BNE_Y;
+    d27.dirs[DIR_PPM * 3 + 2] = DIR_27_BNE_Z;
 
-    d27.dirs[TSE * 3    ] = DIR_27_TSE_X;
-    d27.dirs[TSE * 3 + 1] = DIR_27_TSE_Y;
-    d27.dirs[TSE * 3 + 2] = DIR_27_TSE_Z;
+    d27.dirs[DIR_PMP * 3    ] = DIR_27_TSE_X;
+    d27.dirs[DIR_PMP * 3 + 1] = DIR_27_TSE_Y;
+    d27.dirs[DIR_PMP * 3 + 2] = DIR_27_TSE_Z;
 
-    d27.dirs[BSE * 3    ] = DIR_27_BSE_X;
-    d27.dirs[BSE * 3 + 1] = DIR_27_BSE_Y;
-    d27.dirs[BSE * 3 + 2] = DIR_27_BSE_Z;
+    d27.dirs[DIR_PMM * 3    ] = DIR_27_BSE_X;
+    d27.dirs[DIR_PMM * 3 + 1] = DIR_27_BSE_Y;
+    d27.dirs[DIR_PMM * 3 + 2] = DIR_27_BSE_Z;
 
-    d27.dirs[TNW * 3    ] = DIR_27_TNW_X;
-    d27.dirs[TNW * 3 + 1] = DIR_27_TNW_Y;
-    d27.dirs[TNW * 3 + 2] = DIR_27_TNW_Z;
+    d27.dirs[DIR_MPP * 3    ] = DIR_27_TNW_X;
+    d27.dirs[DIR_MPP * 3 + 1] = DIR_27_TNW_Y;
+    d27.dirs[DIR_MPP * 3 + 2] = DIR_27_TNW_Z;
 
-    d27.dirs[BNW * 3    ] = DIR_27_BNW_X;
-    d27.dirs[BNW * 3 + 1] = DIR_27_BNW_Y;
-    d27.dirs[BNW * 3 + 2] = DIR_27_BNW_Z;
+    d27.dirs[DIR_MPM * 3    ] = DIR_27_BNW_X;
+    d27.dirs[DIR_MPM * 3 + 1] = DIR_27_BNW_Y;
+    d27.dirs[DIR_MPM * 3 + 2] = DIR_27_BNW_Z;
 
-    d27.dirs[TSW * 3    ] = DIR_27_TSW_X;
-    d27.dirs[TSW * 3 + 1] = DIR_27_TSW_Y;
-    d27.dirs[TSW * 3 + 2] = DIR_27_TSW_Z;
+    d27.dirs[DIR_MMP * 3    ] = DIR_27_TSW_X;
+    d27.dirs[DIR_MMP * 3 + 1] = DIR_27_TSW_Y;
+    d27.dirs[DIR_MMP * 3 + 2] = DIR_27_TSW_Z;
 
-    d27.dirs[BSW * 3    ] = DIR_27_BSW_X;
-    d27.dirs[BSW * 3 + 1] = DIR_27_BSW_Y;
-    d27.dirs[BSW * 3 + 2] = DIR_27_BSW_Z;
+    d27.dirs[DIR_MMM * 3    ] = DIR_27_BSW_X;
+    d27.dirs[DIR_MMM * 3 + 1] = DIR_27_BSW_Y;
+    d27.dirs[DIR_MMM * 3 + 2] = DIR_27_BSW_Z;
 
     return d27;
 }
diff --git a/src/gpu/GridGenerator/grid/distributions/DistributionTest.cpp b/src/gpu/GridGenerator/grid/distributions/DistributionTest.cpp
index 86b2f3b5139482d2be92788ea646cea1069a6052..3a9dd1d33ec4e1567b7b85e99970a5245ebbb06d 100644
--- a/src/gpu/GridGenerator/grid/distributions/DistributionTest.cpp
+++ b/src/gpu/GridGenerator/grid/distributions/DistributionTest.cpp
@@ -10,52 +10,52 @@ TEST(DistributionTest, DistributionReturnsCorrectDirections)
 {
     Distribution dist = DistributionHelper::getDistribution27();
 
-    EXPECT_THAT(dist.directions[E][0], testing::Eq(DIR_27_E_X));
-    EXPECT_THAT(dist.directions[E][1], testing::Eq(DIR_27_E_Y));
-    EXPECT_THAT(dist.directions[E][2], testing::Eq(DIR_27_E_Z));
-    EXPECT_THAT(dist.dirs[E * 3    ], testing::Eq(DIR_27_E_X));
-    EXPECT_THAT(dist.dirs[E * 3 + 1], testing::Eq(DIR_27_E_Y));
-    EXPECT_THAT(dist.dirs[E * 3 + 2], testing::Eq(DIR_27_E_Z));
-
-    EXPECT_THAT(dist.directions[B][0], testing::Eq(DIR_27_B_X));
-    EXPECT_THAT(dist.directions[B][1], testing::Eq(DIR_27_B_Y));
-    EXPECT_THAT(dist.directions[B][2], testing::Eq(DIR_27_B_Z));
-    EXPECT_THAT(dist.dirs[B * 3    ], testing::Eq(DIR_27_B_X));
-    EXPECT_THAT(dist.dirs[B * 3 + 1], testing::Eq(DIR_27_B_Y));
-    EXPECT_THAT(dist.dirs[B * 3 + 2], testing::Eq(DIR_27_B_Z));
+    EXPECT_THAT(dist.directions[DIR_P00][0], testing::Eq(DIR_27_E_X));
+    EXPECT_THAT(dist.directions[DIR_P00][1], testing::Eq(DIR_27_E_Y));
+    EXPECT_THAT(dist.directions[DIR_P00][2], testing::Eq(DIR_27_E_Z));
+    EXPECT_THAT(dist.dirs[DIR_P00 * 3    ], testing::Eq(DIR_27_E_X));
+    EXPECT_THAT(dist.dirs[DIR_P00 * 3 + 1], testing::Eq(DIR_27_E_Y));
+    EXPECT_THAT(dist.dirs[DIR_P00 * 3 + 2], testing::Eq(DIR_27_E_Z));
+
+    EXPECT_THAT(dist.directions[DIR_00M][0], testing::Eq(DIR_27_B_X));
+    EXPECT_THAT(dist.directions[DIR_00M][1], testing::Eq(DIR_27_B_Y));
+    EXPECT_THAT(dist.directions[DIR_00M][2], testing::Eq(DIR_27_B_Z));
+    EXPECT_THAT(dist.dirs[DIR_00M * 3    ], testing::Eq(DIR_27_B_X));
+    EXPECT_THAT(dist.dirs[DIR_00M * 3 + 1], testing::Eq(DIR_27_B_Y));
+    EXPECT_THAT(dist.dirs[DIR_00M * 3 + 2], testing::Eq(DIR_27_B_Z));
     
-    EXPECT_THAT(dist.directions[REST][0], testing::Eq(0));
-    EXPECT_THAT(dist.directions[REST][1], testing::Eq(0));
-    EXPECT_THAT(dist.directions[REST][2], testing::Eq(0));
-    EXPECT_THAT(dist.dirs[REST * 3    ], testing::Eq(0));
-    EXPECT_THAT(dist.dirs[REST * 3 + 1], testing::Eq(0));
-    EXPECT_THAT(dist.dirs[REST * 3 + 2], testing::Eq(0));
-
-    EXPECT_THAT(dist.directions[NE][0], testing::Eq(DIR_27_NE_X));
-    EXPECT_THAT(dist.directions[NE][1], testing::Eq(DIR_27_NE_Y));
-    EXPECT_THAT(dist.directions[NE][2], testing::Eq(DIR_27_NE_Z));
-    EXPECT_THAT(dist.dirs[NE * 3    ], testing::Eq(DIR_27_NE_X));
-    EXPECT_THAT(dist.dirs[NE * 3 + 1], testing::Eq(DIR_27_NE_Y));
-    EXPECT_THAT(dist.dirs[NE * 3 + 2], testing::Eq(DIR_27_NE_Z));
-
-    EXPECT_THAT(dist.directions[TS][0], testing::Eq(DIR_27_TS_X));
-    EXPECT_THAT(dist.directions[TS][1], testing::Eq(DIR_27_TS_Y));
-    EXPECT_THAT(dist.directions[TS][2], testing::Eq(DIR_27_TS_Z));
-    EXPECT_THAT(dist.dirs[TS * 3    ], testing::Eq(DIR_27_TS_X));
-    EXPECT_THAT(dist.dirs[TS * 3 + 1], testing::Eq(DIR_27_TS_Y));
-    EXPECT_THAT(dist.dirs[TS * 3 + 2], testing::Eq(DIR_27_TS_Z));
-
-    EXPECT_THAT(dist.directions[TNE][0], testing::Eq(DIR_27_TNE_X));
-    EXPECT_THAT(dist.directions[TNE][1], testing::Eq(DIR_27_TNE_Y));
-    EXPECT_THAT(dist.directions[TNE][2], testing::Eq(DIR_27_TNE_Z));
-    EXPECT_THAT(dist.dirs[TNE * 3    ], testing::Eq(DIR_27_TNE_X));
-    EXPECT_THAT(dist.dirs[TNE * 3 + 1], testing::Eq(DIR_27_TNE_Y));
-    EXPECT_THAT(dist.dirs[TNE * 3 + 2], testing::Eq(DIR_27_TNE_Z));
-
-    EXPECT_THAT(dist.directions[BSW][0], testing::Eq(DIR_27_BSW_X));
-    EXPECT_THAT(dist.directions[BSW][1], testing::Eq(DIR_27_BSW_Y));
-    EXPECT_THAT(dist.directions[BSW][2], testing::Eq(DIR_27_BSW_Z));
-    EXPECT_THAT(dist.dirs[BSW * 3    ], testing::Eq(DIR_27_BSW_X));
-    EXPECT_THAT(dist.dirs[BSW * 3 + 1], testing::Eq(DIR_27_BSW_Y));
-    EXPECT_THAT(dist.dirs[BSW * 3 + 2], testing::Eq(DIR_27_BSW_Z));
+    EXPECT_THAT(dist.directions[DIR_000][0], testing::Eq(0));
+    EXPECT_THAT(dist.directions[DIR_000][1], testing::Eq(0));
+    EXPECT_THAT(dist.directions[DIR_000][2], testing::Eq(0));
+    EXPECT_THAT(dist.dirs[DIR_000 * 3    ], testing::Eq(0));
+    EXPECT_THAT(dist.dirs[DIR_000 * 3 + 1], testing::Eq(0));
+    EXPECT_THAT(dist.dirs[DIR_000 * 3 + 2], testing::Eq(0));
+
+    EXPECT_THAT(dist.directions[DIR_PP0][0], testing::Eq(DIR_27_NE_X));
+    EXPECT_THAT(dist.directions[DIR_PP0][1], testing::Eq(DIR_27_NE_Y));
+    EXPECT_THAT(dist.directions[DIR_PP0][2], testing::Eq(DIR_27_NE_Z));
+    EXPECT_THAT(dist.dirs[DIR_PP0 * 3    ], testing::Eq(DIR_27_NE_X));
+    EXPECT_THAT(dist.dirs[DIR_PP0 * 3 + 1], testing::Eq(DIR_27_NE_Y));
+    EXPECT_THAT(dist.dirs[DIR_PP0 * 3 + 2], testing::Eq(DIR_27_NE_Z));
+
+    EXPECT_THAT(dist.directions[DIR_0MP][0], testing::Eq(DIR_27_TS_X));
+    EXPECT_THAT(dist.directions[DIR_0MP][1], testing::Eq(DIR_27_TS_Y));
+    EXPECT_THAT(dist.directions[DIR_0MP][2], testing::Eq(DIR_27_TS_Z));
+    EXPECT_THAT(dist.dirs[DIR_0MP * 3    ], testing::Eq(DIR_27_TS_X));
+    EXPECT_THAT(dist.dirs[DIR_0MP * 3 + 1], testing::Eq(DIR_27_TS_Y));
+    EXPECT_THAT(dist.dirs[DIR_0MP * 3 + 2], testing::Eq(DIR_27_TS_Z));
+
+    EXPECT_THAT(dist.directions[DIR_PPP][0], testing::Eq(DIR_27_TNE_X));
+    EXPECT_THAT(dist.directions[DIR_PPP][1], testing::Eq(DIR_27_TNE_Y));
+    EXPECT_THAT(dist.directions[DIR_PPP][2], testing::Eq(DIR_27_TNE_Z));
+    EXPECT_THAT(dist.dirs[DIR_PPP * 3    ], testing::Eq(DIR_27_TNE_X));
+    EXPECT_THAT(dist.dirs[DIR_PPP * 3 + 1], testing::Eq(DIR_27_TNE_Y));
+    EXPECT_THAT(dist.dirs[DIR_PPP * 3 + 2], testing::Eq(DIR_27_TNE_Z));
+
+    EXPECT_THAT(dist.directions[DIR_MMM][0], testing::Eq(DIR_27_BSW_X));
+    EXPECT_THAT(dist.directions[DIR_MMM][1], testing::Eq(DIR_27_BSW_Y));
+    EXPECT_THAT(dist.directions[DIR_MMM][2], testing::Eq(DIR_27_BSW_Z));
+    EXPECT_THAT(dist.dirs[DIR_MMM * 3    ], testing::Eq(DIR_27_BSW_X));
+    EXPECT_THAT(dist.dirs[DIR_MMM * 3 + 1], testing::Eq(DIR_27_BSW_Y));
+    EXPECT_THAT(dist.dirs[DIR_MMM * 3 + 2], testing::Eq(DIR_27_BSW_Z));
 }
\ No newline at end of file
diff --git a/src/gpu/GridGenerator/io/GridVTKWriter/GridVTKWriter.cpp b/src/gpu/GridGenerator/io/GridVTKWriter/GridVTKWriter.cpp
index 35b3197ff7c3f37eb33809cc9a909f0085d2dffc..7f818f3217e682f21c2b41c62070924243fcb3b0 100644
--- a/src/gpu/GridGenerator/io/GridVTKWriter/GridVTKWriter.cpp
+++ b/src/gpu/GridGenerator/io/GridVTKWriter/GridVTKWriter.cpp
@@ -59,7 +59,7 @@ void GridVTKWriter::writeSparseGridToVTK(SPtr<Grid> grid, const std::string& nam
     writeVtkFile(grid);
 }
 
-void GridVTKWriter::writeGridToVTKXML(SPtr<Grid> grid, const std::string& name, WRITING_FORMAT format)
+void GridVTKWriter::writeGridToVTKXML(SPtr<Grid> grid, const std::string& name)
 {
     
     const uint chunkSize = 20000000; 
@@ -87,11 +87,9 @@ void GridVTKWriter::writeGridToVTKXML(SPtr<Grid> grid, const std::string& name,
 
         *logging::out << logging::Logger::INFO_INTERMEDIATE << "Write Grid to XML VTK (*.vtu) output file : " + name + "_Part_" + std::to_string(part) + "\n";
 
-        nodedatanames.push_back("types");
-        nodedatanames.push_back("sparse_id");
-        nodedatanames.push_back("matrix_id");
-        nodedatanames.push_back("isSendNode");
-        nodedatanames.push_back("isReceiveNode");
+        nodedatanames.emplace_back("types");
+        nodedatanames.emplace_back("sparse_id");
+        nodedatanames.emplace_back("matrix_id");
 
         nodedata.resize(nodedatanames.size());
 
@@ -113,14 +111,12 @@ void GridVTKWriter::writeGridToVTKXML(SPtr<Grid> grid, const std::string& name,
                     grid->transIndexToCoords(index, x, y, z);
 
                     nodeNumbers(xIndex, yIndex, zIndex) = nr++;
-                    nodes.push_back(UbTupleFloat3(float(x), float(y), float(z)));
+                    nodes.emplace_back(UbTupleFloat3(float(x), float(y), float(z)));
 
                     const char type = grid->getFieldEntry(grid->transCoordToIndex(x, y, z));
                     nodedata[0].push_back(type);
                     nodedata[1].push_back(grid->getSparseIndex(index));
                     nodedata[2].push_back(index);
-                    nodedata[3].push_back(grid->isSendNode(index));
-                    nodedata[4].push_back(grid->isReceiveNode(index));
                 }
             }
         }
@@ -150,7 +146,7 @@ void GridVTKWriter::writeGridToVTKXML(SPtr<Grid> grid, const std::string& name,
                     {
                         Cell cell(x, y, z, grid->getDelta());
                         //if (grid->nodeInCellIs(cell, INVALID_OUT_OF_GRID) || grid->nodeInCellIs(cell, INVALID_COARSE_UNDER_FINE))
-                        //	continue;
+                        // continue;
 
                         cells.push_back(makeUbTuple(uint(SWB), uint(SEB), uint(NEB), uint(NWB), uint(SWT), uint(SET), uint(NET), uint(NWT)));
                     }
@@ -163,7 +159,7 @@ void GridVTKWriter::writeGridToVTKXML(SPtr<Grid> grid, const std::string& name,
 
 }
 
-void GridVTKWriter::writeInterpolationCellsToVTKXML(SPtr<Grid> grid, SPtr<Grid> gridCoarse, const std::string& name, WRITING_FORMAT format)
+void GridVTKWriter::writeInterpolationCellsToVTKXML(SPtr<Grid> grid, SPtr<Grid> gridCoarse, const std::string& name)
 {
     std::vector<char> nodeInterpolationCellType( grid->getSize() );
     for( auto& type : nodeInterpolationCellType ) type = -1;
@@ -202,79 +198,79 @@ void GridVTKWriter::writeInterpolationCellsToVTKXML(SPtr<Grid> grid, SPtr<Grid>
         }
     }
 
-	std::vector<UbTupleFloat3> nodes;
-	std::vector<UbTupleInt8> cells;
-	std::vector<std::string> celldatanames;
-	std::vector< std::vector<double> > celldata;
-
-	celldatanames.push_back("InterpolationCells");
-    celldatanames.push_back("Offset");
-
-	celldata.resize(celldatanames.size());
-
-	CbArray3D<int> nodeNumbers(grid->getNumberOfNodesX(), grid->getNumberOfNodesY(), grid->getNumberOfNodesZ(), -1);
-	int nr = 0;
-
-	for (uint xIndex = 0; xIndex < grid->getNumberOfNodesX(); xIndex++)
-	{
-		for (uint yIndex = 0; yIndex < grid->getNumberOfNodesY(); yIndex++)
-		{
-			for (uint zIndex = 0; zIndex < grid->getNumberOfNodesZ(); zIndex++)
-			{
-				real x, y, z;
-				uint index = 
-					  grid->getNumberOfNodesX() * grid->getNumberOfNodesY() * zIndex
-					+ grid->getNumberOfNodesX() *                             yIndex
-					+ xIndex;
-
-				grid->transIndexToCoords(index, x, y, z);
-
-				nodeNumbers(xIndex, yIndex, zIndex) = nr++;
-				nodes.push_back(UbTupleFloat3(float(x), float(y), float(z)));
-			}
-		}
-	}
-
-	int SWB, SEB, NEB, NWB, SWT, SET, NET, NWT;
-	for (uint xIndex = 0; xIndex < grid->getNumberOfNodesX() - 1; xIndex++)
-	{
-		for (uint yIndex = 0; yIndex < grid->getNumberOfNodesY() - 1; yIndex++)
-		{
-			for (uint zIndex = 0; zIndex < grid->getNumberOfNodesZ() - 1; zIndex++)
-			{
-				real x, y, z;
-				uint index = grid->getNumberOfNodesX() * grid->getNumberOfNodesY() * zIndex
-					+ grid->getNumberOfNodesX() *                             yIndex
-					+ xIndex;
-
-				grid->transIndexToCoords(index, x, y, z);
-
-				if ((SWB = nodeNumbers(xIndex, yIndex, zIndex)) >= 0
-					&& (SEB = nodeNumbers(xIndex + 1, yIndex, zIndex)) >= 0
-					&& (NEB = nodeNumbers(xIndex + 1, yIndex + 1, zIndex)) >= 0
-					&& (NWB = nodeNumbers(xIndex, yIndex + 1, zIndex)) >= 0
-					&& (SWT = nodeNumbers(xIndex, yIndex, zIndex + 1)) >= 0
-					&& (SET = nodeNumbers(xIndex + 1, yIndex, zIndex + 1)) >= 0
-					&& (NET = nodeNumbers(xIndex + 1, yIndex + 1, zIndex + 1)) >= 0
-					&& (NWT = nodeNumbers(xIndex, yIndex + 1, zIndex + 1)) >= 0)
-				{
-					Cell cell(x, y, z, grid->getDelta());
-					//if (grid->nodeInCellIs(cell, INVALID_OUT_OF_GRID) || grid->nodeInCellIs(cell, INVALID_COARSE_UNDER_FINE))
-					//	continue;
-
-					cells.push_back(makeUbTuple(SWB, SEB, NEB, NWB, SWT, SET, NET, NWT));
-
-				    //const char type = grid->getFieldEntry(grid->transCoordToIndex(nodes[SWB].v1, nodes[SWB].v2.v1, nodes[SWB].v2.v2));
-				    //const char type = grid->getFieldEntry(grid->transCoordToIndex(val<1>(nodes[SWB]), val<2>(nodes[SWB]), val<3>(nodes[SWB])));
-				    const char type = nodeInterpolationCellType[ grid->transCoordToIndex(val<1>(nodes[SWB]), val<2>(nodes[SWB]), val<3>(nodes[SWB])) ];
+    std::vector<UbTupleFloat3> nodes;
+    std::vector<UbTupleInt8> cells;
+    std::vector<std::string> celldatanames;
+    std::vector< std::vector<double> > celldata;
+
+    celldatanames.emplace_back("InterpolationCells");
+    celldatanames.emplace_back("Offset");
+
+    celldata.resize(celldatanames.size());
+
+    CbArray3D<int> nodeNumbers(grid->getNumberOfNodesX(), grid->getNumberOfNodesY(), grid->getNumberOfNodesZ(), -1);
+    int nr = 0;
+
+    for (uint xIndex = 0; xIndex < grid->getNumberOfNodesX(); xIndex++)
+    {
+        for (uint yIndex = 0; yIndex < grid->getNumberOfNodesY(); yIndex++)
+        {
+            for (uint zIndex = 0; zIndex < grid->getNumberOfNodesZ(); zIndex++)
+            {
+                real x, y, z;
+                uint index = 
+                      grid->getNumberOfNodesX() * grid->getNumberOfNodesY() * zIndex
+                    + grid->getNumberOfNodesX() *                             yIndex
+                    + xIndex;
+
+                grid->transIndexToCoords(index, x, y, z);
+
+                nodeNumbers(xIndex, yIndex, zIndex) = nr++;
+                nodes.emplace_back(UbTupleFloat3(float(x), float(y), float(z)));
+            }
+        }
+    }
+
+    int SWB, SEB, NEB, NWB, SWT, SET, NET, NWT;
+    for (uint xIndex = 0; xIndex < grid->getNumberOfNodesX() - 1; xIndex++)
+    {
+        for (uint yIndex = 0; yIndex < grid->getNumberOfNodesY() - 1; yIndex++)
+        {
+            for (uint zIndex = 0; zIndex < grid->getNumberOfNodesZ() - 1; zIndex++)
+            {
+                real x, y, z;
+                uint index = grid->getNumberOfNodesX() * grid->getNumberOfNodesY() * zIndex
+                    + grid->getNumberOfNodesX() *                             yIndex
+                    + xIndex;
+
+                grid->transIndexToCoords(index, x, y, z);
+
+                if ((SWB = nodeNumbers(xIndex, yIndex, zIndex)) >= 0
+                    && (SEB = nodeNumbers(xIndex + 1, yIndex, zIndex)) >= 0
+                    && (NEB = nodeNumbers(xIndex + 1, yIndex + 1, zIndex)) >= 0
+                    && (NWB = nodeNumbers(xIndex, yIndex + 1, zIndex)) >= 0
+                    && (SWT = nodeNumbers(xIndex, yIndex, zIndex + 1)) >= 0
+                    && (SET = nodeNumbers(xIndex + 1, yIndex, zIndex + 1)) >= 0
+                    && (NET = nodeNumbers(xIndex + 1, yIndex + 1, zIndex + 1)) >= 0
+                    && (NWT = nodeNumbers(xIndex, yIndex + 1, zIndex + 1)) >= 0)
+                {
+                    Cell cell(x, y, z, grid->getDelta());
+                    //if (grid->nodeInCellIs(cell, INVALID_OUT_OF_GRID) || grid->nodeInCellIs(cell, INVALID_COARSE_UNDER_FINE))
+                    //    continue;
+
+                    cells.push_back(makeUbTuple(SWB, SEB, NEB, NWB, SWT, SET, NET, NWT));
+
+                    //const char type = grid->getFieldEntry(grid->transCoordToIndex(nodes[SWB].v1, nodes[SWB].v2.v1, nodes[SWB].v2.v2));
+                    //const char type = grid->getFieldEntry(grid->transCoordToIndex(val<1>(nodes[SWB]), val<2>(nodes[SWB]), val<3>(nodes[SWB])));
+                    const char type = nodeInterpolationCellType[ grid->transCoordToIndex(val<1>(nodes[SWB]), val<2>(nodes[SWB]), val<3>(nodes[SWB])) ];
                     const char offset = nodeOffset               [ grid->transCoordToIndex(val<1>(nodes[SWB]), val<2>(nodes[SWB]), val<3>(nodes[SWB])) ];
 
                     celldata[0].push_back( type );
                     celldata[1].push_back( offset );
-				}
-			}
-		}
-	}
+                }
+            }
+        }
+    }
     WbWriterVtkXmlBinary::getInstance()->writeOctsWithCellData(name, nodes, cells, celldatanames, celldata);
 }
 
@@ -323,18 +319,18 @@ void GridVTKWriter::openFile(const std::string& name, const std::string& mode)
 void GridVTKWriter::closeFile()
 {
     GridVTKWriter::end_line();
-	fclose(file);
+    fclose(file);
 }
 
 void GridVTKWriter::writeHeader()
 {
-	fprintf(file, "# vtk DataFile Version 3.0\n");
-	fprintf(file, "by MeshGenerator\n");
-	if (isBinaryWritingFormat())
-		fprintf(file, "BINARY\n");
-	else
-		fprintf(file, "ASCII\n");
-	fprintf(file, "DATASET UNSTRUCTURED_GRID\n");
+    fprintf(file, "# vtk DataFile Version 3.0\n");
+    fprintf(file, "by MeshGenerator\n");
+    if (isBinaryWritingFormat())
+        fprintf(file, "BINARY\n");
+    else
+        fprintf(file, "ASCII\n");
+    fprintf(file, "DATASET UNSTRUCTURED_GRID\n");
 }
 
 void GridVTKWriter::writePoints(SPtr<Grid> grid)
@@ -360,34 +356,34 @@ void GridVTKWriter::writePoints(SPtr<Grid> grid)
 
 void GridVTKWriter::writeCells(const unsigned int &size)
 {
-	fprintf(file, "\nCELLS %d %d\n", size, size * 2);
-	for (unsigned int i = 0; i < size; ++i)
-	{
-		if (isBinaryWritingFormat()){
-			write_int(1);
-			write_int(i);
-		}
-		else
-			fprintf(file, "1 %d\n", i);
-	}
-
-	fprintf(file, "\nCELL_TYPES %d\n", size);
-	for (unsigned int i = 0; i < size; ++i)
-	{
-		if (isBinaryWritingFormat())
-			write_int(1);
-		else
-			fprintf(file, "1 ");
-	}
-	if (!isBinaryWritingFormat())
+    fprintf(file, "\nCELLS %u %u\n", size, size * 2);
+    for (unsigned int i = 0; i < size; ++i)
+    {
+        if (isBinaryWritingFormat()){
+            write_int(1);
+            write_int(i);
+        }
+        else
+            fprintf(file, "1 %u\n", i);
+    }
+
+    fprintf(file, "\nCELL_TYPES %u\n", size);
+    for (unsigned int i = 0; i < size; ++i)
+    {
+        if (isBinaryWritingFormat())
+            write_int(1);
+        else
+            fprintf(file, "1 ");
+    }
+    if (!isBinaryWritingFormat())
         GridVTKWriter::end_line();
 }
 
 void GridVTKWriter::writeTypeHeader(const unsigned int &size)
 {
-	fprintf(file, "\nPOINT_DATA %d\n", size);
-	fprintf(file, "SCALARS type int\n");
-	fprintf(file, "LOOKUP_TABLE default\n");
+    fprintf(file, "\nPOINT_DATA %u\n", size);
+    fprintf(file, "SCALARS type int\n");
+    fprintf(file, "LOOKUP_TABLE default\n");
 }
 
 void GridVTKWriter::writeTypes(SPtr<Grid> grid)
@@ -406,38 +402,38 @@ void GridVTKWriter::writeTypes(SPtr<Grid> grid)
 
 void GridVTKWriter::end_line()
 {
-	char str2[8] = "\n";
-	fprintf(file, "%s", str2);
+    char str2[8] = "\n";
+    fprintf(file, "%s", str2);
 }
 
 void GridVTKWriter::write_int(int val)
 {
-	force_big_endian((unsigned char *)&val);
-	fwrite(&val, sizeof(int), 1, file);
+    force_big_endian((unsigned char *)&val);
+    fwrite(&val, sizeof(int), 1, file);
 }
 
 void GridVTKWriter::write_float(float val)
 {
-	force_big_endian((unsigned char *)&val);
-	fwrite(&val, sizeof(float), 1, file);
+    force_big_endian((unsigned char *)&val);
+    fwrite(&val, sizeof(float), 1, file);
 }
 
 
 void GridVTKWriter::force_big_endian(unsigned char *bytes)
 {
-	bool shouldSwap = false;
-	int tmp1 = 1;
-	unsigned char *tmp2 = (unsigned char *)&tmp1;
-	if (*tmp2 != 0)
-		shouldSwap = true;
-
-	if (shouldSwap)
-	{
-		unsigned char tmp = bytes[0];
-		bytes[0] = bytes[3];
-		bytes[3] = tmp;
-		tmp = bytes[1];
-		bytes[1] = bytes[2];
-		bytes[2] = tmp;
-	}
+    bool shouldSwap = false;
+    int tmp1 = 1;
+    unsigned char *tmp2 = (unsigned char *)&tmp1;
+    if (*tmp2 != 0)
+        shouldSwap = true;
+
+    if (shouldSwap)
+    {
+        unsigned char tmp = bytes[0];
+        bytes[0] = bytes[3];
+        bytes[3] = tmp;
+        tmp = bytes[1];
+        bytes[1] = bytes[2];
+        bytes[2] = tmp;
+    }
 }
diff --git a/src/gpu/GridGenerator/io/GridVTKWriter/GridVTKWriter.h b/src/gpu/GridGenerator/io/GridVTKWriter/GridVTKWriter.h
index cf33df096a6e670b65f79831d59927e3d7cea389..f8458ae27bccd4b51db6b13544a708c4d8518051 100644
--- a/src/gpu/GridGenerator/io/GridVTKWriter/GridVTKWriter.h
+++ b/src/gpu/GridGenerator/io/GridVTKWriter/GridVTKWriter.h
@@ -46,12 +46,12 @@ class GRIDGENERATOR_EXPORT GridVTKWriter
 {
 public:
     static void writeSparseGridToVTK(SPtr<Grid> grid, const std::string& name, WRITING_FORMAT format = WRITING_FORMAT::ASCII);
-    static void writeGridToVTKXML(SPtr<Grid> grid, const std::string& name, WRITING_FORMAT format = WRITING_FORMAT::ASCII);
-    static void writeInterpolationCellsToVTKXML(SPtr<Grid> grid, SPtr<Grid> gridCoarse, const std::string& name, WRITING_FORMAT format = WRITING_FORMAT::ASCII);
+    static void writeGridToVTKXML(SPtr<Grid> grid, const std::string& name);
+    static void writeInterpolationCellsToVTKXML(SPtr<Grid> grid, SPtr<Grid> gridCoarse, const std::string& name);
 
 private:
-    GridVTKWriter() {}
-    ~GridVTKWriter() {}
+    GridVTKWriter() = default;
+    ~GridVTKWriter() = default;
 
     static FILE *file;
     static WRITING_FORMAT format;
diff --git a/src/gpu/GridGenerator/io/SimulationFileWriter/SimulationFileWriter.cpp b/src/gpu/GridGenerator/io/SimulationFileWriter/SimulationFileWriter.cpp
index 320a6e5fb7bb8e52a335722bca71d7d6a2a0c6de..23fb0f4e7f3e16702e9cb2459606986af1032e49 100644
--- a/src/gpu/GridGenerator/io/SimulationFileWriter/SimulationFileWriter.cpp
+++ b/src/gpu/GridGenerator/io/SimulationFileWriter/SimulationFileWriter.cpp
@@ -37,7 +37,6 @@
 #include <iomanip>
 #include <omp.h>
 #include <cmath>
-#include <stdint.h>
 
 #include "Core/Timer/Timer.h"
 
@@ -56,7 +55,7 @@ using namespace vf::gpu;
 /*#################################################################################*/
 /*---------------------------------public methods----------------------------------*/
 /*---------------------------------------------------------------------------------*/
-void SimulationFileWriter::write(std::string folder, SPtr<GridBuilder> builder, FILEFORMAT format)
+void SimulationFileWriter::write(const std::string& folder, SPtr<GridBuilder> builder, FILEFORMAT format)
 {
     SimulationFileWriter::folder = folder;
 
@@ -133,7 +132,7 @@ void SimulationFileWriter::openFiles(SPtr<GridBuilder> builder)
     qNames.push_back(path + simulationFileNames::bottomBoundaryQ);
     qNames.push_back(path + simulationFileNames::frontBoundaryQ);
     qNames.push_back(path + simulationFileNames::backBoundaryQ);
-	qNames.push_back(path + simulationFileNames::geomBoundaryQ);
+    qNames.push_back(path + simulationFileNames::geomBoundaryQ);
 
     std::vector<std::string> valueNames;
     valueNames.push_back(path + simulationFileNames::inletBoundaryValues);
@@ -142,7 +141,7 @@ void SimulationFileWriter::openFiles(SPtr<GridBuilder> builder)
     valueNames.push_back(path + simulationFileNames::bottomBoundaryValues);
     valueNames.push_back(path + simulationFileNames::frontBoundaryValues);
     valueNames.push_back(path + simulationFileNames::backBoundaryValues);
-	valueNames.push_back(path + simulationFileNames::geomBoundaryValues);
+    valueNames.push_back(path + simulationFileNames::geomBoundaryValues);
 
     for (int i = 0; i < QFILES; i++){
         SPtr<std::ofstream> outQ(new std::ofstream);
@@ -232,7 +231,7 @@ void SimulationFileWriter::writeLevelSize(uint numberOfNodes, FILEFORMAT format)
     const std::string zeroGeo = "16 ";
 
     if (format == FILEFORMAT::BINARY)
-	{
+    {
         //const uint zeroIndex = 0;
         //const uint zeroGeo   = 16;
 
@@ -258,7 +257,7 @@ void SimulationFileWriter::writeLevelSize(uint numberOfNodes, FILEFORMAT format)
         geoVecFile      << numberOfNodes << "\n" << zeroGeo  ;
     }
     else 
-	{
+    {
         xCoordFile      << numberOfNodes << "\n" << zeroIndex << "\n";
         yCoordFile      << numberOfNodes << "\n" << zeroIndex << "\n";
         zCoordFile      << numberOfNodes << "\n" << zeroIndex << "\n";
@@ -319,7 +318,7 @@ void SimulationFileWriter::writeCoordsNeighborsGeo(SPtr<GridBuilder> builder, in
     grid->transIndexToCoords(index, x, y, z);
 
     if (format == FILEFORMAT::BINARY)
-	{
+    {
         double tmpX = (double)x;
         double tmpY = (double)y;
         double tmpZ = (double)z;
@@ -342,7 +341,7 @@ void SimulationFileWriter::writeCoordsNeighborsGeo(SPtr<GridBuilder> builder, in
         geoVecFile.write((char*)&type, sizeof(unsigned int));
     }
     else 
-	{
+    {
         xCoordFile << x << "\n";
         yCoordFile << y << "\n";
         zCoordFile << z << "\n";
@@ -446,7 +445,7 @@ void SimulationFileWriter::writeGridInterfaceOffsetToFile(uint numberOfNodes, st
 std::vector<std::vector<std::vector<real> > > SimulationFileWriter::createBCVectors(SPtr<Grid> grid)
 {
     std::vector<std::vector<std::vector<real> > > qs;
-	qs.resize(QFILES);
+    qs.resize(QFILES);
     for (uint i = 0; i < grid->getSize(); i++)
     {
         real x, y, z;
@@ -472,9 +471,9 @@ void SimulationFileWriter::addShortQsToVector(int index, std::vector<std::vector
 
     for (int i = grid->getEndDirection(); i >= 0; i--)
     {
-		/*int qIndex = i * grid->getSize() + grid->getSparseIndex(index);
-		real q = grid->getDistribution()[qIndex];*/
-		real q = grid->getQValue(index, i);
+        /*int qIndex = i * grid->getSize() + grid->getSparseIndex(index);
+        real q = grid->getDistribution()[qIndex];*/
+        real q = grid->getQValue(index, i);
         if (q > 0) {
             //printf("Q%d (old:%d, new:%d), : %2.8f \n", i, coordsVec[index].matrixIndex, index, grid.d.f[i * grid.size + coordsVec[index].matrixIndex]);
             qKey += (uint32_t)pow(2, 26 - i);
@@ -499,10 +498,10 @@ void SimulationFileWriter::addQsToVector(int index, std::vector<std::vector<std:
     {
         //int qIndex = i * grid->getSize() + grid->getSparseIndex(index);
         //real q = grid->getDistribution()[qIndex];
-		real q = grid->getQValue(index, i);
+        real q = grid->getQValue(index, i);
         qNode.push_back(q);
-		if (q > 0)
-			printf("Q= %f; Index = %d \n", q, index);
+        if (q > 0)
+            printf("Q= %f; Index = %d \n", q, index);
             //qNode.push_back(q);
   //      else
   //          qNode.push_back(-1);
@@ -540,8 +539,8 @@ void SimulationFileWriter::writeBoundaryQsFile(SPtr<GridBuilder> builder)
   //  for (int rb = 0; rb < QFILES; rb++) {
   //      for (int index = 0; index < qFiles[rb].size(); index++) {
   //          //writeBoundary(qFiles[rb][index], rb);
-		//	writeBoundaryShort(qFiles[rb][index], rb);
-		//}
+        //    writeBoundaryShort(qFiles[rb][index], rb);
+        //}
   //  }
 
     SideType sides[] = {SideType::MX, SideType::PX, SideType::PZ, SideType::MZ, SideType::MY, SideType::PY, SideType::GEOMETRY};
@@ -593,18 +592,18 @@ void SimulationFileWriter::writeBoundary(std::vector<real> boundary, int rb)
 
 void SimulationFileWriter::writeBoundaryShort(std::vector<real> boundary, int rb)
 {
-	uint32_t key = *((uint32_t*)&boundary[boundary.size() - 2]);
-	int index = (int)boundary[boundary.size() - 1];
+    uint32_t key = *((uint32_t*)&boundary[boundary.size() - 2]);
+    int index = (int)boundary[boundary.size() - 1];
 
-	*qStreams[rb] << (index + 1) << " " << key;
+    *qStreams[rb] << (index + 1) << " " << key;
 
-	for (std::size_t i = 0; i < boundary.size() - 2; i++) {
-		*qStreams[rb] << " " << std::fixed << std::setprecision(16) << boundary[i];
-	}
-	*valueStreams[rb] << (index + 1) << " 0 0 0";
+    for (std::size_t i = 0; i < boundary.size() - 2; i++) {
+        *qStreams[rb] << " " << std::fixed << std::setprecision(16) << boundary[i];
+    }
+    *valueStreams[rb] << (index + 1) << " 0 0 0";
 
-	*qStreams[rb] << "\n";
-	*valueStreams[rb] << "\n";
+    *qStreams[rb] << "\n";
+    *valueStreams[rb] << "\n";
 }
 
 void SimulationFileWriter::writeBoundaryShort(SPtr<Grid> grid, SPtr<gg::BoundaryCondition> boundaryCondition, uint side)
diff --git a/src/gpu/GridGenerator/io/SimulationFileWriter/SimulationFileWriter.h b/src/gpu/GridGenerator/io/SimulationFileWriter/SimulationFileWriter.h
index 4a4552f74b69949865e233014d74ac7168b36b31..f3851abfd3372e5d3548cf7c0cd02344aa8acbaa 100644
--- a/src/gpu/GridGenerator/io/SimulationFileWriter/SimulationFileWriter.h
+++ b/src/gpu/GridGenerator/io/SimulationFileWriter/SimulationFileWriter.h
@@ -60,7 +60,7 @@ enum class FILEFORMAT
 class SimulationFileWriter : private NonCreatable
 {
 public:
-    GRIDGENERATOR_EXPORT static void write(std::string folder, SPtr<GridBuilder> builder, FILEFORMAT format);
+    GRIDGENERATOR_EXPORT static void write(const std::string& folder, SPtr<GridBuilder> builder, FILEFORMAT format);
 
 private:
     static void write(SPtr<GridBuilder> builder, FILEFORMAT format);
@@ -85,12 +85,12 @@ private:
     static void addQsToVector(int index, std::vector<std::vector<std::vector<real> > > &qs, SPtr<Grid> grid);
     static void fillRBForNode(int index, int direction, int directionSign, int rb, std::vector<std::vector<std::vector<real> > > &qs, SPtr<Grid> grid);
     static void writeBoundary(std::vector<real> boundary, int rb);
-	static void writeBoundaryShort(std::vector<real> boundary, int rb);
-	static void writeBoundaryShort(SPtr<Grid> grid, SPtr<gg::BoundaryCondition> boundaryCondition, uint side);
+    static void writeBoundaryShort(std::vector<real> boundary, int rb);
+    static void writeBoundaryShort(SPtr<Grid> grid, SPtr<gg::BoundaryCondition> boundaryCondition, uint side);
 
     static void writeCommunicationFiles(SPtr<GridBuilder> builder);
 
-	static void closeFiles();
+    static void closeFiles();
 
 
     static std::ofstream xCoordFile;
diff --git a/src/gpu/VirtualFluids_GPU/BoundaryConditions/BoundaryConditionFactory.cpp b/src/gpu/VirtualFluids_GPU/BoundaryConditions/BoundaryConditionFactory.cpp
index c4e42d765ba42833501ce66ee2ee53d016102d08..01541f8a4a5faab8d70e9e26b815fa5f79fcaf4d 100644
--- a/src/gpu/VirtualFluids_GPU/BoundaryConditions/BoundaryConditionFactory.cpp
+++ b/src/gpu/VirtualFluids_GPU/BoundaryConditions/BoundaryConditionFactory.cpp
@@ -99,9 +99,15 @@ boundaryCondition BoundaryConditionFactory::getSlipBoundaryConditionPost(bool is
         case SlipBC::SlipCompressible:
             return QSlipDevComp27;
             break;
+        case SlipBC::SlipBounceBack:
+            return BBSlipDevComp27;
+            break;
         case SlipBC::SlipCompressibleTurbulentViscosity:
             return QSlipDevCompTurbulentViscosity27;
             break;
+        case SlipBC::SlipPressureCompressibleTurbulentViscosity:
+            return QSlipPressureDevCompTurbulentViscosity27;
+            break;
         default:
             return nullptr;
     }
@@ -126,17 +132,22 @@ boundaryCondition BoundaryConditionFactory::getPressureBoundaryConditionPre() co
         case PressureBC::OutflowNonReflective:
             return QPressNoRhoDev27;
             break;
+        case PressureBC::OutflowNonReflectivePressureCorrection:
+            return QPressZeroRhoOutflowDev27;
         default:
             return nullptr;
     }
 }
 
-boundaryConditionPara BoundaryConditionFactory::getStressBoundaryConditionPost() const
+boundaryConditionWithParameter BoundaryConditionFactory::getStressBoundaryConditionPost() const
 {
     switch (this->stressBoundaryCondition) {
         case StressBC::StressBounceBack:
             return BBStressDev27;
             break;
+        case StressBC::StressPressureBounceBack:
+            return BBStressPressureDev27;
+            break;
         case StressBC::StressCompressible:
             return QStressDevComp27;
             break;
@@ -154,4 +165,4 @@ boundaryCondition BoundaryConditionFactory::getGeometryBoundaryConditionPost() c
     else if (std::holds_alternative<SlipBC>(this->geometryBoundaryCondition))
         return this->getSlipBoundaryConditionPost(true);
     return nullptr;
-}
\ No newline at end of file
+}
diff --git a/src/gpu/VirtualFluids_GPU/BoundaryConditions/BoundaryConditionFactory.h b/src/gpu/VirtualFluids_GPU/BoundaryConditions/BoundaryConditionFactory.h
index c2d374c7df97eb83d363417dc4a13b42b7312cab..7babebecf183744bc6ace6e687f35fad1c7e2e92 100644
--- a/src/gpu/VirtualFluids_GPU/BoundaryConditions/BoundaryConditionFactory.h
+++ b/src/gpu/VirtualFluids_GPU/BoundaryConditions/BoundaryConditionFactory.h
@@ -1,3 +1,35 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file BoundaryConditionFactory.h
+//! \ingroup BoundaryCondition
+//! \author Anna Wellmann
+//=======================================================================================
 #ifndef BC_FACTORY
 #define BC_FACTORY
 
@@ -8,13 +40,13 @@
 
 #include "LBM/LB.h"
 #include "Parameter/Parameter.h"
-#include "grid/BoundaryConditions/Side.h"
+#include "gpu/GridGenerator/grid/BoundaryConditions/Side.h"
 
 struct LBMSimulationParameter;
 class Parameter;
 
 using boundaryCondition = std::function<void(LBMSimulationParameter *, QforBoundaryConditions *)>;
-using boundaryConditionPara = std::function<void(Parameter *, QforBoundaryConditions *, const int level)>;
+using boundaryConditionWithParameter = std::function<void(Parameter *, QforBoundaryConditions *, const int level)>;
 
 class BoundaryConditionFactory
 {
@@ -29,7 +61,9 @@ public:
         VelocityCompressible,
         //! - VelocityAndPressureCompressible = interpolated velocity boundary condition, based on subgrid distances.
         //! Also sets the pressure to the bulk pressure. Can be combined with OutflowNonReflective
-        VelocityAndPressureCompressible
+        VelocityAndPressureCompressible,
+        //! - NotSpecified =  the user did not set a boundary condition
+        NotSpecified
     };
 
     //! \brief An enumeration for selecting a no-slip boundary condition
@@ -53,9 +87,14 @@ public:
         SlipIncompressible,
         //! - SlipCompressible = interpolated slip boundary condition, based on subgrid distances
         SlipCompressible,
-        //! - SlipCompressible = interpolated slip boundary condition, based on subgrid distances.
+        //! - SlipBounceBack = simple bounce-back slip boundary condition.
+        SlipBounceBack,
         //! With turbulent viscosity -> para->setUseTurbulentViscosity(true) has to be set to true
-        SlipCompressibleTurbulentViscosity
+        SlipCompressibleTurbulentViscosity,
+        //! - SlipPressureCompressibleTurbulentViscosity = slip boundary condition with turbulent viscosity -> para->setUseTurbulentViscosity(true) has to be set to true
+        SlipPressureCompressibleTurbulentViscosity,
+        //! - NotSpecified =  the user did not set a boundary condition
+        NotSpecified
     };
 
     //! \brief An enumeration for selecting a pressure boundary condition
@@ -69,7 +108,11 @@ public:
         //! - PressureNonEquilibriumCompressible = pressure boundary condition based on non-equilibrium
         PressureNonEquilibriumCompressible,
         //! - OutflowNonReflective = outflow boundary condition, should be combined with VelocityAndPressureCompressible
-        OutflowNonReflective
+        OutflowNonReflective,
+        //! - OutflowNonReflectivePressureCorrection = like OutflowNonReflective, but also reduces pressure overshoot
+        OutflowNonReflectivePressureCorrection,
+        //! - NotSpecified =  the user did not set a boundary condition
+        NotSpecified
     };
 
     //! \brief An enumeration for selecting a stress boundary condition
@@ -77,7 +120,11 @@ public:
         //! - StressCompressible
         StressCompressible,
         //! - StressBounceBack
-        StressBounceBack
+        StressBounceBack,
+        //! - StressPressureBounceBack
+        StressPressureBounceBack,
+        //! - NotSpecified =  the user did not set a boundary condition
+        NotSpecified
     };
 
     // enum class OutflowBoundaryCondition {};  // TODO:
@@ -88,35 +135,35 @@ public:
     void setSlipBoundaryCondition(const BoundaryConditionFactory::SlipBC boundaryConditionType);
     void setPressureBoundaryCondition(const BoundaryConditionFactory::PressureBC boundaryConditionType);
     void setStressBoundaryCondition(const BoundaryConditionFactory::StressBC boundaryConditionType);
-    //!param boundaryConditionType: a velocity, no-slip or slip boundary condition
+    //! \brief set a boundary condition for the geometry
+    //! \param boundaryConditionType a velocity, no-slip or slip boundary condition
     //! \details suggestions for boundaryConditionType:
     //!
     //! - velocity: VelocityIncompressible, VelocityCompressible, VelocityAndPressureCompressible
     //!
-    //! - no-slip:  NoSlipBounceBack, NoSlipIncompressible, NoSlipCompressible, NoSlip3rdMomentsCompressible
+    //! - no-slip: NoSlipBounceBack, NoSlipIncompressible, NoSlipCompressible, NoSlip3rdMomentsCompressible
     //!
-    //! - slip:     SlipIncompressible
+    //! - slip: only use a slip boundary condition which sets the normals
     void setGeometryBoundaryCondition(const std::variant<VelocityBC, NoSlipBC, SlipBC> boundaryConditionType);
 
     // void setOutflowBoundaryCondition(...); // TODO:
     // https://git.rz.tu-bs.de/m.schoenherr/VirtualFluids_dev/-/issues/16
 
-    boundaryCondition getVelocityBoundaryConditionPost(bool isGeometryBC = false) const;
-    boundaryCondition getNoSlipBoundaryConditionPost(bool isGeometryBC = false) const;
-    boundaryCondition getSlipBoundaryConditionPost(bool isGeometryBC = false) const;
-    boundaryCondition getPressureBoundaryConditionPre() const;
-    boundaryCondition getGeometryBoundaryConditionPost() const;
+    [[nodiscard]] boundaryCondition getVelocityBoundaryConditionPost(bool isGeometryBC = false) const;
+    [[nodiscard]] boundaryCondition getNoSlipBoundaryConditionPost(bool isGeometryBC = false) const;
+    [[nodiscard]] boundaryCondition getSlipBoundaryConditionPost(bool isGeometryBC = false) const;
+    [[nodiscard]] boundaryCondition getPressureBoundaryConditionPre() const;
+    [[nodiscard]] boundaryCondition getGeometryBoundaryConditionPost() const;
 
-    boundaryConditionPara getStressBoundaryConditionPost() const;
+    [[nodiscard]] boundaryConditionWithParameter getStressBoundaryConditionPost() const;
 
 private:
-    VelocityBC velocityBoundaryCondition;
+    VelocityBC velocityBoundaryCondition = VelocityBC::NotSpecified;
     NoSlipBC noSlipBoundaryCondition = NoSlipBC::NoSlipImplicitBounceBack;
-    SlipBC slipBoundaryCondition;
-    PressureBC pressureBoundaryCondition;
-    std::variant<VelocityBC, NoSlipBC, SlipBC> geometryBoundaryCondition  = NoSlipBC::NoSlipImplicitBounceBack;
-    StressBC stressBoundaryCondition;
-
+    SlipBC slipBoundaryCondition = SlipBC::NotSpecified;
+    PressureBC pressureBoundaryCondition = PressureBC::NotSpecified;
+    std::variant<VelocityBC, NoSlipBC, SlipBC> geometryBoundaryCondition = NoSlipBC::NoSlipImplicitBounceBack;
+    StressBC stressBoundaryCondition = StressBC::NotSpecified;
 
     // OutflowBoundaryConditon outflowBC // TODO: https://git.rz.tu-bs.de/m.schoenherr/VirtualFluids_dev/-/issues/16
 };
diff --git a/src/gpu/VirtualFluids_GPU/BoundaryConditions/BoundaryConditionFactoryTest.cpp b/src/gpu/VirtualFluids_GPU/BoundaryConditions/BoundaryConditionFactoryTest.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..14d01d6df3334d767d97e8db87b7e2a9964022e3
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/BoundaryConditions/BoundaryConditionFactoryTest.cpp
@@ -0,0 +1,238 @@
+#include <gmock/gmock.h>
+#include <typeindex>
+
+#include "BoundaryConditionFactory.h"
+#include "GPU/GPU_Interface.h"
+#include "gpu/GridGenerator/grid/BoundaryConditions/BoundaryCondition.h"
+
+using bcFunction = void (*)(LBMSimulationParameter *, QforBoundaryConditions *);
+using bcFunctionParamter = void (*)(Parameter *, QforBoundaryConditions *, const int level);
+
+// tests for default boundary conditions
+TEST(BoundaryConditionFactoryTest, defaultVelocityBC)
+{
+    const BoundaryConditionFactory factory{};
+    auto velocityBc = factory.getVelocityBoundaryConditionPost();
+    EXPECT_THAT(velocityBc, testing::Eq(nullptr)); // nothing was set, so the returned function is empty
+    EXPECT_THROW(velocityBc(nullptr, nullptr), std::bad_function_call); // calling an empty function must throw
+}
+
+TEST(BoundaryConditionFactoryTest, defaultNoSlipBC)
+{
+    const BoundaryConditionFactory factory{};
+    auto noSlipBc = factory.getNoSlipBoundaryConditionPost();
+    EXPECT_NO_THROW(noSlipBc(nullptr, nullptr)); // the default no-slip function is callable and should not throw
+}
+
+TEST(BoundaryConditionFactoryTest, defaultSlipBC)
+{
+    const BoundaryConditionFactory factory{};
+    auto slipBc = factory.getSlipBoundaryConditionPost();
+    EXPECT_THAT(slipBc, testing::Eq(nullptr)); // nothing was set, so the returned function is empty
+    EXPECT_THROW(slipBc(nullptr, nullptr), std::bad_function_call); // calling an empty function must throw
+}
+
+TEST(BoundaryConditionFactoryTest, defaultPressureBC)
+{
+    const BoundaryConditionFactory factory{};
+    auto pressureBc = factory.getPressureBoundaryConditionPre();
+    EXPECT_THAT(pressureBc, testing::Eq(nullptr)); // nothing was set, so the returned function is empty
+    EXPECT_THROW(pressureBc(nullptr, nullptr), std::bad_function_call); // calling an empty function must throw
+}
+
+TEST(BoundaryConditionFactoryTest, defaultGeometryBC)
+{
+    const BoundaryConditionFactory factory{};
+    auto geometryBc = factory.getGeometryBoundaryConditionPost();
+    EXPECT_NO_THROW(geometryBc(nullptr, nullptr)); // the default geometry function is callable and should not throw
+}
+
+TEST(BoundaryConditionFactoryTest, defaultStressBC)
+{
+    const BoundaryConditionFactory factory{};
+    auto stressBc = factory.getStressBoundaryConditionPost();
+    EXPECT_THAT(stressBc, testing::Eq(nullptr)); // nothing was set, so the returned function is empty
+    EXPECT_THROW(stressBc(nullptr, nullptr, 0), std::bad_function_call); // calling an empty function must throw
+}
+
+bcFunction getVelocityBcTarget(BoundaryConditionFactory &bcFactory)
+{
+    // unwrap the plain function pointer held by the function wrapper which the factory returns
+    const auto wrappedBc = bcFactory.getVelocityBoundaryConditionPost();
+    const bcFunction *storedPointer = wrappedBc.target<bcFunction>();
+    return *storedPointer;
+}
+
+// tests for boundary conditions which are set by the user (tests both set and get functions)
+
+TEST(BoundaryConditionFactoryTest, velocityBC)
+{
+    using VelocityBC = BoundaryConditionFactory::VelocityBC;
+    BoundaryConditionFactory factory{}; // each type set below must be returned as the matching function
+    factory.setVelocityBoundaryCondition(VelocityBC::VelocitySimpleBounceBackCompressible);
+    EXPECT_TRUE(getVelocityBcTarget(factory) == QVelDevicePlainBB27)
+        << "The returned boundary condition is not the expected function QVelDevicePlainBB27.";
+
+    factory.setVelocityBoundaryCondition(VelocityBC::VelocityIncompressible);
+    EXPECT_TRUE(getVelocityBcTarget(factory) == QVelDev27)
+        << "The returned boundary condition is not the expected function QVelDev27.";
+
+    factory.setVelocityBoundaryCondition(VelocityBC::VelocityCompressible);
+    EXPECT_TRUE(getVelocityBcTarget(factory) == QVelDevComp27)
+        << "The returned boundary condition is not the expected function QVelDevComp27.";
+
+    factory.setVelocityBoundaryCondition(VelocityBC::VelocityAndPressureCompressible);
+    EXPECT_TRUE(getVelocityBcTarget(factory) == QVelDevCompZeroPress27)
+        << "The returned boundary condition is not the expected function QVelDevCompZeroPress27.";
+}
+
+bcFunction getNoSlipBcTarget(BoundaryConditionFactory &bcFactory)
+{
+    // unwrap the plain function pointer held by the function wrapper which the factory returns
+    const auto wrappedBc = bcFactory.getNoSlipBoundaryConditionPost();
+    const bcFunction *storedPointer = wrappedBc.target<bcFunction>();
+    return *storedPointer;
+}
+
+TEST(BoundaryConditionFactoryTest, noSlipBC)
+{
+    // each no-slip type set on the factory must be returned by the getter as the matching function
+    auto bcFactory = BoundaryConditionFactory();
+    bcFactory.setNoSlipBoundaryCondition(BoundaryConditionFactory::NoSlipBC::NoSlipImplicitBounceBack);
+    auto bc = bcFactory.getNoSlipBoundaryConditionPost();
+    EXPECT_NO_THROW(bc(nullptr, nullptr)); // empty lambda function should not throw
+
+    bcFactory.setNoSlipBoundaryCondition(BoundaryConditionFactory::NoSlipBC::NoSlipBounceBack);
+    EXPECT_TRUE( *(getNoSlipBcTarget(bcFactory)) == BBDev27)
+        << "The returned boundary condition is not the expected function BBDev27.";
+
+    bcFactory.setNoSlipBoundaryCondition(BoundaryConditionFactory::NoSlipBC::NoSlipIncompressible);
+    EXPECT_TRUE( *(getNoSlipBcTarget(bcFactory)) == QDev27)
+        << "The returned boundary condition is not the expected function QDev27.";
+
+    bcFactory.setNoSlipBoundaryCondition(BoundaryConditionFactory::NoSlipBC::NoSlipCompressible);
+    EXPECT_TRUE( *(getNoSlipBcTarget(bcFactory)) == QDevComp27)
+        << "The returned boundary condition is not the expected function QDevComp27.";
+
+    bcFactory.setNoSlipBoundaryCondition(BoundaryConditionFactory::NoSlipBC::NoSlip3rdMomentsCompressible);
+    EXPECT_TRUE( *(getNoSlipBcTarget(bcFactory)) == QDev3rdMomentsComp27)
+        << "The returned boundary condition is not the expected function QDev3rdMomentsComp27.";
+}
+
+bcFunction getSlipBcTarget(BoundaryConditionFactory &bcFactory)
+{
+    // unwrap the plain function pointer held by the function wrapper which the factory returns
+    const auto wrappedBc = bcFactory.getSlipBoundaryConditionPost();
+    const bcFunction *storedPointer = wrappedBc.target<bcFunction>();
+    return *storedPointer;
+}
+
+TEST(BoundaryConditionFactoryTest, slipBC)
+{
+    using SlipBC = BoundaryConditionFactory::SlipBC;
+    BoundaryConditionFactory factory{}; // each type set below must be returned as the matching function
+    factory.setSlipBoundaryCondition(SlipBC::SlipIncompressible);
+    EXPECT_TRUE(getSlipBcTarget(factory) == QSlipDev27)
+        << "The returned boundary condition is not the expected function QSlipDev27.";
+
+    factory.setSlipBoundaryCondition(SlipBC::SlipCompressible);
+    EXPECT_TRUE(getSlipBcTarget(factory) == QSlipDevComp27)
+        << "The returned boundary condition is not the expected function QSlipDevComp27.";
+
+    factory.setSlipBoundaryCondition(SlipBC::SlipCompressibleTurbulentViscosity);
+    EXPECT_TRUE(getSlipBcTarget(factory) == QSlipDevCompTurbulentViscosity27)
+        << "The returned boundary condition is not the expected function QSlipDevCompTurbulentViscosity27.";
+}
+
+bcFunction getPressureBcTarget(BoundaryConditionFactory &bcFactory)
+{
+    // unwrap the plain function pointer held by the function wrapper which the factory returns
+    const auto wrappedBc = bcFactory.getPressureBoundaryConditionPre();
+    const bcFunction *storedPointer = wrappedBc.target<bcFunction>();
+    return *storedPointer;
+}
+
+TEST(BoundaryConditionFactoryTest, pressureBC)
+{
+    using PressureBC = BoundaryConditionFactory::PressureBC;
+    BoundaryConditionFactory factory{}; // each type set below must be returned as the matching function
+    factory.setPressureBoundaryCondition(PressureBC::PressureEquilibrium);
+    EXPECT_TRUE(getPressureBcTarget(factory) == QPressDev27)
+        << "The returned boundary condition is not the expected function QPressDev27.";
+
+    factory.setPressureBoundaryCondition(PressureBC::PressureEquilibrium2);
+    EXPECT_TRUE(getPressureBcTarget(factory) == QPressDevEQZ27)
+        << "The returned boundary condition is not the expected function QPressDevEQZ27.";
+
+    factory.setPressureBoundaryCondition(PressureBC::PressureNonEquilibriumIncompressible);
+    EXPECT_TRUE(getPressureBcTarget(factory) == QPressDevIncompNEQ27)
+        << "The returned boundary condition is not the expected function QPressDevIncompNEQ27.";
+
+    factory.setPressureBoundaryCondition(PressureBC::PressureNonEquilibriumCompressible);
+    EXPECT_TRUE(getPressureBcTarget(factory) == QPressDevNEQ27)
+        << "The returned boundary condition is not the expected function QPressDevNEQ27.";
+
+    factory.setPressureBoundaryCondition(PressureBC::OutflowNonReflective);
+    EXPECT_TRUE(getPressureBcTarget(factory) == QPressNoRhoDev27)
+        << "The returned boundary condition is not the expected function QPressNoRhoDev27.";
+}
+
+bcFunction getGeometryBcTarget(BoundaryConditionFactory &bcFactory)
+{
+    // unwrap the plain function pointer held by the function wrapper which the factory returns
+    const auto wrappedBc = bcFactory.getGeometryBoundaryConditionPost();
+    const bcFunction *storedPointer = wrappedBc.target<bcFunction>();
+    return *storedPointer;
+}
+
+TEST(BoundaryConditionFactoryTest, geometryBC)
+{
+    using Factory = BoundaryConditionFactory;
+    Factory factory{}; // each type set below must be returned as the matching function
+    // velocity
+    factory.setGeometryBoundaryCondition(Factory::VelocityBC::VelocityIncompressible);
+    EXPECT_TRUE(getGeometryBcTarget(factory) == QVelDev27)
+        << "The returned boundary condition is not the expected function QVelDev27.";
+
+    factory.setGeometryBoundaryCondition(Factory::VelocityBC::VelocityCompressible);
+    EXPECT_TRUE(getGeometryBcTarget(factory) == QVelDevComp27)
+        << "The returned boundary condition is not the expected function QVelDevComp27.";
+
+    factory.setGeometryBoundaryCondition(Factory::VelocityBC::VelocityAndPressureCompressible);
+    EXPECT_TRUE(getGeometryBcTarget(factory) == QVelDevCompZeroPress27)
+        << "The returned boundary condition is not the expected function QVelDevCompZeroPress27.";
+
+    // no slip
+    factory.setGeometryBoundaryCondition(Factory::NoSlipBC::NoSlipImplicitBounceBack);
+    auto implicitBounceBack = factory.getGeometryBoundaryConditionPost();
+    EXPECT_NO_THROW(implicitBounceBack(nullptr, nullptr)); // empty lambda function should not throw
+
+    factory.setGeometryBoundaryCondition(Factory::NoSlipBC::NoSlipIncompressible);
+    EXPECT_TRUE(getGeometryBcTarget(factory) == QDev27)
+        << "The returned boundary condition is not the expected function QDev27.";
+
+    factory.setGeometryBoundaryCondition(Factory::NoSlipBC::NoSlipCompressible);
+    EXPECT_TRUE(getGeometryBcTarget(factory) == QDevComp27)
+        << "The returned boundary condition is not the expected function QDevComp27.";
+
+    factory.setGeometryBoundaryCondition(Factory::NoSlipBC::NoSlip3rdMomentsCompressible);
+    EXPECT_TRUE(getGeometryBcTarget(factory) == QDev3rdMomentsComp27)
+        << "The returned boundary condition is not the expected function QDev3rdMomentsComp27.";
+}
+
+TEST(BoundaryConditionFactoryTest, stressBoundaryConditions)
+{
+    using StressBC = BoundaryConditionFactory::StressBC;
+    BoundaryConditionFactory factory{}; // each type set below must be returned as the matching function
+    factory.setStressBoundaryCondition(StressBC::StressBounceBack);
+    auto stressBc = factory.getStressBoundaryConditionPost();
+    const bcFunctionParamter *storedPointer = stressBc.target<bcFunctionParamter>();
+    EXPECT_TRUE(*storedPointer == BBStressDev27)
+        << "The returned boundary condition is not the expected function BBStressDev27.";
+
+    factory.setStressBoundaryCondition(StressBC::StressCompressible);
+    stressBc = factory.getStressBoundaryConditionPost();
+    storedPointer = stressBc.target<bcFunctionParamter>();
+    EXPECT_TRUE(*storedPointer == QStressDevComp27)
+        << "The returned boundary condition is not the expected function QStressDevComp27.";
+}
diff --git a/src/gpu/VirtualFluids_GPU/CMakeLists.txt b/src/gpu/VirtualFluids_GPU/CMakeLists.txt
index 53707fa381228e4d2ca380e3ba16f5bf0e5f2d38..40496abc6902d01f642b9e117d170d95c199100f 100644
--- a/src/gpu/VirtualFluids_GPU/CMakeLists.txt
+++ b/src/gpu/VirtualFluids_GPU/CMakeLists.txt
@@ -21,5 +21,6 @@ if(BUILD_VF_UNIT_TESTS)
     set_source_files_properties(Kernel/Utilities/DistributionHelperTests.cpp PROPERTIES LANGUAGE CUDA)
 	set_source_files_properties(DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreamsTest.cpp PROPERTIES LANGUAGE CUDA)
     set_source_files_properties(Communication/ExchangeData27Test.cpp PROPERTIES LANGUAGE CUDA)
+    set_source_files_properties(BoundaryConditions/BoundaryConditionFactoryTest.cpp PROPERTIES LANGUAGE CUDA)
     target_include_directories(VirtualFluids_GPUTests PRIVATE "${VF_THIRD_DIR}/cuda_samples/")
 endif()
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/Calc2ndMoments.h b/src/gpu/VirtualFluids_GPU/Calculation/Calc2ndMoments.h
index 9b05b9baae1ba13bfcaf6848d1855d7273ca6d8b..42997be82687ab480a2d4d45b0793ba307e2ebf4 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/Calc2ndMoments.h
+++ b/src/gpu/VirtualFluids_GPU/Calculation/Calc2ndMoments.h
@@ -7,18 +7,18 @@
 #include "GPU/CudaMemoryManager.h"
 
 //2nd
-extern "C" void alloc2ndMoments(Parameter* para, CudaMemoryManager* cudaMemoryManager);
-extern "C" void init2ndMoments(Parameter* para);
-extern "C" void calc2ndMoments(Parameter* para, CudaMemoryManager* cudaMemoryManager);
+void alloc2ndMoments(Parameter* para, CudaMemoryManager* cudaMemoryManager);
+void init2ndMoments(Parameter* para);
+void calc2ndMoments(Parameter* para, CudaMemoryManager* cudaMemoryManager);
 
 //3rd
-extern "C" void alloc3rdMoments(Parameter* para, CudaMemoryManager* cudaMemoryManager);
-extern "C" void init3rdMoments(Parameter* para);
-extern "C" void calc3rdMoments(Parameter* para, CudaMemoryManager* cudaMemoryManager);
+void alloc3rdMoments(Parameter* para, CudaMemoryManager* cudaMemoryManager);
+void init3rdMoments(Parameter* para);
+void calc3rdMoments(Parameter* para, CudaMemoryManager* cudaMemoryManager);
 
 //higher order
-extern "C" void allocHigherOrderMoments(Parameter* para, CudaMemoryManager* cudaMemoryManager);
-extern "C" void initHigherOrderMoments(Parameter* para);
-extern "C" void calcHigherOrderMoments(Parameter* para, CudaMemoryManager* cudaMemoryManager);
+void allocHigherOrderMoments(Parameter* para, CudaMemoryManager* cudaMemoryManager);
+void initHigherOrderMoments(Parameter* para);
+void calcHigherOrderMoments(Parameter* para, CudaMemoryManager* cudaMemoryManager);
 
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/CalcMedian.h b/src/gpu/VirtualFluids_GPU/Calculation/CalcMedian.h
index 71b7b7afdc5a0508d030e1698f352cd7686b96dc..262c22a1a557bfdd6aefaee492d2f8351f166599 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/CalcMedian.h
+++ b/src/gpu/VirtualFluids_GPU/Calculation/CalcMedian.h
@@ -6,9 +6,9 @@
 #include "Parameter/Parameter.h"
 #include "GPU/CudaMemoryManager.h"
 
-extern "C" void allocMedian(Parameter* para, CudaMemoryManager* cudaMemoryManager);
-extern "C" void allocMedianAD(Parameter* para, CudaMemoryManager* cudaMemoryManager);
-extern "C" void calcMedian(Parameter* para, unsigned int tdiff);
-extern "C" void resetMedian(Parameter* para);
+void allocMedian(Parameter* para, CudaMemoryManager* cudaMemoryManager);
+void allocMedianAD(Parameter* para, CudaMemoryManager* cudaMemoryManager);
+void calcMedian(Parameter* para, unsigned int tdiff);
+void resetMedian(Parameter* para);
 
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/CalcTurbulenceIntensity.cpp b/src/gpu/VirtualFluids_GPU/Calculation/CalcTurbulenceIntensity.cpp
index 54fa98c1550fb2a8e47e0cc2e6541295335e3e30..e91fb6f5c232bd98073a1c930149693f8af4b078 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/CalcTurbulenceIntensity.cpp
+++ b/src/gpu/VirtualFluids_GPU/Calculation/CalcTurbulenceIntensity.cpp
@@ -153,7 +153,7 @@ void writeTiStuffToFile(Parameter *para, uint timestep, int sizeOfTiArray, std::
     // set filename
     std::string names;
     std::for_each(datanames.begin(), datanames.end(), [&names](const std::string &s) { return names += "_" + s; });
-    std::string ffname = para->getFName() + StringUtil::toString<int>(para->getMyID()) + "_" +
+    std::string ffname = para->getFName() + StringUtil::toString<int>(para->getMyProcessID()) + "_" +
                          StringUtil::toString<int>(timestep) + names + "_ti.txt";
     const char *fname = ffname.c_str();
     ////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/CalcTurbulenceIntensity.h b/src/gpu/VirtualFluids_GPU/Calculation/CalcTurbulenceIntensity.h
index 6893d6dcb4f4c35cc55fda6ad3dbdf93c0bd2a83..f70973eb5921a17c3229a026623de2a0ef9f3ce4 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/CalcTurbulenceIntensity.h
+++ b/src/gpu/VirtualFluids_GPU/Calculation/CalcTurbulenceIntensity.h
@@ -6,11 +6,11 @@
 #include "Parameter/Parameter.h"
 #include "GPU/CudaMemoryManager.h"
 
-extern "C" void allocTurbulenceIntensity(Parameter *para, CudaMemoryManager *cudaMemoryManager);
-extern "C" void calcVelocityAndFluctuations(Parameter *para, CudaMemoryManager *cudaMemoryManager, uint tdiff);
-extern "C" void calcTurbulenceIntensity(Parameter *para, CudaMemoryManager *cudaMemoryManager, uint tdiff);
-extern "C" void resetVelocityFluctuationsAndMeans(Parameter *para, CudaMemoryManager *cudaMemoryManager);
-extern "C" void cudaFreeTurbulenceIntensityArrays(Parameter *para, CudaMemoryManager *cudaMemoryManager);
+void allocTurbulenceIntensity(Parameter *para, CudaMemoryManager *cudaMemoryManager);
+void calcVelocityAndFluctuations(Parameter *para, CudaMemoryManager *cudaMemoryManager, uint tdiff);
+void calcTurbulenceIntensity(Parameter *para, CudaMemoryManager *cudaMemoryManager, uint tdiff);
+void resetVelocityFluctuationsAndMeans(Parameter *para, CudaMemoryManager *cudaMemoryManager);
+void cudaFreeTurbulenceIntensityArrays(Parameter *para, CudaMemoryManager *cudaMemoryManager);
 
 
 void writeTurbulenceIntensityToFile(Parameter *para, uint timestep);
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/CollisionStrategy.h b/src/gpu/VirtualFluids_GPU/Calculation/CollisionStrategy.h
new file mode 100644
index 0000000000000000000000000000000000000000..70894346cbd334da3a4f8aa82648c33fda733f76
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/Calculation/CollisionStrategy.h
@@ -0,0 +1,35 @@
+#ifndef COLLISONSTRATEGY_H
+#define COLLISONSTRATEGY_H
+
+#include "UpdateGrid27.h"
+
+//! \brief get a function which performs the collision operator and the communication between gpus/processes
+//! \return a function to perform the collision and, for multi-gpu simulations, also the communication
+std::function<void(UpdateGrid27 *updateGrid, Parameter *para, int level, unsigned int t)>
+    getFunctionForCollisionAndExchange(const bool useStreams, const int numberOfMpiProcesses,
+                                       const bool kernelNeedsFluidNodeIndicesToRun);
+
+//! \brief Version of collision: for multi-gpu simulations, without communication hiding ("streams"), for newer kernels that use an array of fluid nodes to determine which nodes to update
+class CollisionAndExchange_noStreams_indexKernel
+{
+public:
+    void operator()(UpdateGrid27 *updateGrid, Parameter *para, int level, unsigned int t); //!< run the collision, then the exchange, on the given level
+};
+
+//! \brief Version of collision: for multi-gpu simulations, without communication hiding ("streams"), for old kernels
+//! \details the only option for old kernels
+class CollisionAndExchange_noStreams_oldKernel
+{
+public:
+    void operator()(UpdateGrid27 *updateGrid, Parameter *para, int level, unsigned int t); //!< run the collision, then the exchange, on the given level
+};
+
+//! \brief Version of collision: for multi-gpu simulations, with communication hiding ("streams"), for newer kernels that use an array of fluid nodes to determine which nodes to update
+//! \details recommended for multi-gpu simulations if the chosen collision kernel supports the use of cuda streams
+class CollisionAndExchange_streams
+{
+public:
+    void operator()(UpdateGrid27 *updateGrid, Parameter *para, int level, unsigned int t); //!< run the collision and the exchange on the given level
+};
+
+#endif
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/CollisisionStrategy.cpp b/src/gpu/VirtualFluids_GPU/Calculation/CollisisionStrategy.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..4a14d19c10936f84379f332ef24f081f0ebb0cb7
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/Calculation/CollisisionStrategy.cpp
@@ -0,0 +1,88 @@
+#include "CollisionStrategy.h"
+#include "Parameter/CudaStreamManager.h"
+#include "Parameter/Parameter.h"
+#include "logger/Logger.h"
+
+//! \brief select the function object for collision and exchange which matches the given configuration
+//! \throws std::runtime_error if cuda streams are requested but the configuration does not support them
+std::function<void(UpdateGrid27 *updateGrid, Parameter *para, int level, unsigned int t)>
+getFunctionForCollisionAndExchange(const bool useStreams, const int numberOfMpiProcesses,
+                                   const bool kernelNeedsFluidNodeIndicesToRun)
+{
+    VF_LOG_INFO("Function used for collisionAndExchange: ");
+
+    if (!useStreams) {
+        if (kernelNeedsFluidNodeIndicesToRun) {
+            VF_LOG_INFO("CollisionAndExchange_noStreams_indexKernel()");
+            return CollisionAndExchange_noStreams_indexKernel();
+        }
+        VF_LOG_INFO("CollisionAndExchange_noStreams_oldKernel()");
+        return CollisionAndExchange_noStreams_oldKernel();
+    }
+
+    // streams were requested: they need an index based kernel and more than one MPI process
+    if (!kernelNeedsFluidNodeIndicesToRun) {
+        VF_LOG_INFO("Cuda Streams can only be used with kernels which run using fluidNodesIndices.");
+    } else if (numberOfMpiProcesses <= 1) {
+        VF_LOG_INFO("Cuda Streams can only be used with multiple MPI processes.");
+    } else {
+        VF_LOG_INFO("CollisionAndExchange_streams()");
+        return CollisionAndExchange_streams();
+    }
+    throw std::runtime_error("Invalid Configuration for collision and exchange");
+}
+
+void CollisionAndExchange_noStreams_indexKernel::operator()(UpdateGrid27 *updateGrid, Parameter *para, int level,
+                                                            unsigned int t)
+{
+    //! \details steps:
+    //!
+    //! 1. run the collision for the nodes listed in fluidNodeIndices
+    //!
+    auto &&levelParameter = para->getParD(level);
+    updateGrid->collisionUsingIndices(level, t, levelParameter->fluidNodeIndices,
+                                      levelParameter->numberOfFluidNodes, -1);
+    //! 2. exchange information between GPUs
+    updateGrid->exchangeMultiGPU_noStreams_withPrepare(level, false);
+}
+
+void CollisionAndExchange_noStreams_oldKernel::operator()(UpdateGrid27 *updateGrid, Parameter *para, int level,
+                                                          unsigned int t)
+{
+    //! \details steps:
+    //!
+    //! 1. run the collision via collisionAllNodes (para is not used by this version)
+    //!
+    updateGrid->collisionAllNodes(level, t);
+
+    //! 2. exchange information between GPUs
+    updateGrid->exchangeMultiGPU_noStreams_withPrepare(level, false);
+}
+
+void CollisionAndExchange_streams::operator()(UpdateGrid27 *updateGrid, Parameter *para, int level, unsigned int t)
+{
+    auto &&streamManager = para->getStreamManager();
+    const int borderStreamIndex = streamManager->getBorderStreamIndex();
+    const int bulkStreamIndex = streamManager->getBulkStreamIndex();
+    auto &&levelParameter = para->getParD(level);
+    //! \details steps:
+    //!
+    //! 1. run the collision for the border nodes (fluidNodeIndicesBorder) on the border stream
+    //!
+    updateGrid->collisionUsingIndices(level, t, levelParameter->fluidNodeIndicesBorder,
+                                      levelParameter->numberOfFluidNodesBorder, borderStreamIndex);
+
+    //! 2. prepare the exchange between gpus on the border stream and, when streams are in use, trigger the start of the bulk kernel
+    //!
+    updateGrid->prepareExchangeMultiGPU(level, borderStreamIndex);
+    if (para->getUseStreams())
+        streamManager->triggerStartBulkKernel(borderStreamIndex);
+
+    //! 3. wait for the start event, then launch the collision for the nodes in fluidNodeIndices on the bulk stream
+    //!
+    streamManager->waitOnStartBulkKernelEvent(bulkStreamIndex);
+    updateGrid->collisionUsingIndices(level, t, levelParameter->fluidNodeIndices,
+                                      levelParameter->numberOfFluidNodes, bulkStreamIndex);
+    //! 4. exchange information between GPUs
+    updateGrid->exchangeMultiGPU(level, borderStreamIndex);
+}
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/Cp.cpp b/src/gpu/VirtualFluids_GPU/Calculation/Cp.cpp
index 08901207f6b6d132a13666bb41dd2cc0508f8724..9ee4cb917cdbf76dddf988b4456d5d611c9a11e0 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/Cp.cpp
+++ b/src/gpu/VirtualFluids_GPU/Calculation/Cp.cpp
@@ -69,7 +69,7 @@ void printCpTopIntermediateStep(Parameter* para, unsigned int t, int lev)
 {
 	//////////////////////////////////////////////////////////////////////////
 	//set filename
-	std::string ffname = para->getFName() + StringUtil::toString<int>(para->getMyID()) + "_" + StringUtil::toString<int>(t) + "_cp_top.txt";
+	std::string ffname = para->getFName() + StringUtil::toString<int>(para->getMyProcessID()) + "_" + StringUtil::toString<int>(t) + "_cp_top.txt";
 	const char* fname = ffname.c_str();
 	//////////////////////////////////////////////////////////////////////////
 	//set ofstream
@@ -101,7 +101,7 @@ void printCpTop(Parameter* para, CudaMemoryManager* cudaMemoryManager, int lev)
 {
 	//////////////////////////////////////////////////////////////////////////
 	//set filename
-	std::string ffname = para->getFName()+StringUtil::toString<int>(para->getMyID())+"_cp_top.txt";
+	std::string ffname = para->getFName()+StringUtil::toString<int>(para->getMyProcessID())+"_cp_top.txt";
 	const char* fname = ffname.c_str();
 	//////////////////////////////////////////////////////////////////////////
 	//set ofstream
@@ -137,7 +137,7 @@ void printCpBottom(Parameter* para, CudaMemoryManager* cudaMemoryManager)
 	int lev = para->getMaxLevel();
 	//////////////////////////////////////////////////////////////////////////
 	//set filename
-	std::string ffname = para->getFName()+StringUtil::toString<int>(para->getMyID())+"_cp_bottom.txt";
+	std::string ffname = para->getFName()+StringUtil::toString<int>(para->getMyProcessID())+"_cp_bottom.txt";
 	const char* fname = ffname.c_str();
 	//////////////////////////////////////////////////////////////////////////
 	//set ofstream
@@ -173,7 +173,7 @@ void printCpBottom2(Parameter* para, CudaMemoryManager* cudaMemoryManager)
 	int lev = para->getMaxLevel();
 	//////////////////////////////////////////////////////////////////////////
 	//set filename
-	std::string ffname = para->getFName()+StringUtil::toString<int>(para->getMyID())+"_cp_bottom2.txt";
+	std::string ffname = para->getFName()+StringUtil::toString<int>(para->getMyProcessID())+"_cp_bottom2.txt";
 	const char* fname = ffname.c_str();
 	//////////////////////////////////////////////////////////////////////////
 	//set ofstream
@@ -313,19 +313,19 @@ void printCaseFile(Parameter* para)
 	//////////////////////////////////////////////////////////////////////////
 	double deltaXcoarse = 0.256; // [m]
 	double deltat = (para->getVelocity() * deltaXcoarse) / (para->getVelocity() * para->getVelocityRatio());
-	unsigned int numberOfSteps = (unsigned int)((para->getTEnd() - para->getTStartOut()) * pow(2,5) );
+	unsigned int numberOfSteps = (unsigned int)((para->getTimestepEnd() - para->getTimestepStartOut()) * pow(2,5) );
 	//cout << "number of nodes:" << numberOfSteps << endl;
 	//////////////////////////////////////////////////////////////////////////
 	//set filename
-	std::string ffname = para->getFName() + "_" + StringUtil::toString<int>(para->getMyID()) + ".case";
+	std::string ffname = para->getFName() + "_" + StringUtil::toString<int>(para->getMyProcessID()) + ".case";
 	const char* fname = ffname.c_str();
 	//////////////////////////////////////////////////////////////////////////
 	//set filename geo
-	std::string ffnameGeo = para->getOutputPrefix() + "_" + StringUtil::toString<int>(para->getMyID()) + ".geo";
+	std::string ffnameGeo = para->getOutputPrefix() + "_" + StringUtil::toString<int>(para->getMyProcessID()) + ".geo";
 	const char* fnameGeo = ffnameGeo.c_str();
 	//////////////////////////////////////////////////////////////////////////
 	//set filename scalar
-	std::string ffnameScalar = para->getOutputPrefix() + "_" + StringUtil::toString<int>(para->getMyID()) + ".*****.p";
+	std::string ffnameScalar = para->getOutputPrefix() + "_" + StringUtil::toString<int>(para->getMyProcessID()) + ".*****.p";
 	const char* fnameScalar = ffnameScalar.c_str();
 	//////////////////////////////////////////////////////////////////////////
 	//set ofstream
@@ -370,11 +370,11 @@ void printCaseFile(Parameter* para)
 
 
 
-extern "C" void printGeoFile(Parameter* para, bool fileFormat)
+void printGeoFile(Parameter* para, bool fileFormat)
 {
 	//////////////////////////////////////////////////////////////////////////
 	//set filename geo
-	std::string ffnameGeo = para->getOutputPrefix() + "_" + StringUtil::toString<int>(para->getMyID());
+	std::string ffnameGeo = para->getOutputPrefix() + "_" + StringUtil::toString<int>(para->getMyProcessID());
 	const char* fnameGeo = ffnameGeo.c_str();
 	//////////////////////////////////////////////////////////////////////////
 	char fname[1024];
@@ -545,11 +545,11 @@ extern "C" void printGeoFile(Parameter* para, bool fileFormat)
 
 
 
-extern "C" void printScalars(Parameter* para, bool fileFormat)
+void printScalars(Parameter* para, bool fileFormat)
 {
 	//////////////////////////////////////////////////////////////////////////
 	//set filename scalar
-	std::string ffnameScalar = para->getOutputPrefix() + "_" + StringUtil::toString<int>(para->getMyID());
+	std::string ffnameScalar = para->getOutputPrefix() + "_" + StringUtil::toString<int>(para->getMyProcessID());
 	const char* fnameScalar = ffnameScalar.c_str();
 	//////////////////////////////////////////////////////////////////////////
     char fname[1024];
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/Cp.h b/src/gpu/VirtualFluids_GPU/Calculation/Cp.h
index 5bb4b9c3cc81381fd3b9fb69f97636d53c8f39ee..bc70e1ae093269f038e699c0c5ce66cee63c0b12 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/Cp.h
+++ b/src/gpu/VirtualFluids_GPU/Calculation/Cp.h
@@ -6,23 +6,23 @@
 #include "Parameter/Parameter.h"
 #include "GPU/CudaMemoryManager.h"
 
-extern "C" void calcCp(Parameter* para, CudaMemoryManager* cudaMemoryManager, int lev);
-extern "C" void printCpTopIntermediateStep(Parameter* para, unsigned int t, int lev);
-extern "C" void printCpTop(Parameter* para, CudaMemoryManager* cudaMemoryManager, int lev);
-extern "C" void printCpBottom(Parameter* para, CudaMemoryManager* cudaMemoryManager);
-extern "C" void printCpBottom2(Parameter* para, CudaMemoryManager* cudaMemoryManager);
+void calcCp(Parameter* para, CudaMemoryManager* cudaMemoryManager, int lev);
+void printCpTopIntermediateStep(Parameter* para, unsigned int t, int lev);
+void printCpTop(Parameter* para, CudaMemoryManager* cudaMemoryManager, int lev);
+void printCpBottom(Parameter* para, CudaMemoryManager* cudaMemoryManager);
+void printCpBottom2(Parameter* para, CudaMemoryManager* cudaMemoryManager);
 
 
 
-extern "C" void excludeGridInterfaceNodesForMirror(Parameter* para, int lev);
-extern "C" void calcPressForMirror(Parameter* para, CudaMemoryManager* cudaMemoryManager, int lev);
+void excludeGridInterfaceNodesForMirror(Parameter* para, int lev);
+void calcPressForMirror(Parameter* para, CudaMemoryManager* cudaMemoryManager, int lev);
 //Ensight Gold
-extern "C" void printCaseFile(Parameter* para);
-extern "C" void printGeoFile(Parameter* para, bool fileFormat);
-extern "C" void printScalars(Parameter* para, bool fileFormat);
+void printCaseFile(Parameter* para);
+void printGeoFile(Parameter* para, bool fileFormat);
+void printScalars(Parameter* para, bool fileFormat);
 //functions to write binary files
-extern "C" void writeIntToFile(const int &i, std::ofstream &ofile);
-extern "C" void writeFloatToFile(const float &f, std::ofstream &ofile);
-extern "C" void writeStringToFile(const std::string &s, std::ofstream &ofile);
+void writeIntToFile(const int &i, std::ofstream &ofile);
+void writeFloatToFile(const float &f, std::ofstream &ofile);
+void writeStringToFile(const std::string &s, std::ofstream &ofile);
 
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/DragLift.cpp b/src/gpu/VirtualFluids_GPU/Calculation/DragLift.cpp
index 648fe6cc9c8fffc7574bc60c89e194937a7974e1..97d2af28ef7f801e817dd6cae6ad58d244249e02 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/DragLift.cpp
+++ b/src/gpu/VirtualFluids_GPU/Calculation/DragLift.cpp
@@ -104,7 +104,7 @@ void printDragLift(Parameter* para, CudaMemoryManager* cudaMemoryManager, int ti
 	int lev = para->getMaxLevel();
 	//////////////////////////////////////////////////////////////////////////
 	//set filename
-	std::string ffname = para->getFName()+StringUtil::toString<int>(para->getMyID())+"_"+StringUtil::toString<int>(timestep)+"_DragLift.txt";
+	std::string ffname = para->getFName()+StringUtil::toString<int>(para->getMyProcessID())+"_"+StringUtil::toString<int>(timestep)+"_DragLift.txt";
 	const char* fname = ffname.c_str();
 	//////////////////////////////////////////////////////////////////////////
 	//set ofstream
@@ -122,7 +122,7 @@ void printDragLift(Parameter* para, CudaMemoryManager* cudaMemoryManager, int ti
 	//close file
 	ostr.close();
 	//////////////////////////////////////////////////////////////////////////
-	if (timestep == (int)para->getTEnd())
+	if (timestep == (int)para->getTimestepEnd())
 	{
 		cudaMemoryManager->cudaFreeDragLift(lev);
 	}
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/DragLift.h b/src/gpu/VirtualFluids_GPU/Calculation/DragLift.h
index ab531e576d7666e233c345ecf64338be149cafd5..8be15d423b65e0fdffc3a5af44e7dc5dbdbf4e6a 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/DragLift.h
+++ b/src/gpu/VirtualFluids_GPU/Calculation/DragLift.h
@@ -6,8 +6,8 @@
 #include "Parameter/Parameter.h"
 #include "GPU/CudaMemoryManager.h"
 
-extern "C" void calcDragLift(Parameter* para, CudaMemoryManager* cudaMemoryManager, int lev);
-extern "C" void allocDragLift(Parameter* para, CudaMemoryManager* cudaMemoryManager);
-extern "C" void printDragLift(Parameter* para, CudaMemoryManager* cudaMemoryManager, int timestep);
+void calcDragLift(Parameter* para, CudaMemoryManager* cudaMemoryManager, int lev);
+void allocDragLift(Parameter* para, CudaMemoryManager* cudaMemoryManager);
+void printDragLift(Parameter* para, CudaMemoryManager* cudaMemoryManager, int timestep);
 
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/ForceCalculations.cpp b/src/gpu/VirtualFluids_GPU/Calculation/ForceCalculations.cpp
index 2964172fb0335aea3517dfb54ae7245f8ee5b6ae..d62e8fee24dad1cde7ccd2044a5a5f9573f7ff82 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/ForceCalculations.cpp
+++ b/src/gpu/VirtualFluids_GPU/Calculation/ForceCalculations.cpp
@@ -125,7 +125,7 @@ void ForceCalculations::printForcing(Parameter* para)
 {
 	//////////////////////////////////////////////////////////////////////////
 	//set filename
-	std::string ffname = para->getFName() + StringUtil::toString<int>(para->getMyID()) + "_forcing.txt";
+	std::string ffname = para->getFName() + StringUtil::toString<int>(para->getMyProcessID()) + "_forcing.txt";
 	const char* fname = ffname.c_str();
 	//////////////////////////////////////////////////////////////////////////
 	//set ofstream
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/ForceCalculations.h b/src/gpu/VirtualFluids_GPU/Calculation/ForceCalculations.h
index 684b3e9946e3e3e693d7cd36c0bb6445382b1b92..4506d23abd7068697a089c926d684406af789aef 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/ForceCalculations.h
+++ b/src/gpu/VirtualFluids_GPU/Calculation/ForceCalculations.h
@@ -35,8 +35,8 @@ private:
 };
 
 
-//extern "C" void calcVeloForce(Parameter* para);
-//extern "C" void allocVeloForForcing(Parameter* para);
-//extern "C" void printForcing(Parameter* para);
+//void calcVeloForce(Parameter* para);
+//void allocVeloForForcing(Parameter* para);
+//void printForcing(Parameter* para);
 
 #endif /* FORCE_CALCULATIONS_H */
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/PlaneCalculations.cpp b/src/gpu/VirtualFluids_GPU/Calculation/PlaneCalculations.cpp
index be90565d71a11054a05d663291f5353f9d61f624..13b6bd662a1b51a9a7a850211751c8b8b5ecf329 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/PlaneCalculations.cpp
+++ b/src/gpu/VirtualFluids_GPU/Calculation/PlaneCalculations.cpp
@@ -23,13 +23,13 @@ void setSizeOfPlane(Parameter* para, int lev, unsigned int z)
       for (unsigned int i=1; i<para->getParH(lev)->gridNX + 2 * STARTOFFX - 1; i++)
       {
          mm[0]= para->getParH(lev)->nx*(para->getParH(lev)->ny*k + j) + i;
-         mm[1]= mm[0]                                                                       -1; //W
-         mm[2]= mm[0]                                                -para->getParH(lev)->nx-1; //SW
-         mm[3]= mm[0]                                                -para->getParH(lev)->nx;   //S
-         mm[4]= mm[0]-(para->getParH(lev)->nx*para->getParH(lev)->ny);                          //B
-         mm[5]= mm[0]-(para->getParH(lev)->nx*para->getParH(lev)->ny)                       -1; //BW
-         mm[6]= mm[0]-(para->getParH(lev)->nx*para->getParH(lev)->ny)-para->getParH(lev)->nx;   //BS
-         mm[7]= mm[0]-(para->getParH(lev)->nx*para->getParH(lev)->ny)-para->getParH(lev)->nx-1; //BSW
+         mm[1]= mm[0]                                                                       -1; //DIR_M00
+         mm[2]= mm[0]                                                -para->getParH(lev)->nx-1; //DIR_MM0
+         mm[3]= mm[0]                                                -para->getParH(lev)->nx;   //DIR_0M0
+         mm[4]= mm[0]-(para->getParH(lev)->nx*para->getParH(lev)->ny);                          //DIR_00M
+         mm[5]= mm[0]-(para->getParH(lev)->nx*para->getParH(lev)->ny)                       -1; //DIR_M0M
+         mm[6]= mm[0]-(para->getParH(lev)->nx*para->getParH(lev)->ny)-para->getParH(lev)->nx;   //DIR_0MM
+         mm[7]= mm[0]-(para->getParH(lev)->nx*para->getParH(lev)->ny)-para->getParH(lev)->nx-1; //DIR_MMM
 
          if ( para->getParH(lev)->geo[mm[0]] != GEO_VOID ||
               para->getParH(lev)->geo[mm[1]] != GEO_VOID ||
@@ -267,7 +267,7 @@ void printPlaneConc(Parameter* para, CudaMemoryManager* cudaMemoryManager)
 	int lev = para->getCoarse();
 	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//set filename
-	std::string ffnameIn = para->getFName() + UbSystem::toString(para->getMyID()) + "_" + "In" + "_PlaneConc.txt";
+	std::string ffnameIn = para->getFName() + UbSystem::toString(para->getMyProcessID()) + "_" + "In" + "_PlaneConc.txt";
 	const char* fnameIn = ffnameIn.c_str();
 	//////////////////////////////////////////////////////////////////////////
 	//set ofstream
@@ -287,7 +287,7 @@ void printPlaneConc(Parameter* para, CudaMemoryManager* cudaMemoryManager)
 	
 	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//set filename
-	std::string ffnameOut1 = para->getFName() + UbSystem::toString(para->getMyID()) + "_" + "Out1" + "_PlaneConc.txt";
+	std::string ffnameOut1 = para->getFName() + UbSystem::toString(para->getMyProcessID()) + "_" + "Out1" + "_PlaneConc.txt";
 	const char* fnameOut1 = ffnameOut1.c_str();
 	//////////////////////////////////////////////////////////////////////////
 	//set ofstream
@@ -307,7 +307,7 @@ void printPlaneConc(Parameter* para, CudaMemoryManager* cudaMemoryManager)
 
 	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	//set filename
-	std::string ffnameOut2 = para->getFName() + UbSystem::toString(para->getMyID()) + "_" + "Out2" + "_PlaneConc.txt";
+	std::string ffnameOut2 = para->getFName() + UbSystem::toString(para->getMyProcessID()) + "_" + "Out2" + "_PlaneConc.txt";
 	const char* fnameOut2 = ffnameOut2.c_str();
 	//////////////////////////////////////////////////////////////////////////
 	//set ofstream
@@ -342,7 +342,7 @@ void printRE(Parameter* para, CudaMemoryManager* cudaMemoryManager, int timestep
 	int lev = 0;
 	//////////////////////////////////////////////////////////////////////////
 	//set filename
-	std::string ffname = para->getFName()+StringUtil::toString<int>(para->getMyID())+"_"+StringUtil::toString<int>(timestep)+"_RE.txt";
+	std::string ffname = para->getFName()+StringUtil::toString<int>(para->getMyProcessID())+"_"+StringUtil::toString<int>(timestep)+"_RE.txt";
 	const char* fname = ffname.c_str();
 	//////////////////////////////////////////////////////////////////////////
 	//set ofstream
@@ -375,7 +375,7 @@ void printRE(Parameter* para, CudaMemoryManager* cudaMemoryManager, int timestep
 	//close file
 	ostr.close();
 	//////////////////////////////////////////////////////////////////////////
-	if (timestep == (int)para->getTEnd())
+	if (timestep == (int)para->getTimestepEnd())
 	{
 		cudaMemoryManager->cudaFreeTestRE(lev);
 	}
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/PlaneCalculations.h b/src/gpu/VirtualFluids_GPU/Calculation/PlaneCalculations.h
index 47443169829e5bdd1d6c6ed31eaca1c259c783b7..50f49b85df2a87e3921ac7133630c128da0caebd 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/PlaneCalculations.h
+++ b/src/gpu/VirtualFluids_GPU/Calculation/PlaneCalculations.h
@@ -8,15 +8,15 @@
 #include <iostream>
 #include <stdio.h>
 
-extern "C" void setSizeOfPlane(Parameter* para, int lev, unsigned int z);
-extern "C" void calcPressure(Parameter* para, std::string inorout, int lev);
-extern "C" void calcFlowRate(Parameter* para, int lev);
+void setSizeOfPlane(Parameter* para, int lev, unsigned int z);
+void calcPressure(Parameter* para, std::string inorout, int lev);
+void calcFlowRate(Parameter* para, int lev);
 
 //advection + diffusion
-extern "C" void calcPlaneConc(Parameter* para, CudaMemoryManager* cudaMemoryManager, int lev);
-extern "C" void allocPlaneConc(Parameter* para, CudaMemoryManager* cudaMemoryManager);
-extern "C" void printPlaneConc(Parameter* para, CudaMemoryManager* cudaMemoryManager);
+void calcPlaneConc(Parameter* para, CudaMemoryManager* cudaMemoryManager, int lev);
+void allocPlaneConc(Parameter* para, CudaMemoryManager* cudaMemoryManager);
+void printPlaneConc(Parameter* para, CudaMemoryManager* cudaMemoryManager);
 
-extern "C" void printRE(Parameter* para, CudaMemoryManager* cudaMemoryManager, int timestep);
+void printRE(Parameter* para, CudaMemoryManager* cudaMemoryManager, int timestep);
 
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/RefinementStrategy.cpp b/src/gpu/VirtualFluids_GPU/Calculation/RefinementStrategy.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..243c87ae2dfbf8100ad6a5e3a5bc2dd3331d0d32
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/Calculation/RefinementStrategy.cpp
@@ -0,0 +1,158 @@
+#include "RefinementStrategy.h"
+#include "Parameter/CudaStreamManager.h"
+#include "Parameter/Parameter.h"
+#include "logger/Logger.h"
+
+//! \brief Select the functor performing the interpolation between grid levels (plus, for multi-gpu runs, the exchange) and log the choice.
+std::function<void(UpdateGrid27 *updateGrid, Parameter *para, int level)>
+    getFunctionForRefinementAndExchange(const bool useStreams, const int numberOfMpiProcesses, const int maxLevel,
+                                        const bool useReducedCommunicationAfterFtoC) noexcept
+{
+    VF_LOG_INFO("Function used for refinementAndExchange: ");
+    if (maxLevel == 0) {
+        VF_LOG_INFO("only one level - no function needed.");
+        return NoRefinement();
+    }
+    if (numberOfMpiProcesses == 1) {
+        VF_LOG_INFO("only one process - no exchange needed: Refinement_noExchange()");
+        return Refinement_noExchange();
+    }
+    if (numberOfMpiProcesses > 1 && useStreams && useReducedCommunicationAfterFtoC) {
+        VF_LOG_INFO("RefinementAndExchange_streams_exchangeInterface()");
+        return RefinementAndExchange_streams_exchangeInterface();
+    }
+    if (numberOfMpiProcesses > 1 && useStreams && !useReducedCommunicationAfterFtoC) {
+        VF_LOG_INFO("RefinementAndExchange_streams_exchangeAllNodes()");
+        return RefinementAndExchange_streams_exchangeAllNodes();
+    }
+    if (numberOfMpiProcesses > 1 && !useStreams && useReducedCommunicationAfterFtoC) {
+        VF_LOG_INFO("RefinementAndExchange_noStreams_exchangeInterface()");
+        return RefinementAndExchange_noStreams_exchangeInterface();
+    }
+    // fallback: no streams and exchange of all nodes; also reached if numberOfMpiProcesses < 1
+    VF_LOG_INFO("RefinementAndExchange_noStreams_exchangeAllNodes()");
+    return RefinementAndExchange_noStreams_exchangeAllNodes();
+}
+
+void NoRefinement::operator()(UpdateGrid27 *updateGrid, Parameter *para, int level){} // intentionally empty: used when there is only one grid level, so nothing is interpolated or exchanged
+
+void RefinementAndExchange_streams_exchangeInterface::operator()(UpdateGrid27 *updateGrid, Parameter *para, int level)
+{
+    int borderStreamIndex = para->getStreamManager()->getBorderStreamIndex();
+    int bulkStreamIndex = para->getStreamManager()->getBulkStreamIndex();
+
+    //! \details steps (border work is issued with borderStreamIndex, bulk work with bulkStreamIndex):
+    //!
+    //! 1. Interpolation fine to coarse for nodes which are at the border of the gpus/processes
+    //!
+    updateGrid->fineToCoarse(level, para->getParD(level)->intFCBorder.ICellFCC, para->getParD(level)->intFCBorder.ICellFCF,
+                 para->getParD(level)->intFCBorder.kFC, borderStreamIndex);
+
+    //! 2. prepare the exchange between gpus (collect the send nodes for communication in a buffer on the gpu) and trigger bulk kernel execution when finished
+    //!
+    updateGrid->prepareExchangeMultiGPUAfterFtoC(level, borderStreamIndex);
+    if (para->getUseStreams())
+        para->getStreamManager()->triggerStartBulkKernel(borderStreamIndex);
+
+    //! 3. launch the bulk kernels for both interpolation processes (fine to coarse and coarse to fine)
+    //!
+    para->getStreamManager()->waitOnStartBulkKernelEvent(bulkStreamIndex);
+    updateGrid->fineToCoarse(level, para->getParD(level)->intFCBulk.ICellFCC, para->getParD(level)->intFCBulk.ICellFCF,
+                 para->getParD(level)->intFCBulk.kFC, bulkStreamIndex);
+    updateGrid->coarseToFine(level, para->getParD(level)->intCFBulk.ICellCFC, para->getParD(level)->intCFBulk.ICellCFF,
+                 para->getParD(level)->intCFBulk.kCF, para->getParD(level)->offCFBulk, bulkStreamIndex);
+
+    //! 4. exchange information between GPUs (only nodes which are part of the interpolation)
+    //!
+    updateGrid->exchangeMultiGPUAfterFtoC(level, borderStreamIndex);
+
+    //! 5. interpolation coarse to fine for nodes which are at the border of the gpus/processes
+    //!
+    updateGrid->coarseToFine(level, para->getParD(level)->intCFBorder.ICellCFC, para->getParD(level)->intCFBorder.ICellCFF,
+                 para->getParD(level)->intCFBorder.kCF, para->getParD(level)->offCF, borderStreamIndex);
+
+    cudaDeviceSynchronize(); // host blocks here until all work issued to the device has completed
+}
+
+void RefinementAndExchange_streams_exchangeAllNodes::operator()(UpdateGrid27 *updateGrid, Parameter *para, int level){
+    int borderStreamIndex = para->getStreamManager()->getBorderStreamIndex();
+    int bulkStreamIndex = para->getStreamManager()->getBulkStreamIndex();
+
+    //! \details steps (border work is issued with borderStreamIndex, bulk work with bulkStreamIndex):
+    //!
+    //! 1. interpolation fine to coarse for nodes which are at the border of the gpus/processes
+    //!
+    updateGrid->fineToCoarse(level, para->getParD(level)->intFCBorder.ICellFCC, para->getParD(level)->intFCBorder.ICellFCF,
+                 para->getParD(level)->intFCBorder.kFC, borderStreamIndex);
+
+    //! 2. prepare the exchange between gpus (collect the send nodes for communication in a buffer on the gpu) and trigger bulk kernel execution when finished
+    //!
+    updateGrid->prepareExchangeMultiGPU(level, borderStreamIndex);
+    if (para->getUseStreams())
+        para->getStreamManager()->triggerStartBulkKernel(borderStreamIndex);
+
+    //! 3. launch the bulk kernels for both interpolation processes (fine to coarse and coarse to fine)
+    //!
+    para->getStreamManager()->waitOnStartBulkKernelEvent(bulkStreamIndex);
+    updateGrid->fineToCoarse(level, para->getParD(level)->intFCBulk.ICellFCC, para->getParD(level)->intFCBulk.ICellFCF,
+                 para->getParD(level)->intFCBulk.kFC, bulkStreamIndex);
+    updateGrid->coarseToFine(level, para->getParD(level)->intCFBulk.ICellCFC, para->getParD(level)->intCFBulk.ICellCFF,
+                 para->getParD(level)->intCFBulk.kCF, para->getParD(level)->offCFBulk, bulkStreamIndex);
+
+    //! 4. exchange information between GPUs (all nodes)
+    //!
+    updateGrid->exchangeMultiGPU(level, borderStreamIndex);
+
+    //! 5. interpolation coarse to fine for nodes which are at the border of the gpus/processes
+    //!
+    updateGrid->coarseToFine(level, para->getParD(level)->intCFBorder.ICellCFC, para->getParD(level)->intCFBorder.ICellCFF,
+                 para->getParD(level)->intCFBorder.kCF, para->getParD(level)->offCF, borderStreamIndex);
+
+    cudaDeviceSynchronize(); // host blocks here until all work issued to the device has completed
+}
+
+void RefinementAndExchange_noStreams_exchangeInterface::operator()(UpdateGrid27 *updateGrid, Parameter *para, int level)
+{
+    //! \details steps (no cuda streams: -1 is passed where the streams variants pass a stream index):
+    //!
+    //! 1. interpolation fine to coarse
+    //!
+    updateGrid->fineToCoarse(level, para->getParD(level)->intFC.ICellFCC, para->getParD(level)->intFC.ICellFCF, para->getParD(level)->K_FC, -1);
+
+    //! 2. exchange information between GPUs (only nodes which are part of the interpolation)
+    //!
+    updateGrid->exchangeMultiGPU_noStreams_withPrepare(level, true); // true here, false in the exchangeAllNodes variant
+
+    //! 3. interpolation coarse to fine
+    updateGrid->coarseToFine(level, para->getParD(level)->intCF.ICellCFC, para->getParD(level)->intCF.ICellCFF, para->getParD(level)->K_CF,
+                             para->getParD(level)->offCF, -1);
+}
+
+void RefinementAndExchange_noStreams_exchangeAllNodes::operator()(UpdateGrid27 *updateGrid, Parameter *para, int level)
+{
+    //! \details steps (no cuda streams: -1 is passed where the streams variants pass a stream index):
+    //!
+    //! 1. interpolation fine to coarse
+    //!
+    updateGrid->fineToCoarse(level, para->getParD(level)->intFC.ICellFCC, para->getParD(level)->intFC.ICellFCF, para->getParD(level)->K_FC, -1);
+    
+    //! 2. exchange information between GPUs (all nodes)
+    //!
+    updateGrid->exchangeMultiGPU_noStreams_withPrepare(level, false); // false here, true in the exchangeInterface variant
+
+    //! 3. interpolation coarse to fine
+    updateGrid->coarseToFine(level, para->getParD(level)->intCF.ICellCFC, para->getParD(level)->intCF.ICellCFF, para->getParD(level)->K_CF,
+                             para->getParD(level)->offCF, -1);
+}
+
+void Refinement_noExchange::operator()(UpdateGrid27 *updateGrid, Parameter *para, int level)
+{
+    //! \details steps (single process, so no exchange; -1 is passed where the streams variants pass a stream index):
+    //!
+    //! 1. interpolation fine to coarse
+    //!
+    updateGrid->fineToCoarse(level, para->getParD(level)->intFC.ICellFCC, para->getParD(level)->intFC.ICellFCF, para->getParD(level)->K_FC, -1);
+    //! 2. interpolation coarse to fine
+    updateGrid->coarseToFine(level, para->getParD(level)->intCF.ICellCFC, para->getParD(level)->intCF.ICellCFF, para->getParD(level)->K_CF,
+                 para->getParD(level)->offCF, -1);
+}
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/RefinementStrategy.h b/src/gpu/VirtualFluids_GPU/Calculation/RefinementStrategy.h
new file mode 100644
index 0000000000000000000000000000000000000000..4da3174a9840563d9e75688b35ce61fe9e08c493
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/Calculation/RefinementStrategy.h
@@ -0,0 +1,56 @@
+#ifndef REFINEMENTSTRATEGY_H
+#define REFINEMENTSTRATEGY_H
+
+#include "UpdateGrid27.h"
+
+//! \brief get a function which performs the interpolation between grid levels and performs the communication between gpus/ processes
+//! \return a function to perform the interpolation and for multi-gpu simulations also the communication
+std::function<void(UpdateGrid27 *updateGrid, Parameter *para, int level)>
+    getFunctionForRefinementAndExchange(const bool useStreams, const int numberOfMpiProcesses, const int maxLevel,
+                                        const bool useReducedCommunicationAfterFtoC) noexcept;
+
+//! \brief Version of refinement: for multi-gpu simulations, with communication hiding ("streams"), only exchange the interpolated cells
+//! \details recommended for multi-gpu simulations if the chosen collision kernel supports the use of cuda streams
+class RefinementAndExchange_streams_exchangeInterface
+{
+public:
+    void operator()(UpdateGrid27 *updateGrid, Parameter *para, int level);
+};
+
+//! \brief Version of refinement: for multi-gpu simulations, with communication hiding ("streams"), exchange all nodes
+class RefinementAndExchange_streams_exchangeAllNodes
+{
+public:
+    void operator()(UpdateGrid27 *updateGrid, Parameter *para, int level);
+};
+
+//! \brief Version of refinement: for multi-gpu simulations, without communication hiding ("streams"), only exchange the interpolated cells
+//! \details recommended for multi-gpu simulations if the chosen collision kernel does NOT support the use of cuda streams
+class RefinementAndExchange_noStreams_exchangeInterface
+{
+public:
+    void operator()(UpdateGrid27 *updateGrid, Parameter *para, int level);
+};
+
+//! \brief Version of refinement: for multi-gpu simulations, without communication hiding ("streams"), exchange all nodes
+class RefinementAndExchange_noStreams_exchangeAllNodes
+{
+public:
+    void operator()(UpdateGrid27 *updateGrid, Parameter *para, int level);
+};
+
+//! \brief Version of refinement: for single-gpu simulations
+class Refinement_noExchange
+{
+public:
+    void operator()(UpdateGrid27 *updateGrid, Parameter *para, int level);
+};
+
+//! \brief Version of refinement: for uniform simulations (no grid refinement)
+class NoRefinement
+{
+public:
+    void operator()(UpdateGrid27 *updateGrid, Parameter *para, int level);
+};
+
+#endif
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
index ab341c781c853a8f3c25e3cfbb0233b26ac95e49..116d9c673a4b6f1c5d171676b7fc119971be7905 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
+++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.cpp
@@ -4,12 +4,15 @@
 
 #include "Communication/ExchangeData27.h"
 #include "Parameter/CudaStreamManager.h"
-#include "GPU/TurbulentViscosity.h"
 #include "KernelManager/BCKernelManager.h"
 #include "KernelManager/ADKernelManager.h"
 #include "KernelManager/GridScalingKernelManager.h"
+#include "TurbulenceModels/TurbulenceModelFactory.h"
 #include "Kernel/Kernel.h"
 
+#include "CollisionStrategy.h"
+#include "RefinementStrategy.h"
+
 void UpdateGrid27::updateGrid(int level, unsigned int t)
 {
     //////////////////////////////////////////////////////////////////////////
@@ -21,164 +24,39 @@ void UpdateGrid27::updateGrid(int level, unsigned int t)
 
     //////////////////////////////////////////////////////////////////////////
 
-    (this->*collisionAndExchange)(level, t);
+    collision(this, para.get(), level, t);
 
     //////////////////////////////////////////////////////////////////////////
 
-    this->postCollisionBC(level, t);
+    postCollisionBC(level);
 
     //////////////////////////////////////////////////////////////////////////
 
-    swapBetweenEvenAndOddTimestep(para.get(), level);
+    swapBetweenEvenAndOddTimestep(level);
 
     //////////////////////////////////////////////////////////////////////////
 
-    if (para->getUseWale())
-        calcMacroscopicQuantities(para.get(), level);
+    if (para->getUseWale()) //TODO: make WALE consistent with structure of other turbulence models
+        calcMacroscopicQuantities(level);
 
-    if (para->getUseTurbulentViscosity())
-        calcTurbulentViscosity(para.get(), level);
+    calcTurbulentViscosity(level);
 
     //////////////////////////////////////////////////////////////////////////
 
-    preCollisionBC(level, t);
+    this->preCollisionBC(level, t);
 
     //////////////////////////////////////////////////////////////////////////
     if( level != para->getFine() )
     {
-        (this->*refinementAndExchange)(level);
+        refinement(this, para.get(), level);
     }
 
-    interactWithActuators(para.get(), cudaMemoryManager.get(), level, t);
-
-    interactWithProbes(para.get(), cudaMemoryManager.get(), level, t);
-}
-
-void UpdateGrid27::refinementAndExchange_noRefinementAndExchange(int level) {}
-
-void UpdateGrid27::refinementAndExchange_streams_onlyExchangeInterface(int level)
-{
-    int borderStreamIndex = para->getStreamManager()->getBorderStreamIndex();
-    int bulkStreamIndex = para->getStreamManager()->getBulkStreamIndex();
-
-    // fine to coarse border
-    fineToCoarse(level, para->getParD(level)->intFCBorder.ICellFCC, para->getParD(level)->intFCBorder.ICellFCF,
-                 para->getParD(level)->intFCBorder.kFC, borderStreamIndex);
-
-    // prepare exchange and trigger bulk kernel when finished
-    prepareExchangeMultiGPUAfterFtoC(para.get(), level, borderStreamIndex);
-    if (para->getUseStreams())
-        para->getStreamManager()->triggerStartBulkKernel(borderStreamIndex);
-
-    // launch bulk kernels (f to c and c to f)
-    para->getStreamManager()->waitOnStartBulkKernelEvent(bulkStreamIndex);
-    fineToCoarse(level, para->getParD(level)->intFCBulk.ICellFCC, para->getParD(level)->intFCBulk.ICellFCF,
-                 para->getParD(level)->intFCBulk.kFC, bulkStreamIndex);
-    coarseToFine(level, para->getParD(level)->intCFBulk.ICellCFC, para->getParD(level)->intCFBulk.ICellCFF,
-                 para->getParD(level)->intCFBulk.kCF, para->getParD(level)->offCFBulk, bulkStreamIndex);
-
-    // exchange
-    exchangeMultiGPUAfterFtoC(para.get(), comm, cudaMemoryManager.get(), level, borderStreamIndex);
-
-    // coarse to fine border
-    coarseToFine(level, para->getParD(level)->intCFBorder.ICellCFC, para->getParD(level)->intCFBorder.ICellCFF,
-                 para->getParD(level)->intCFBorder.kCF, para->getParD(level)->offCF, borderStreamIndex);
-    cudaDeviceSynchronize();
-}
-
-void UpdateGrid27::refinementAndExchange_streams_completeExchange(int level)
-{
-    int borderStreamIndex = para->getStreamManager()->getBorderStreamIndex();
-    int bulkStreamIndex = para->getStreamManager()->getBulkStreamIndex();
-
-    // fine to coarse border
-    fineToCoarse(level, para->getParD(level)->intFCBorder.ICellFCC, para->getParD(level)->intFCBorder.ICellFCF,
-                 para->getParD(level)->intFCBorder.kFC, borderStreamIndex);
-
-    // prepare exchange and trigger bulk kernel when finished
-    prepareExchangeMultiGPU(para.get(), level, borderStreamIndex);
-    if (para->getUseStreams())
-        para->getStreamManager()->triggerStartBulkKernel(borderStreamIndex);
-
-    // launch bulk kernels (f to c and c to f)
-    para->getStreamManager()->waitOnStartBulkKernelEvent(bulkStreamIndex);
-    fineToCoarse(level, para->getParD(level)->intFCBulk.ICellFCC, para->getParD(level)->intFCBulk.ICellFCF,
-                 para->getParD(level)->intFCBulk.kFC, bulkStreamIndex);
-    coarseToFine(level, para->getParD(level)->intCFBulk.ICellCFC, para->getParD(level)->intCFBulk.ICellCFF,
-                 para->getParD(level)->intCFBulk.kCF, para->getParD(level)->offCFBulk, bulkStreamIndex);
-
-    // exchange
-    exchangeMultiGPU(para.get(), comm, cudaMemoryManager.get(), level, borderStreamIndex);
-
-    // coarse to fine border
-    coarseToFine(level, para->getParD(level)->intCFBorder.ICellCFC, para->getParD(level)->intCFBorder.ICellCFF,
-                 para->getParD(level)->intCFBorder.kCF, para->getParD(level)->offCF, borderStreamIndex);
-    cudaDeviceSynchronize();
-}
-
-void UpdateGrid27::refinementAndExchange_noStreams_onlyExchangeInterface(int level)
-{
-    fineToCoarse(level, para->getParD(level)->intFC.ICellFCC, para->getParD(level)->intFC.ICellFCF, para->getParD(level)->K_FC, -1);
-
-    exchangeMultiGPU_noStreams_withPrepare(para.get(), comm, cudaMemoryManager.get(), level, true);
-
-    coarseToFine(level, para->getParD(level)->intCF.ICellCFC, para->getParD(level)->intCF.ICellCFF, para->getParD(level)->K_CF,
-                 para->getParD(level)->offCF, -1);
-}
-
-void UpdateGrid27::refinementAndExchange_noStreams_completeExchange(int level)
-{
-    fineToCoarse(level, para->getParD(level)->intFC.ICellFCC, para->getParD(level)->intFC.ICellFCF, para->getParD(level)->K_FC, -1);
-
-    exchangeMultiGPU_noStreams_withPrepare(para.get(), comm, cudaMemoryManager.get(), level, false);
-
-    coarseToFine(level, para->getParD(level)->intCF.ICellCFC, para->getParD(level)->intCF.ICellCFF, para->getParD(level)->K_CF,
-                 para->getParD(level)->offCF, -1);
-}
-
-void UpdateGrid27::refinementAndExchange_noExchange(int level)
-{
-    fineToCoarse(level, para->getParD(level)->intFC.ICellFCC, para->getParD(level)->intFC.ICellFCF, para->getParD(level)->K_FC, -1);
-    coarseToFine(level, para->getParD(level)->intCF.ICellCFC, para->getParD(level)->intCF.ICellCFF, para->getParD(level)->K_CF,
-                 para->getParD(level)->offCF, -1);
-}
-
-void UpdateGrid27::collisionAndExchange_noStreams_indexKernel(int level, unsigned int t)
-{
-    collisionUsingIndex(level, t, para->getParD(level)->fluidNodeIndices,
-                            para->getParD(level)->numberOfFluidNodes, -1);
-    exchangeMultiGPU_noStreams_withPrepare(para.get(), comm, cudaMemoryManager.get(), level, false);
-}
-
-void UpdateGrid27::collisionAndExchange_noStreams_oldKernel(int level, unsigned int t)
-{
-    collision(level, t);
-    exchangeMultiGPU_noStreams_withPrepare(para.get(), comm, cudaMemoryManager.get(), level, false);
-}
-
-void UpdateGrid27::collisionAndExchange_streams(int level, unsigned int t)
-{
-    int borderStreamIndex = para->getStreamManager()->getBorderStreamIndex();
-    int bulkStreamIndex   = para->getStreamManager()->getBulkStreamIndex();
-
-    // launch border kernel
-    collisionUsingIndex(level, t, para->getParD(level)->fluidNodeIndicesBorder,
-                        para->getParD(level)->numberOffluidNodesBorder, borderStreamIndex);
+    interactWithActuators(level, t);
 
-    // prepare exchange and trigger bulk kernel when finished
-    prepareExchangeMultiGPU(para.get(), level, borderStreamIndex);
-    if (para->getUseStreams())
-        para->getStreamManager()->triggerStartBulkKernel(borderStreamIndex);
-
-    // launch bulk kernel
-    para->getStreamManager()->waitOnStartBulkKernelEvent(bulkStreamIndex);
-    collisionUsingIndex(level, t, para->getParD(level)->fluidNodeIndices,
-                        para->getParD(level)->numberOfFluidNodes, bulkStreamIndex);
-
-    exchangeMultiGPU(para.get(), comm, cudaMemoryManager.get(), level, borderStreamIndex);
+    interactWithProbes(level, t);
 }
 
-void UpdateGrid27::collision(int level, unsigned int t)
+void UpdateGrid27::collisionAllNodes(int level, unsigned int t)
 {
     kernels.at(level)->run();
 
@@ -193,7 +71,7 @@ void UpdateGrid27::collision(int level, unsigned int t)
         collisionAdvectionDiffusion(level);
 }
 
-void UpdateGrid27::collisionUsingIndex(int level, unsigned int t, uint *fluidNodeIndices, uint numberOfFluidNodes, int stream)
+void UpdateGrid27::collisionUsingIndices(int level, unsigned int t, uint *fluidNodeIndices, uint numberOfFluidNodes, int stream)
 {
     if (fluidNodeIndices != nullptr && numberOfFluidNodes != 0)
         kernels.at(level)->runOnIndices(fluidNodeIndices, numberOfFluidNodes, stream);
@@ -240,41 +118,40 @@ void UpdateGrid27::collisionAdvectionDiffusion(int level)
     this->adKernelManager->runADcollisionKernel(level);
 }
 
-void prepareExchangeMultiGPU(Parameter *para, int level, int streamIndex)
+void UpdateGrid27::prepareExchangeMultiGPU(int level, int streamIndex) // runs the all-nodes prepare step for the x, y and z direction with the given stream index
 {
-    prepareExchangeCollDataXGPU27AllNodes(para, level, streamIndex);
-    prepareExchangeCollDataYGPU27AllNodes(para, level, streamIndex);
-    prepareExchangeCollDataZGPU27AllNodes(para, level, streamIndex);
+    prepareExchangeCollDataXGPU27AllNodes(para.get(), level, streamIndex);
+    prepareExchangeCollDataYGPU27AllNodes(para.get(), level, streamIndex);
+    prepareExchangeCollDataZGPU27AllNodes(para.get(), level, streamIndex);
 }
 
-void prepareExchangeMultiGPUAfterFtoC(Parameter *para, int level, int streamIndex)
+void UpdateGrid27::prepareExchangeMultiGPUAfterFtoC(int level, int streamIndex) // runs the AfterFtoC prepare step for the x, y and z direction with the given stream index
 {
-    prepareExchangeCollDataXGPU27AfterFtoC(para, level, streamIndex);
-    prepareExchangeCollDataYGPU27AfterFtoC(para, level, streamIndex);
-    prepareExchangeCollDataZGPU27AfterFtoC(para, level, streamIndex);
+    prepareExchangeCollDataXGPU27AfterFtoC(para.get(), level, streamIndex);
+    prepareExchangeCollDataYGPU27AfterFtoC(para.get(), level, streamIndex);
+    prepareExchangeCollDataZGPU27AfterFtoC(para.get(), level, streamIndex);
 }
 
-void exchangeMultiGPU(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaManager, int level,
-                      int streamIndex)
+void UpdateGrid27::exchangeMultiGPU(int level, int streamIndex)
 {
     //////////////////////////////////////////////////////////////////////////
     // 3D domain decomposition
-    exchangeCollDataXGPU27AllNodes(para, comm, cudaManager, level, streamIndex);
-    exchangeCollDataYGPU27AllNodes(para, comm, cudaManager, level, streamIndex);
-    exchangeCollDataZGPU27AllNodes(para, comm, cudaManager, level, streamIndex);
+    exchangeCollDataXGPU27AllNodes(para.get(), comm, cudaMemoryManager.get(), level, streamIndex);
+    exchangeCollDataYGPU27AllNodes(para.get(), comm, cudaMemoryManager.get(), level, streamIndex);
+    exchangeCollDataZGPU27AllNodes(para.get(), comm, cudaMemoryManager.get(), level, streamIndex);
 
-    scatterNodesFromRecvBufferXGPU27AllNodes(para, level, streamIndex);
-    scatterNodesFromRecvBufferYGPU27AllNodes(para, level, streamIndex);
-    scatterNodesFromRecvBufferZGPU27AllNodes(para, level, streamIndex);
+    scatterNodesFromRecvBufferXGPU27AllNodes(para.get(), level, streamIndex);
+    scatterNodesFromRecvBufferYGPU27AllNodes(para.get(), level, streamIndex);
+    scatterNodesFromRecvBufferZGPU27AllNodes(para.get(), level, streamIndex);
 
     //////////////////////////////////////////////////////////////////////////
     // 3D domain decomposition convection diffusion
     if (para->getDiffOn()) {
         if (para->getUseStreams())
             std::cout << "Warning: Cuda streams not yet implemented for convection diffusion" << std::endl;
-        exchangePostCollDataADXGPU27(para, comm, cudaManager, level);
-        exchangePostCollDataADYGPU27(para, comm, cudaManager, level);
-        exchangePostCollDataADZGPU27(para, comm, cudaManager, level);
+        exchangePostCollDataADXGPU27(para.get(), comm, cudaMemoryManager.get(), level);
+        exchangePostCollDataADYGPU27(para.get(), comm, cudaMemoryManager.get(), level);
+        exchangePostCollDataADZGPU27(para.get(), comm, cudaMemoryManager.get(), level);
     }
 
     //////////////////////////////////////////////////////////////////////////
@@ -285,36 +162,36 @@ void exchangeMultiGPU(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryMa
     // 1D domain decomposition
     // exchangePostCollDataGPU27(para, comm, level);
 }
-void exchangeMultiGPU_noStreams_withPrepare(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaManager, int level, bool useReducedComm)
+void UpdateGrid27::exchangeMultiGPU_noStreams_withPrepare(int level, bool useReducedComm)
 {
     //////////////////////////////////////////////////////////////////////////
     // 3D domain decomposition
     if (useReducedComm) {
         // X
-        prepareExchangeCollDataXGPU27AfterFtoC(para, level, -1);
-        exchangeCollDataXGPU27AfterFtoC(para, comm, cudaManager, level, -1);
-        scatterNodesFromRecvBufferXGPU27AfterFtoC(para, level, -1);
+        prepareExchangeCollDataXGPU27AfterFtoC(para.get(), level, -1);
+        exchangeCollDataXGPU27AfterFtoC(para.get(), comm, cudaMemoryManager.get(), level, -1);
+        scatterNodesFromRecvBufferXGPU27AfterFtoC(para.get(), level, -1);
         // Y
-        prepareExchangeCollDataYGPU27AfterFtoC(para, level, -1);
-        exchangeCollDataYGPU27AfterFtoC(para, comm, cudaManager, level, -1);
-        scatterNodesFromRecvBufferYGPU27AfterFtoC(para, level, -1);
+        prepareExchangeCollDataYGPU27AfterFtoC(para.get(), level, -1);
+        exchangeCollDataYGPU27AfterFtoC(para.get(), comm, cudaMemoryManager.get(), level, -1);
+        scatterNodesFromRecvBufferYGPU27AfterFtoC(para.get(), level, -1);
         // Z
-        prepareExchangeCollDataZGPU27AfterFtoC(para, level, -1);
-        exchangeCollDataZGPU27AfterFtoC(para, comm, cudaManager, level, -1);
-        scatterNodesFromRecvBufferZGPU27AfterFtoC(para, level, -1);
+        prepareExchangeCollDataZGPU27AfterFtoC(para.get(), level, -1);
+        exchangeCollDataZGPU27AfterFtoC(para.get(), comm, cudaMemoryManager.get(), level, -1);
+        scatterNodesFromRecvBufferZGPU27AfterFtoC(para.get(), level, -1);
     } else {
         // X
-        prepareExchangeCollDataXGPU27AllNodes(para, level, -1);
-        exchangeCollDataXGPU27AllNodes(para, comm, cudaManager, level, -1);
-        scatterNodesFromRecvBufferXGPU27AllNodes(para, level, -1);
+        prepareExchangeCollDataXGPU27AllNodes(para.get(), level, -1);
+        exchangeCollDataXGPU27AllNodes(para.get(), comm, cudaMemoryManager.get(), level, -1);
+        scatterNodesFromRecvBufferXGPU27AllNodes(para.get(), level, -1);
         // Y
-        prepareExchangeCollDataYGPU27AllNodes(para, level, -1);
-        exchangeCollDataYGPU27AllNodes(para, comm, cudaManager, level, -1);
-        scatterNodesFromRecvBufferYGPU27AllNodes(para, level, -1);
+        prepareExchangeCollDataYGPU27AllNodes(para.get(), level, -1);
+        exchangeCollDataYGPU27AllNodes(para.get(), comm, cudaMemoryManager.get(), level, -1);
+        scatterNodesFromRecvBufferYGPU27AllNodes(para.get(), level, -1);
         // Z
-        prepareExchangeCollDataZGPU27AllNodes(para, level, -1);
-        exchangeCollDataZGPU27AllNodes(para, comm, cudaManager, level, -1);
-        scatterNodesFromRecvBufferZGPU27AllNodes(para, level, -1);
+        prepareExchangeCollDataZGPU27AllNodes(para.get(), level, -1);
+        exchangeCollDataZGPU27AllNodes(para.get(), comm, cudaMemoryManager.get(), level, -1);
+        scatterNodesFromRecvBufferZGPU27AllNodes(para.get(), level, -1);
     }
 
     //////////////////////////////////////////////////////////////////////////
@@ -322,32 +199,31 @@ void exchangeMultiGPU_noStreams_withPrepare(Parameter *para, vf::gpu::Communicat
     if (para->getDiffOn()) {
         if (para->getUseStreams())
             std::cout << "Warning: Cuda streams not yet implemented for convection diffusion" << std::endl;
-        exchangePostCollDataADXGPU27(para, comm, cudaManager, level);
-        exchangePostCollDataADYGPU27(para, comm, cudaManager, level);
-        exchangePostCollDataADZGPU27(para, comm, cudaManager, level);
+        exchangePostCollDataADXGPU27(para.get(), comm, cudaMemoryManager.get(), level);
+        exchangePostCollDataADYGPU27(para.get(), comm, cudaMemoryManager.get(), level);
+        exchangePostCollDataADZGPU27(para.get(), comm, cudaMemoryManager.get(), level);
     }
 }
-void exchangeMultiGPUAfterFtoC(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaManager, int level,
-                               int streamIndex)
+void UpdateGrid27::exchangeMultiGPUAfterFtoC(int level, int streamIndex) // exchanges, then scatters, the AfterFtoC nodes in x, y and z; no prepare step is called here
 {
     //////////////////////////////////////////////////////////////////////////
     // 3D domain decomposition
-    exchangeCollDataXGPU27AfterFtoC(para, comm, cudaManager, level, streamIndex);
-    exchangeCollDataYGPU27AfterFtoC(para, comm, cudaManager, level, streamIndex);
-    exchangeCollDataZGPU27AfterFtoC(para, comm, cudaManager, level, streamIndex);
+    exchangeCollDataXGPU27AfterFtoC(para.get(), comm, cudaMemoryManager.get(), level, streamIndex);
+    exchangeCollDataYGPU27AfterFtoC(para.get(), comm, cudaMemoryManager.get(), level, streamIndex);
+    exchangeCollDataZGPU27AfterFtoC(para.get(), comm, cudaMemoryManager.get(), level, streamIndex);
 
-    scatterNodesFromRecvBufferXGPU27AfterFtoC(para, level, streamIndex);
-    scatterNodesFromRecvBufferYGPU27AfterFtoC(para, level, streamIndex);
-    scatterNodesFromRecvBufferZGPU27AfterFtoC(para, level, streamIndex);
+    scatterNodesFromRecvBufferXGPU27AfterFtoC(para.get(), level, streamIndex);
+    scatterNodesFromRecvBufferYGPU27AfterFtoC(para.get(), level, streamIndex);
+    scatterNodesFromRecvBufferZGPU27AfterFtoC(para.get(), level, streamIndex);
 
     //////////////////////////////////////////////////////////////////////////
     // 3D domain decomposition convection diffusion
     if (para->getDiffOn()) {
         if (para->getUseStreams())
             std::cout << "Warning: Cuda streams not yet implemented for convection diffusion" << std::endl;
-        exchangePostCollDataADXGPU27(para, comm, cudaManager, level);
-        exchangePostCollDataADYGPU27(para, comm, cudaManager, level);
-        exchangePostCollDataADZGPU27(para, comm, cudaManager, level);
+        exchangePostCollDataADXGPU27(para.get(), comm, cudaMemoryManager.get(), level); // the AD exchange calls receive no streamIndex
+        exchangePostCollDataADYGPU27(para.get(), comm, cudaMemoryManager.get(), level);
+        exchangePostCollDataADZGPU27(para.get(), comm, cudaMemoryManager.get(), level);
     }
 }
 
@@ -397,13 +273,13 @@ void UpdateGrid27::postCollisionBC(int level, uint t) // TODO add t here so it c
     }
 }
 
-void swapBetweenEvenAndOddTimestep(Parameter* para, int level)
+void UpdateGrid27::swapBetweenEvenAndOddTimestep(int level) // toggles the isEvenTimestep flag stored in para->getParD(level)
 {
     if (para->getParD(level)->isEvenTimestep==true)  para->getParD(level)->isEvenTimestep=false;
     else                                        para->getParD(level)->isEvenTimestep=true;
 }
 
-void calcMacroscopicQuantities(Parameter* para, int level)
+void UpdateGrid27::calcMacroscopicQuantities(int level)
 {
     CalcMacCompSP27(para->getParD(level)->velocityX,
                     para->getParD(level)->velocityY,
@@ -474,91 +350,40 @@ void UpdateGrid27::coarseToFine(int level, uint *iCellCFC, uint *iCellCFF, uint
     }
 }
 
-void interactWithActuators(Parameter* para, CudaMemoryManager* cudaManager, int level, unsigned int t)
+void UpdateGrid27::interactWithActuators(int level, unsigned int t) // calls interact() on every entry of para->getActuators() for this level and time step
 {
     for( SPtr<PreCollisionInteractor> actuator: para->getActuators() )
     {
-        actuator->interact(para, cudaManager, level, t);
+        actuator->interact(para.get(), cudaMemoryManager.get(), level, t); // interactors receive raw, non-owning pointers
     }
 }
 
-void interactWithProbes(Parameter* para, CudaMemoryManager* cudaManager, int level, unsigned int t)
+void  UpdateGrid27::interactWithProbes(int level, unsigned int t) // calls interact() on every entry of para->getProbes() for this level and time step
 {
     for( SPtr<PreCollisionInteractor> probe: para->getProbes() )
     {
-        probe->interact(para, cudaManager, level, t);
+        probe->interact(para.get(), cudaMemoryManager.get(), level, t); // interactors receive raw, non-owning pointers
     }
 }
 
-void calcTurbulentViscosity(Parameter* para, int level)
+void  UpdateGrid27::calcTurbulentViscosity(int level) // delegates the turbulence model kernel launch for this level to tmFactory
 {
-    if(para->getUseAMD())
-        calcTurbulentViscosityAMD(para, level);
+    this->tmFactory->runTurbulenceModelKernel(level); // NOTE(review): no guard here any more; assumes tmFactory is non-null and handles the "no turbulence model" case itself - confirm
 }
 
-
-UpdateGrid27::UpdateGrid27(SPtr<Parameter> para, vf::gpu::Communicator &comm, SPtr<CudaMemoryManager> cudaManager,
-                           std::vector<std::shared_ptr<PorousMedia>> &pm, std::vector<SPtr<Kernel>> &kernels , BoundaryConditionFactory* bcFactory)
-    : para(para), comm(comm), cudaMemoryManager(cudaManager), pm(pm), kernels(kernels)
+void UpdateGrid27::exchangeData(int level) // public entry point: prepare + exchange + scatter for one level without CUDA streams
 {
-    chooseFunctionForCollisionAndExchange();
-    chooseFunctionForRefinementAndExchange();
-    this->bcKernelManager = std::make_shared<BCKernelManager>(para, bcFactory);
-    this->adKernelManager = std::make_shared<ADKernelManager>(para);
-    this->gridScalingKernelManager =  std::make_shared<GridScalingKernelManager>(para);
+    exchangeMultiGPU_noStreams_withPrepare(level, false); // useReducedComm = false selects the AllNodes exchange routines
 }
 
-void UpdateGrid27::chooseFunctionForCollisionAndExchange()
+UpdateGrid27::UpdateGrid27(SPtr<Parameter> para, vf::gpu::Communicator &comm, SPtr<CudaMemoryManager> cudaMemoryManager,
+                           std::vector<std::shared_ptr<PorousMedia>> &pm, std::vector<SPtr<Kernel>> &kernels , BoundaryConditionFactory* bcFactory, SPtr<TurbulenceModelFactory>  tmFactory)
+    : para(para), comm(comm), cudaMemoryManager(cudaMemoryManager), pm(pm), kernels(kernels), tmFactory(tmFactory)
 {
-    std::cout << "Function used for collisionAndExchange: ";
-    if (para->getUseStreams() && para->getNumprocs() > 1 && para->getKernelNeedsFluidNodeIndicesToRun()) {
-        this->collisionAndExchange = &UpdateGrid27::collisionAndExchange_streams;
-        std::cout << "collisionAndExchange_streams()" << std::endl;
-
-    } else if (para->getUseStreams() && !para->getKernelNeedsFluidNodeIndicesToRun()) {
-        std::cout << "Cuda Streams can only be used with kernels which run using fluidNodesIndices." << std::endl;
-
-    } else if (para->getUseStreams() && para->getNumprocs() <= 1) {
-        std::cout << "Cuda Streams can only be used with multiple MPI processes." << std::endl;
-
-    } else if (!para->getUseStreams() && para->getKernelNeedsFluidNodeIndicesToRun()) {
-        this->collisionAndExchange = &UpdateGrid27::collisionAndExchange_noStreams_indexKernel;
-        std::cout << "collisionAndExchange_noStreams_indexKernel()" << std::endl;
-
-    } else if (!para->getUseStreams() && !para->getKernelNeedsFluidNodeIndicesToRun()) {
-        this->collisionAndExchange = &UpdateGrid27::collisionAndExchange_noStreams_oldKernel;
-        std::cout << "collisionAndExchange_noStreams_oldKernel()" << std::endl;
+    this->collision = getFunctionForCollisionAndExchange(para->getUseStreams(), para->getNumprocs(), para->getKernelNeedsFluidNodeIndicesToRun()); // collision strategy is chosen once, at construction, from the stream / process count / kernel settings
+    this->refinement = getFunctionForRefinementAndExchange(para->getUseStreams(), para->getNumprocs(), para->getMaxLevel(), para->useReducedCommunicationAfterFtoC); // refinement strategy is chosen once as well
 
-    } else {
-        std::cout << "Invalid Configuration for collision and exchange" << std::endl;
-    }
+    this->bcKernelManager = std::make_shared<BCKernelManager>(para, bcFactory);
+    this->adKernelManager = std::make_shared<ADKernelManager>(para);
+    this->gridScalingKernelManager =  std::make_shared<GridScalingKernelManager>(para);
 }
-
-void UpdateGrid27::chooseFunctionForRefinementAndExchange()
-{
-    std::cout << "Function used for refinementAndExchange: ";
-    if (para->getMaxLevel() == 0) {
-        this->refinementAndExchange = &UpdateGrid27::refinementAndExchange_noRefinementAndExchange;
-        std::cout << "only one level - no function needed." << std::endl;
-
-    } else if (para->getNumprocs() == 1) {
-        this->refinementAndExchange = &UpdateGrid27::refinementAndExchange_noExchange;
-        std::cout << "refinementAndExchange_noExchange()" << std::endl;
-
-    } else if (para->getNumprocs() > 1 && para->getUseStreams() && para->useReducedCommunicationAfterFtoC) {
-        this->refinementAndExchange = &UpdateGrid27::refinementAndExchange_streams_onlyExchangeInterface;
-        std::cout << "refinementAndExchange_streams_onlyExchangeInterface()" << std::endl;
-
-    } else if(para->getNumprocs() > 1 && para->getUseStreams() && !para->useReducedCommunicationAfterFtoC){
-        this->refinementAndExchange = &UpdateGrid27::refinementAndExchange_streams_completeExchange;
-        std::cout << "refinementAndExchange_streams_completeExchange()" << std::endl;
-
-    } else if (para->getNumprocs() > 1 && !para->getUseStreams() && para->useReducedCommunicationAfterFtoC) {
-        this->refinementAndExchange = &UpdateGrid27::refinementAndExchange_noStreams_onlyExchangeInterface;
-        std::cout << "refinementAndExchange_noStreams_onlyExchangeInterface()" << std::endl;
-
-    } else {
-        this->refinementAndExchange = &UpdateGrid27::refinementAndExchange_noStreams_completeExchange;
-        std::cout << "refinementAndExchange_noStreams_completeExchange()" << std::endl;
-    }
-}
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h
index d8014c63b770c5886930073bdc06fa261add3922..2f0779c6ae2b1461f01383cf9fef9d17a4a38b01 100644
--- a/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h
+++ b/src/gpu/VirtualFluids_GPU/Calculation/UpdateGrid27.h
@@ -10,50 +10,64 @@
 
 class BCKernelManager;
 class ADKernelManager;
+class TurbulenceModelManager;
 class GridScalingKernelManager;
 class Kernel;
 class BoundaryConditionFactory;
+class TurbulenceModelFactory;
+
+class UpdateGrid27;
+using CollisionStrategy = std::function<void (UpdateGrid27* updateGrid, Parameter* para, int level, unsigned int t)>; //!< callable stored in UpdateGrid27::collision; receives the UpdateGrid27 instance it operates on
+using RefinementStrategy = std::function<void (UpdateGrid27* updateGrid, Parameter* para, int level)>; //!< callable stored in UpdateGrid27::refinement; receives the UpdateGrid27 instance it operates on
+
 
 class UpdateGrid27
 {
 public:
-    UpdateGrid27(SPtr<Parameter> para, vf::gpu::Communicator &comm, SPtr<CudaMemoryManager> cudaManager,
-                 std::vector<std::shared_ptr<PorousMedia>> &pm, std::vector<SPtr<Kernel>> &kernels, BoundaryConditionFactory* bcFactory);
+    UpdateGrid27(SPtr<Parameter> para, vf::gpu::Communicator &comm, SPtr<CudaMemoryManager> cudaMemoryManager,
+                 std::vector<std::shared_ptr<PorousMedia>> &pm, std::vector<SPtr<Kernel>> &kernels, BoundaryConditionFactory* bcFactory, SPtr<TurbulenceModelFactory> tmFactory);
     void updateGrid(int level, unsigned int t);
+    void exchangeData(int level);
 
 private:
-    void postCollisionBC(int level, unsigned int t);
-    void preCollisionBC(int level, unsigned int t);
-
-    void collision(int level, unsigned int t);
-    void collisionUsingIndex(int level, unsigned int t, uint *fluidNodeIndices = nullptr, uint numberOfFluidNodes = 0, int stream = -1);
+    void collisionAllNodes(int level, unsigned int t);
+    void collisionUsingIndices(int level, unsigned int t, uint *fluidNodeIndices = nullptr, uint numberOfFluidNodes = 0, int stream = -1);
     void collisionAdvectionDiffusion(int level);
+
+    void postCollisionBC(int level);
+    void preCollisionBC(int level, unsigned int t);
     void collisionPorousMedia(int level);
 
     void fineToCoarse(int level, uint *iCellFCC, uint *iCellFCF, uint k_FC, int streamIndex);
     void coarseToFine(int level, uint *iCellCFC, uint *iCellCFF, uint k_CF, OffCF &offCF, int streamIndex);
 
+    void prepareExchangeMultiGPU(int level, int streamIndex);
+    void prepareExchangeMultiGPUAfterFtoC(int level, int streamIndex);
+
+    void exchangeMultiGPU(int level, int streamIndex);
+    void exchangeMultiGPUAfterFtoC(int level, int streamIndex);
+    void exchangeMultiGPU_noStreams_withPrepare(int level, bool useReducedComm);
+
+    void swapBetweenEvenAndOddTimestep(int level);
+
+    void calcMacroscopicQuantities(int level);
+    void calcTurbulentViscosity(int level);
+    void interactWithActuators(int level, unsigned int t);
+    void interactWithProbes(int level, unsigned int t);
+
 private:
-    typedef void (UpdateGrid27::*collisionAndExchangeFun)(int level, unsigned int t);
-    typedef void (UpdateGrid27::*refinementAndExchangeFun)(int level);
-    collisionAndExchangeFun collisionAndExchange   = nullptr;
-    refinementAndExchangeFun refinementAndExchange  = nullptr;
-
-    void chooseFunctionForCollisionAndExchange();
-    void chooseFunctionForRefinementAndExchange();
-
-    // functions for collision and exchange
-    void collisionAndExchange_noStreams_indexKernel(int level, unsigned int t);
-    void collisionAndExchange_noStreams_oldKernel(int level, unsigned int t);
-    void collisionAndExchange_streams(int level, unsigned int t);
-
-    // functions for refinement and exchange
-    void refinementAndExchange_streams_onlyExchangeInterface(int level);
-    void refinementAndExchange_streams_completeExchange(int level);
-    void refinementAndExchange_noStreams_onlyExchangeInterface(int level);
-    void refinementAndExchange_noStreams_completeExchange(int level);
-    void refinementAndExchange_noRefinementAndExchange(int level);
-    void refinementAndExchange_noExchange(int level);
+    CollisionStrategy collision;
+    friend class CollisionAndExchange_noStreams_indexKernel;
+    friend class CollisionAndExchange_noStreams_oldKernel;
+    friend class CollisionAndExchange_streams;
+
+    RefinementStrategy refinement;
+    friend class RefinementAndExchange_streams_exchangeInterface;
+    friend class RefinementAndExchange_streams_exchangeAllNodes;
+    friend class RefinementAndExchange_noStreams_exchangeInterface;
+    friend class RefinementAndExchange_noStreams_exchangeAllNodes;
+    friend class Refinement_noExchange;
+    friend class NoRefinement;
 
 private:
     SPtr<Parameter> para;
@@ -67,32 +81,8 @@ private:
     std::shared_ptr<ADKernelManager> adKernelManager;
     //! \property gridScalingKernelManager is a shared pointer to an object of GridScalingKernelManager
     std::shared_ptr<GridScalingKernelManager> gridScalingKernelManager;
+    //! \property tmFactory is a shared pointer to an object of TurbulenceModelFactory
+    std::shared_ptr<TurbulenceModelFactory> tmFactory;
 };
 
-
-
-
-
-extern "C" void prepareExchangeMultiGPU(Parameter *para, int level, int streamIndex);
-extern "C" void prepareExchangeMultiGPUAfterFtoC(Parameter *para, int level, int streamIndex);
-
-extern "C" void exchangeMultiGPU(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaManager,
-                                 int level, int streamIndex);
-extern "C" void exchangeMultiGPUAfterFtoC(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaManager,
-                                 int level, int streamIndex);
-extern "C" void exchangeMultiGPU_noStreams_withPrepare(Parameter *para, vf::gpu::Communicator &comm,
-                                                       CudaMemoryManager *cudaManager, int level, bool useReducedComm);
-
-
-
-extern "C" void swapBetweenEvenAndOddTimestep(Parameter* para, int level);
-
-extern "C" void calcMacroscopicQuantities(Parameter* para, int level);
-
-extern "C" void calcTurbulentViscosity(Parameter* para, int level);
-
-extern "C" void interactWithActuators(Parameter* para, CudaMemoryManager* cudaManager, int level, unsigned int t);
-
-extern "C" void interactWithProbes(Parameter* para, CudaMemoryManager* cudaManager, int level, unsigned int t);
-
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h
index 78e62708474b0e498e51c161f0821627fcfff3de..ec930ebbc06554e948204b74e79e0e25b85f57b5 100644
--- a/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h
+++ b/src/gpu/VirtualFluids_GPU/Communication/ExchangeData27.h
@@ -14,9 +14,9 @@
 
 //////////////////////////////////////////////////////////////////////////
 // 1D domain decomposition
-extern "C" void exchangePreCollDataGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
+void exchangePreCollDataGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
                                          int level);
-extern "C" void exchangePostCollDataGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
+void exchangePostCollDataGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
                                           int level);
 //////////////////////////////////////////////////////////////////////////
 // 3D domain decomposition
@@ -24,11 +24,11 @@ extern "C" void exchangePostCollDataGPU27(Parameter *para, vf::gpu::Communicator
 // functions used for all directions
 
 //! \brief Collect the send nodes in a buffer on the gpu
-extern "C" void collectNodesInSendBufferGPU(Parameter *para, int level, int streamIndex,
+void collectNodesInSendBufferGPU(Parameter *para, int level, int streamIndex,
                                             std::vector<ProcessNeighbor27> *sendProcessNeighbor,
                                             unsigned int numberOfSendProcessNeighbors);
 //! \brief Distribute the receive nodes from the buffer on the gpu
-extern "C" void scatterNodesFromRecvBufferGPU(Parameter *para, int level, int streamIndex,
+void scatterNodesFromRecvBufferGPU(Parameter *para, int level, int streamIndex,
                                               std::vector<ProcessNeighbor27> *recvProcessNeighborDev,
                                               unsigned int numberOfRecvProcessNeighbors);
 //! \brief Copy nodes which are part of the communication in multiple directions
@@ -40,7 +40,7 @@ extern "C" void scatterNodesFromRecvBufferGPU(Parameter *para, int level, int st
 //! copied 
 //! \param recvProcessNeighborHost is a reference to the receive buffer on the host, nodes are copied from here
 //! \param sendProcessNeighborHost is a reference to the send buffer on the host, nodes are copied to here
-extern "C" void copyEdgeNodes(std::vector<LBMSimulationParameter::EdgeNodePositions> &edgeNodes,
+void copyEdgeNodes(std::vector<LBMSimulationParameter::EdgeNodePositions> &edgeNodes,
                               std::vector<ProcessNeighbor27> &recvProcessNeighborHost,
                               std::vector<ProcessNeighbor27> &sendProcessNeighborHost);
 
@@ -49,12 +49,12 @@ extern "C" void copyEdgeNodes(std::vector<LBMSimulationParameter::EdgeNodePositi
 
 //! \brief Collect the send nodes for communication in the x direction in a buffer on the gpu
 //! \details Needed to exchange all nodes, used in the communication after collision step
-extern "C" void prepareExchangeCollDataXGPU27AllNodes(Parameter *para, int level, int streamIndex);
+void prepareExchangeCollDataXGPU27AllNodes(Parameter *para, int level, int streamIndex);
 //! \brief Collect the send nodes for communication in the x direction in a buffer on the gpu
 //! \details Only exchange nodes which are part of the interpolation process on refined grids. This function is used in
 //! the exchange which takes place after the interpolation fine to coarse and before the interpolation coarse to fine.
 //! See [master thesis of Anna Wellmann]
-extern "C" void prepareExchangeCollDataXGPU27AfterFtoC(Parameter *para, int level, int streamIndex);
+void prepareExchangeCollDataXGPU27AfterFtoC(Parameter *para, int level, int streamIndex);
 //! \brief Exchange routine in x direction for simulations on multiple gpus
 //! \details Send and receive the nodes from the communication buffers on the gpus.
 //! \param Communicator is needed for the communication between the processes with mpi
@@ -62,7 +62,7 @@ extern "C" void prepareExchangeCollDataXGPU27AfterFtoC(Parameter *para, int leve
 //! \param streamIndex is the index of a CUDA Stream, which is needed for communication hiding
 //! \param sendProcessNeighborDev, recvProcessNeighborDev, sendProcessNeighborHost, recvProcessNeighborHost are pointers
 //! to the send and receive arrays, both on the device and the host
-extern "C" void exchangeCollDataXGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
+void exchangeCollDataXGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
                                        int level, int streamIndex,
                                        std::vector<ProcessNeighbor27> *sendProcessNeighborDev,
                                        std::vector<ProcessNeighbor27> *recvProcessNeighborDev,
@@ -70,85 +70,85 @@ extern "C" void exchangeCollDataXGPU27(Parameter *para, vf::gpu::Communicator &c
                                        std::vector<ProcessNeighbor27> *recvProcessNeighborHost);
 //! \brief Calls exchangeCollDataXGPU27() for exchanging all nodes
 //! \details Used in the communication after collision step
-extern "C" void exchangeCollDataXGPU27AllNodes(Parameter *para, vf::gpu::Communicator &comm,
+void exchangeCollDataXGPU27AllNodes(Parameter *para, vf::gpu::Communicator &comm,
                                                CudaMemoryManager *cudaMemoryManager, int level, int streamIndex);
 //! \brief Calls exchangeCollDataXGPU27() for exchanging the nodes, which are part of the communication between the two
 //! interpolation processes on refined grids 
 //! \details Only exchange nodes which are part of the interpolation process on
 //! refined grids. This function is used in the exchange which takes place after the interpolation fine to coarse and
 //! before the interpolation coarse to fine. See [master thesis of Anna Wellmann]
-extern "C" void exchangeCollDataXGPU27AfterFtoC(Parameter *para, vf::gpu::Communicator &comm,
+void exchangeCollDataXGPU27AfterFtoC(Parameter *para, vf::gpu::Communicator &comm,
                                                 CudaMemoryManager *cudaMemoryManager, int level, int streamIndex);
 //! \brief Distribute the receive nodes (x direction) from the buffer on the gpu
 //! \details Needed to exchange all nodes, used in the communication after collision step
-extern "C" void scatterNodesFromRecvBufferXGPU27AllNodes(Parameter *para, int level, int streamIndex);
+void scatterNodesFromRecvBufferXGPU27AllNodes(Parameter *para, int level, int streamIndex);
 //! \brief Distribute the receive nodes (x direction) from the buffer on the gpu
 //! \details Only exchange nodes which are part of the interpolation process on refined grids. This function is used in
 //! the exchange which takes place after the interpolation fine to coarse and before the interpolation coarse to fine.
 //! See [master thesis of Anna Wellmann]
-extern "C" void scatterNodesFromRecvBufferXGPU27AfterFtoC(Parameter *para, int level, int streamIndex);
+void scatterNodesFromRecvBufferXGPU27AfterFtoC(Parameter *para, int level, int streamIndex);
 
 //////////////////////////////////////////////////////////////////////////
 // y
 
-extern "C" void prepareExchangeCollDataYGPU27AllNodes(Parameter *para, int level, int streamIndex);
-extern "C" void prepareExchangeCollDataYGPU27AfterFtoC(Parameter *para, int level, int streamIndex);
+void prepareExchangeCollDataYGPU27AllNodes(Parameter *para, int level, int streamIndex);
+void prepareExchangeCollDataYGPU27AfterFtoC(Parameter *para, int level, int streamIndex);
 
-extern "C" void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
+void exchangeCollDataYGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
                                        int level, int streamIndex,
                                        std::vector<ProcessNeighbor27> *sendProcessNeighborDev,
                                        std::vector<ProcessNeighbor27> *recvProcessNeighborDev,
                                        std::vector<ProcessNeighbor27> *sendProcessNeighborHost,
                                        std::vector<ProcessNeighbor27> *recvProcessNeighborHos);
-extern "C" void exchangeCollDataYGPU27AllNodes(Parameter *para, vf::gpu::Communicator &comm,
+void exchangeCollDataYGPU27AllNodes(Parameter *para, vf::gpu::Communicator &comm,
                                                CudaMemoryManager *cudaMemoryManager, int level, int streamIndex);
-extern "C" void exchangeCollDataYGPU27AfterFtoC(Parameter *para, vf::gpu::Communicator &comm,
+void exchangeCollDataYGPU27AfterFtoC(Parameter *para, vf::gpu::Communicator &comm,
                                                 CudaMemoryManager *cudaMemoryManager, int level, int streamIndex);
-extern "C" void scatterNodesFromRecvBufferYGPU27AllNodes(Parameter *para, int level, int streamIndex);
-extern "C" void scatterNodesFromRecvBufferYGPU27AfterFtoC(Parameter *para, int level, int streamIndex);
+void scatterNodesFromRecvBufferYGPU27AllNodes(Parameter *para, int level, int streamIndex);
+void scatterNodesFromRecvBufferYGPU27AfterFtoC(Parameter *para, int level, int streamIndex);
 
 // z
-extern "C" void prepareExchangeCollDataZGPU27AllNodes(Parameter *para, int level, int streamIndex);
-extern "C" void prepareExchangeCollDataZGPU27AfterFtoC(Parameter *para, int level, int streamIndex);
+void prepareExchangeCollDataZGPU27AllNodes(Parameter *para, int level, int streamIndex);
+void prepareExchangeCollDataZGPU27AfterFtoC(Parameter *para, int level, int streamIndex);
 
-extern "C" void exchangeCollDataZGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
+void exchangeCollDataZGPU27(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
                                        int level, int streamIndex,
                                        std::vector<ProcessNeighbor27> *sendProcessNeighborDev,
                                        std::vector<ProcessNeighbor27> *recvProcessNeighborDev,
                                        std::vector<ProcessNeighbor27> *sendProcessNeighborHost,
                                        std::vector<ProcessNeighbor27> *recvProcessNeighborHost);
-extern "C" void exchangeCollDataZGPU27AllNodes(Parameter *para, vf::gpu::Communicator &comm,
+void exchangeCollDataZGPU27AllNodes(Parameter *para, vf::gpu::Communicator &comm,
                                                CudaMemoryManager *cudaMemoryManager, int level, int streamIndex);
-extern "C" void exchangeCollDataZGPU27AfterFtoC(Parameter *para, vf::gpu::Communicator &comm,
+void exchangeCollDataZGPU27AfterFtoC(Parameter *para, vf::gpu::Communicator &comm,
                                                 CudaMemoryManager *cudaMemoryManager, int level, int streamIndex);
 
-extern "C" void scatterNodesFromRecvBufferZGPU27AllNodes(Parameter *para, int level, int streamIndex);
-extern "C" void scatterNodesFromRecvBufferZGPU27AfterFtoC(Parameter *para, int level, int streamIndex);
+void scatterNodesFromRecvBufferZGPU27AllNodes(Parameter *para, int level, int streamIndex);
+void scatterNodesFromRecvBufferZGPU27AfterFtoC(Parameter *para, int level, int streamIndex);
 
 //////////////////////////////////////////////////////////////////////////
 // 3D domain decomposition convection diffusion
-extern "C" void exchangePreCollDataADXGPU27(Parameter *para, vf::gpu::Communicator &comm,
+void exchangePreCollDataADXGPU27(Parameter *para, vf::gpu::Communicator &comm,
                                             CudaMemoryManager *cudaMemoryManager, int level);
-extern "C" void exchangePreCollDataADYGPU27(Parameter *para, vf::gpu::Communicator &comm,
+void exchangePreCollDataADYGPU27(Parameter *para, vf::gpu::Communicator &comm,
                                             CudaMemoryManager *cudaMemoryManager, int level);
-extern "C" void exchangePreCollDataADZGPU27(Parameter *para, vf::gpu::Communicator &comm,
+void exchangePreCollDataADZGPU27(Parameter *para, vf::gpu::Communicator &comm,
                                             CudaMemoryManager *cudaMemoryManager, int level);
-extern "C" void exchangePostCollDataADXGPU27(Parameter *para, vf::gpu::Communicator &comm,
+void exchangePostCollDataADXGPU27(Parameter *para, vf::gpu::Communicator &comm,
                                              CudaMemoryManager *cudaMemoryManager, int level);
-extern "C" void exchangePostCollDataADYGPU27(Parameter *para, vf::gpu::Communicator &comm,
+void exchangePostCollDataADYGPU27(Parameter *para, vf::gpu::Communicator &comm,
                                              CudaMemoryManager *cudaMemoryManager, int level);
-extern "C" void exchangePostCollDataADZGPU27(Parameter *para, vf::gpu::Communicator &comm,
+void exchangePostCollDataADZGPU27(Parameter *para, vf::gpu::Communicator &comm,
                                              CudaMemoryManager *cudaMemoryManager, int level);
 //////////////////////////////////////////////////////////////////////////
 // 3D domain decomposition F3 - K18/K20
-extern "C" void exchangeCollDataF3XGPU(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
+void exchangeCollDataF3XGPU(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
                                        int level);
-extern "C" void exchangeCollDataF3YGPU(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
+void exchangeCollDataF3YGPU(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
                                        int level);
-extern "C" void exchangeCollDataF3ZGPU(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
+void exchangeCollDataF3ZGPU(Parameter *para, vf::gpu::Communicator &comm, CudaMemoryManager *cudaMemoryManager,
                                        int level);
 //////////////////////////////////////////////////////////////////////////
-extern "C" void barrierGPU(vf::gpu::Communicator &comm);
+void barrierGPU(vf::gpu::Communicator &comm);
 //////////////////////////////////////////////////////////////////////////
 
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.cpp
index 559abf12377f8e0b587753f1d225da8944d843fd..e197fb5c28611e77406b30ab39aa6af2f54b9ef5 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.cpp
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridProvider.cpp
@@ -36,8 +36,8 @@ void GridProvider::setNumberOfFluidNodes(const int numberOfNodes, const int leve
 }
 
 void GridProvider::setNumberOfFluidNodesBorder(const int numberOfNodes, const int level) const {
-    para->getParH(level)->numberOffluidNodesBorder = numberOfNodes;
-    para->getParD(level)->numberOffluidNodesBorder = numberOfNodes;
+    para->getParH(level)->numberOfFluidNodesBorder = numberOfNodes;
+    para->getParD(level)->numberOfFluidNodesBorder = numberOfNodes;
 }
 
 void GridProvider::setInitalNodeValues(const int numberOfNodes, const int level) const
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.cpp
index da4ba9d8b4ad094437ec17b17e7ea653842bf83e..428cddfa34b5e98b85f31c2a1e9ca5be20351bfd 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.cpp
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderFiles/GridReader.cpp
@@ -364,24 +364,24 @@ void GridReader::initalValuesDomainDecompostion(int level)
 				procNeighborsSendX.push_back(pnXsend);
 				procNeighborsRecvX.push_back(pnXrecv);
 				neighborRankX.push_back(process);
-				std::cout << "MyID: " << para->getMyID() << ", neighborRankX: " << process << std::endl;
+				std::cout << "MyID: " << para->getMyProcessID() << ", neighborRankX: " << process << std::endl;
 			}
 			if (para->getIsNeighborY())
 			{
 				procNeighborsSendY.push_back(pnYsend);
 				procNeighborsRecvY.push_back(pnYrecv);
 				neighborRankY.push_back(process);
-				std::cout << "MyID: " << para->getMyID() << ", neighborRankY: " << process << std::endl;
+				std::cout << "MyID: " << para->getMyProcessID() << ", neighborRankY: " << process << std::endl;
 			}
 			if (para->getIsNeighborZ())
 			{
 				procNeighborsSendZ.push_back(pnZsend);
 				procNeighborsRecvZ.push_back(pnZrecv);
 				neighborRankZ.push_back(process);
-				std::cout << "MyID: " << para->getMyID() << ", neighborRankZ: " << process << std::endl;
+				std::cout << "MyID: " << para->getMyProcessID() << ", neighborRankZ: " << process << std::endl;
 			}
 		}
-		std::cout << "MyID: " << para->getMyID() << ", size of neighborRankX: " << neighborRankX.size() << ", size of neighborRankY: " << neighborRankY.size() << ", size of neighborRankZ: " << neighborRankZ.size() << std::endl;
+		std::cout << "MyID: " << para->getMyProcessID() << ", size of neighborRankX: " << neighborRankX.size() << ", size of neighborRankY: " << neighborRankY.size() << ", size of neighborRankZ: " << neighborRankZ.size() << std::endl;
 	}
 
 	//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -634,7 +634,7 @@ void GridReader::setNoSlipQs(std::shared_ptr<BoundaryQs> boundaryQ) const
 			this->printQSize("no slip", boundaryQ, level);
 			this->setSizeNoSlip(boundaryQ, level);
 			this->initalQStruct(para->getParH(level)->noSlipBC, boundaryQ, level);
-            cudaMemoryManager->cudaCopyWallBC(level);
+            cudaMemoryManager->cudaCopyNoSlipBC(level);
 		}
 	}
 }
@@ -660,9 +660,9 @@ void GridReader::modifyQElement(std::shared_ptr<BoundaryQs> boundaryQ, unsigned
 {
 	QforBoundaryConditions Q;
 	real* QQ = para->getParH(level)->geometryBC.q27[0];
-	Q.q27[vf::lbm::dir::REST] = &QQ[vf::lbm::dir::REST * para->getParH(level)->geometryBC.numberOfBCnodes];
+	Q.q27[vf::lbm::dir::DIR_000] = &QQ[vf::lbm::dir::DIR_000 * para->getParH(level)->geometryBC.numberOfBCnodes];
 	for (unsigned int i = 0; i < boundaryQ->getSize(level); i++)
-		Q.q27[vf::lbm::dir::REST][i] = 0.0f;
+		Q.q27[vf::lbm::dir::DIR_000][i] = 0.0f;
 }
 
 /*------------------------------------------------------------------------------------------------*/
@@ -733,40 +733,40 @@ void GridReader::initalGridInformations()
 
 void GridReader::setQ27Size(QforBoundaryConditions &Q, real* QQ, unsigned int sizeQ) const
 {
-	Q.q27[E] = &QQ[E   *sizeQ];
-	Q.q27[W] = &QQ[W   *sizeQ];
-	Q.q27[N] = &QQ[N   *sizeQ];
-	Q.q27[S] = &QQ[S   *sizeQ];
-	Q.q27[T] = &QQ[T   *sizeQ];
-	Q.q27[B] = &QQ[B   *sizeQ];
-	Q.q27[NE] = &QQ[NE  *sizeQ];
-	Q.q27[SW] = &QQ[SW  *sizeQ];
-	Q.q27[SE] = &QQ[SE  *sizeQ];
-	Q.q27[NW] = &QQ[NW  *sizeQ];
-	Q.q27[TE] = &QQ[TE  *sizeQ];
-	Q.q27[BW] = &QQ[BW  *sizeQ];
-	Q.q27[BE] = &QQ[BE  *sizeQ];
-	Q.q27[TW] = &QQ[TW  *sizeQ];
-	Q.q27[TN] = &QQ[TN  *sizeQ];
-	Q.q27[BS] = &QQ[BS  *sizeQ];
-	Q.q27[BN] = &QQ[BN  *sizeQ];
-	Q.q27[TS] = &QQ[TS  *sizeQ];
-	Q.q27[REST] = &QQ[REST*sizeQ];
-	Q.q27[TNE] = &QQ[TNE *sizeQ];
-	Q.q27[TSW] = &QQ[TSW *sizeQ];
-	Q.q27[TSE] = &QQ[TSE *sizeQ];
-	Q.q27[TNW] = &QQ[TNW *sizeQ];
-	Q.q27[BNE] = &QQ[BNE *sizeQ];
-	Q.q27[BSW] = &QQ[BSW *sizeQ];
-	Q.q27[BSE] = &QQ[BSE *sizeQ];
-	Q.q27[BNW] = &QQ[BNW *sizeQ];
+	Q.q27[DIR_P00] = &QQ[DIR_P00   *sizeQ];
+	Q.q27[DIR_M00] = &QQ[DIR_M00   *sizeQ];
+	Q.q27[DIR_0P0] = &QQ[DIR_0P0   *sizeQ];
+	Q.q27[DIR_0M0] = &QQ[DIR_0M0   *sizeQ];
+	Q.q27[DIR_00P] = &QQ[DIR_00P   *sizeQ];
+	Q.q27[DIR_00M] = &QQ[DIR_00M   *sizeQ];
+	Q.q27[DIR_PP0] = &QQ[DIR_PP0  *sizeQ];
+	Q.q27[DIR_MM0] = &QQ[DIR_MM0  *sizeQ];
+	Q.q27[DIR_PM0] = &QQ[DIR_PM0  *sizeQ];
+	Q.q27[DIR_MP0] = &QQ[DIR_MP0  *sizeQ];
+	Q.q27[DIR_P0P] = &QQ[DIR_P0P  *sizeQ];
+	Q.q27[DIR_M0M] = &QQ[DIR_M0M  *sizeQ];
+	Q.q27[DIR_P0M] = &QQ[DIR_P0M  *sizeQ];
+	Q.q27[DIR_M0P] = &QQ[DIR_M0P  *sizeQ];
+	Q.q27[DIR_0PP] = &QQ[DIR_0PP  *sizeQ];
+	Q.q27[DIR_0MM] = &QQ[DIR_0MM  *sizeQ];
+	Q.q27[DIR_0PM] = &QQ[DIR_0PM  *sizeQ];
+	Q.q27[DIR_0MP] = &QQ[DIR_0MP  *sizeQ];
+	Q.q27[DIR_000] = &QQ[DIR_000*sizeQ];
+	Q.q27[DIR_PPP] = &QQ[DIR_PPP *sizeQ];
+	Q.q27[DIR_MMP] = &QQ[DIR_MMP *sizeQ];
+	Q.q27[DIR_PMP] = &QQ[DIR_PMP *sizeQ];
+	Q.q27[DIR_MPP] = &QQ[DIR_MPP *sizeQ];
+	Q.q27[DIR_PPM] = &QQ[DIR_PPM *sizeQ];
+	Q.q27[DIR_MMM] = &QQ[DIR_MMM *sizeQ];
+	Q.q27[DIR_PMM] = &QQ[DIR_PMM *sizeQ];
+	Q.q27[DIR_MPM] = &QQ[DIR_MPM *sizeQ];
 }
 
 void GridReader::setSizeNoSlip(std::shared_ptr<BoundaryQs> boundaryQ, unsigned int level) const
 {
 	para->getParH(level)->noSlipBC.numberOfBCnodes = boundaryQ->getSize(level);
 	para->getParD(level)->noSlipBC.numberOfBCnodes = para->getParH(level)->noSlipBC.numberOfBCnodes;
-    cudaMemoryManager->cudaAllocWallBC(level);
+    cudaMemoryManager->cudaAllocNoSlipBC(level);
 }
 
 void GridReader::setSizeGeoQs(std::shared_ptr<BoundaryQs> boundaryQ, unsigned int level) const
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp
index d2d04e8c84f55f60f476e753959ab5a9002b3c84..c7aa0db01f1f76dc9bf3c65b64ade8a8f43a7a8b 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.cpp
@@ -18,19 +18,18 @@ using namespace vf::lbm::dir;
 
 GridGenerator::GridGenerator(std::shared_ptr<GridBuilder> builder, std::shared_ptr<Parameter> para, std::shared_ptr<CudaMemoryManager> cudaMemoryManager, vf::gpu::Communicator& communicator)
 {
-	this->builder = builder;
+    this->builder = builder;
     this->para = para;
     this->cudaMemoryManager = cudaMemoryManager;
     this->indexRearrangement = std::make_unique<IndexRearrangementForStreams>(para, builder, communicator);
 }
 
-GridGenerator::~GridGenerator()
-{
-
-}
+GridGenerator::~GridGenerator() = default;
 
 void GridGenerator::initalGridInformations()
 {
+    if (para->getKernelNeedsFluidNodeIndicesToRun())
+        builder->findFluidNodes(para->getUseStreams());
     std::vector<int> gridX, gridY, gridZ;
     std::vector<int> distX, distY, distZ;
     const int numberOfGridLevels = builder->getNumberOfGridLevels();
@@ -47,22 +46,22 @@ void GridGenerator::initalGridInformations()
 void GridGenerator::allocArrays_CoordNeighborGeo()
 {
     const uint numberOfLevels = builder->getNumberOfGridLevels();
-	std::cout << "Number of Level: " << numberOfLevels << std::endl;
-	int numberOfNodesGlobal = 0;
-	std::cout << "Number of Nodes: " << std::endl;
-	
-	for (uint level = 0; level < numberOfLevels; level++) 
-	{
-		const int numberOfNodesPerLevel = builder->getNumberOfNodes(level) + 1;
-		numberOfNodesGlobal += numberOfNodesPerLevel;
-		std::cout << "Level " << level << " = " << numberOfNodesPerLevel << " Nodes" << std::endl;
-	
-		setNumberOfNodes(numberOfNodesPerLevel, level);
-	
-		cudaMemoryManager->cudaAllocCoord(level);
+    std::cout << "Number of Level: " << numberOfLevels << std::endl;
+    int numberOfNodesGlobal = 0;
+    std::cout << "Number of Nodes: " << std::endl;
+    
+    for (uint level = 0; level < numberOfLevels; level++) 
+    {
+        const int numberOfNodesPerLevel = builder->getNumberOfNodes(level) + 1;
+        numberOfNodesGlobal += numberOfNodesPerLevel;
+        std::cout << "Level " << level << " = " << numberOfNodesPerLevel << " Nodes" << std::endl;
+    
+        setNumberOfNodes(numberOfNodesPerLevel, level);
+    
+        cudaMemoryManager->cudaAllocCoord(level);
         cudaMemoryManager->cudaAllocSP(level);
         //cudaMemoryManager->cudaAllocF3SP(level);
-		cudaMemoryManager->cudaAllocNeighborWSB(level);
+        cudaMemoryManager->cudaAllocNeighborWSB(level);
 
         if(para->getUseTurbulentViscosity())
             cudaMemoryManager->cudaAllocTurbulentViscosity(level);
@@ -70,18 +69,18 @@ void GridGenerator::allocArrays_CoordNeighborGeo()
         if(para->getIsBodyForce())
             cudaMemoryManager->cudaAllocBodyForce(level);
 
-		builder->getNodeValues(
-			para->getParH(level)->coordinateX,
-			para->getParH(level)->coordinateY,
-			para->getParH(level)->coordinateZ,
-			para->getParH(level)->neighborX,
-			para->getParH(level)->neighborY,
-			para->getParH(level)->neighborZ,
-			para->getParH(level)->neighborInverse,
-			para->getParH(level)->typeOfGridNode,
-			level);
+        builder->getNodeValues(
+            para->getParH(level)->coordinateX,
+            para->getParH(level)->coordinateY,
+            para->getParH(level)->coordinateZ,
+            para->getParH(level)->neighborX,
+            para->getParH(level)->neighborY,
+            para->getParH(level)->neighborZ,
+            para->getParH(level)->neighborInverse,
+            para->getParH(level)->typeOfGridNode,
+            level);
 
-		setInitalNodeValues(numberOfNodesPerLevel, level);
+        setInitalNodeValues(numberOfNodesPerLevel, level);
 
         cudaMemoryManager->cudaCopyNeighborWSB(level);
         cudaMemoryManager->cudaCopySP(level);
@@ -90,9 +89,9 @@ void GridGenerator::allocArrays_CoordNeighborGeo()
             cudaMemoryManager->cudaCopyBodyForce(level);
 
         //std::cout << verifyNeighborIndices(level);
-	}
-	std::cout << "Number of Nodes: " << numberOfNodesGlobal << std::endl;
-	std::cout << "-----finish Coord, Neighbor, Geo------" << std::endl;
+    }
+    std::cout << "Number of Nodes: " << numberOfNodesGlobal << std::endl;
+    std::cout << "-----finish Coord, Neighbor, Geo------" << std::endl;
 }
 
 void GridGenerator::allocArrays_fluidNodeIndices() {
@@ -115,54 +114,58 @@ void GridGenerator::allocArrays_fluidNodeIndicesBorder() {
 
 void GridGenerator::allocArrays_BoundaryValues()
 {
-	std::cout << "------read BoundaryValues------" << std::endl;
+    std::cout << "------read BoundaryValues------" << std::endl;
+    int blocks = 0;
 
     for (uint level = 0; level < builder->getNumberOfGridLevels(); level++) {
         const auto numberOfPressureValues = int(builder->getPressureSize(level));
-
         std::cout << "size pressure level " << level << " : " << numberOfPressureValues << std::endl;
+
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        para->getParH(level)->pressureBC.numberOfBCnodes = numberOfPressureValues;
-        para->getParD(level)->pressureBC.numberOfBCnodes = numberOfPressureValues;
-        para->getParH(level)->numberOfPressureBCnodesRead = numberOfPressureValues * para->getD3Qxx();
-        para->getParD(level)->numberOfPressureBCnodesRead = numberOfPressureValues * para->getD3Qxx();
+        para->getParH(level)->pressureBC.numberOfBCnodes = 0;
+        para->getParD(level)->outflowPressureCorrectionFactor = para->getOutflowPressureCorrectionFactor();
         if (numberOfPressureValues > 1)
         {
+            blocks = (numberOfPressureValues / para->getParH(level)->numberofthreads) + 1;
+            para->getParH(level)->pressureBC.numberOfBCnodes = blocks * para->getParH(level)->numberofthreads;
             cudaMemoryManager->cudaAllocPress(level);
             builder->getPressureValues(para->getParH(level)->pressureBC.RhoBC, para->getParH(level)->pressureBC.k, para->getParH(level)->pressureBC.kN, level);
             cudaMemoryManager->cudaCopyPress(level);
         }
+        para->getParD(level)->pressureBC.numberOfBCnodes = para->getParH(level)->pressureBC.numberOfBCnodes;
+        para->getParH(level)->numberOfPressureBCnodesRead = para->getParH(level)->pressureBC.numberOfBCnodes * para->getD3Qxx();
+        para->getParD(level)->numberOfPressureBCnodesRead = para->getParH(level)->pressureBC.numberOfBCnodes * para->getD3Qxx();
     }
 
     for (uint level = 0; level < builder->getNumberOfGridLevels(); level++) {
         const auto numberOfSlipValues = int(builder->getSlipSize(level));
-
         std::cout << "size slip level " << level << " : " << numberOfSlipValues << std::endl;
+
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        para->getParH(level)->slipBC.numberOfBCnodes = numberOfSlipValues;
-        para->getParD(level)->slipBC.numberOfBCnodes = numberOfSlipValues;
-        para->getParH(level)->numberOfSlipBCnodesRead = numberOfSlipValues * para->getD3Qxx();
-        para->getParD(level)->numberOfSlipBCnodesRead = numberOfSlipValues * para->getD3Qxx();
+        para->getParH(level)->slipBC.numberOfBCnodes = 0;
         if (numberOfSlipValues > 1)
         {
+            blocks = (numberOfSlipValues / para->getParH(level)->numberofthreads) + 1;
+            para->getParH(level)->slipBC.numberOfBCnodes = blocks * para->getParH(level)->numberofthreads;
             cudaMemoryManager->cudaAllocSlipBC(level);
             builder->getSlipValues(para->getParH(level)->slipBC.normalX, para->getParH(level)->slipBC.normalY, para->getParH(level)->slipBC.normalZ, para->getParH(level)->slipBC.k, level);
             cudaMemoryManager->cudaCopySlipBC(level);
         }
+        para->getParD(level)->slipBC.numberOfBCnodes = para->getParH(level)->slipBC.numberOfBCnodes;
+        para->getParH(level)->numberOfSlipBCnodesRead = para->getParH(level)->slipBC.numberOfBCnodes * para->getD3Qxx();
+        para->getParD(level)->numberOfSlipBCnodesRead = para->getParH(level)->slipBC.numberOfBCnodes * para->getD3Qxx();
     }
 
     for (uint level = 0; level < builder->getNumberOfGridLevels(); level++) {
         const auto numberOfStressValues = int(builder->getStressSize(level));
-
         std::cout << "size stress level " << level << " : " << numberOfStressValues << std::endl;
-        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        para->getParH(level)->stressBC.numberOfBCnodes = numberOfStressValues;
-        para->getParD(level)->stressBC.numberOfBCnodes = numberOfStressValues;
-        para->getParH(level)->numberOfStressBCnodesRead = numberOfStressValues * para->getD3Qxx();
-        para->getParD(level)->numberOfStressBCnodesRead = numberOfStressValues * para->getD3Qxx();
 
+        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+        para->getParH(level)->stressBC.numberOfBCnodes = 0;
         if (numberOfStressValues > 1)
         {
+            blocks = (numberOfStressValues / para->getParH(level)->numberofthreads) + 1;
+            para->getParH(level)->stressBC.numberOfBCnodes = blocks * para->getParH(level)->numberofthreads;
             cudaMemoryManager->cudaAllocStressBC(level);
             cudaMemoryManager->cudaAllocWallModel(level, para->getHasWallModelMonitor());
             builder->getStressValues(   para->getParH(level)->stressBC.normalX,  para->getParH(level)->stressBC.normalY,  para->getParH(level)->stressBC.normalZ, 
@@ -175,6 +178,9 @@ void GridGenerator::allocArrays_BoundaryValues()
             cudaMemoryManager->cudaCopyStressBC(level);
             cudaMemoryManager->cudaCopyWallModel(level, para->getHasWallModelMonitor());
         }
+        para->getParD(level)->stressBC.numberOfBCnodes = para->getParH(level)->stressBC.numberOfBCnodes;
+        para->getParH(level)->numberOfStressBCnodesRead = para->getParH(level)->stressBC.numberOfBCnodes * para->getD3Qxx();
+        para->getParD(level)->numberOfStressBCnodesRead = para->getParH(level)->stressBC.numberOfBCnodes * para->getD3Qxx();
     }
     
 
@@ -182,17 +188,13 @@ void GridGenerator::allocArrays_BoundaryValues()
         const auto numberOfVelocityValues = int(builder->getVelocitySize(level));
         std::cout << "size velocity level " << level << " : " << numberOfVelocityValues << std::endl;
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        int blocks = (numberOfVelocityValues / para->getParH(level)->numberofthreads) + 1;
-        para->getParH(level)->velocityBC.kArray = blocks * para->getParH(level)->numberofthreads;
-        para->getParD(level)->velocityBC.kArray = para->getParH(level)->velocityBC.kArray;
-        ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        para->getParH(level)->velocityBC.numberOfBCnodes = numberOfVelocityValues;
-        para->getParD(level)->velocityBC.numberOfBCnodes = numberOfVelocityValues;
-        para->getParH(level)->numberOfVeloBCnodesRead = numberOfVelocityValues * para->getD3Qxx();
-        para->getParD(level)->numberOfVeloBCnodesRead = numberOfVelocityValues * para->getD3Qxx();
+
+        para->getParH(level)->velocityBC.numberOfBCnodes = 0;
 
         if (numberOfVelocityValues > 1)
         {
+            blocks = (numberOfVelocityValues / para->getParH(level)->numberofthreads) + 1;
+            para->getParH(level)->velocityBC.numberOfBCnodes = blocks * para->getParH(level)->numberofthreads;
             ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
             cudaMemoryManager->cudaAllocVeloBC(level);
             ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -206,30 +208,33 @@ void GridGenerator::allocArrays_BoundaryValues()
             ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
             // advection - diffusion stuff
             if (para->getDiffOn()==true){
-            	//////////////////////////////////////////////////////////////////////////
-            	para->getParH(level)->TempVel.kTemp = numberOfVelocityValues;
-            	//cout << "Groesse kTemp = " << para->getParH(i)->TempPress.kTemp << endl;
-            	std::cout << "getTemperatureInit = " << para->getTemperatureInit() << std::endl;
-            	std::cout << "getTemperatureBC = " << para->getTemperatureBC() << std::endl;
-            	//////////////////////////////////////////////////////////////////////////
+                //////////////////////////////////////////////////////////////////////////
+                para->getParH(level)->TempVel.kTemp = para->getParH(level)->velocityBC.numberOfBCnodes;
+                //cout << "Groesse kTemp = " << para->getParH(i)->TempPress.kTemp << endl;
+                std::cout << "getTemperatureInit = " << para->getTemperatureInit() << std::endl;
+                std::cout << "getTemperatureBC = " << para->getTemperatureBC() << std::endl;
+                //////////////////////////////////////////////////////////////////////////
                 cudaMemoryManager->cudaAllocTempVeloBC(level);
-            	//cout << "nach alloc " << endl;
-            	//////////////////////////////////////////////////////////////////////////
-            	for (int m = 0; m < numberOfVelocityValues; m++)
-            	{
-            		para->getParH(level)->TempVel.temp[m]      = para->getTemperatureInit();
-            		para->getParH(level)->TempVel.tempPulse[m] = para->getTemperatureBC();
-            		para->getParH(level)->TempVel.velo[m]      = para->getVelocity();
-            		para->getParH(level)->TempVel.k[m]         = para->getParH(level)->velocityBC.k[m];
-            	}
-            	//////////////////////////////////////////////////////////////////////////
-            	//cout << "vor copy " << endl;
+                //cout << "nach alloc " << endl;
+                //////////////////////////////////////////////////////////////////////////
+                for (uint m = 0; m < para->getParH(level)->velocityBC.numberOfBCnodes; m++)
+                {
+                    para->getParH(level)->TempVel.temp[m]      = para->getTemperatureInit();
+                    para->getParH(level)->TempVel.tempPulse[m] = para->getTemperatureBC();
+                    para->getParH(level)->TempVel.velo[m]      = para->getVelocity();
+                    para->getParH(level)->TempVel.k[m]         = para->getParH(level)->velocityBC.k[m];
+                }
+                //////////////////////////////////////////////////////////////////////////
+                //cout << "vor copy " << endl;
                 cudaMemoryManager->cudaCopyTempVeloBCHD(level);
-            	//cout << "nach copy " << endl;
-            	//////////////////////////////////////////////////////////////////////////
+                //cout << "nach copy " << endl;
+                //////////////////////////////////////////////////////////////////////////
             }
             //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
         }
+        para->getParD(level)->velocityBC.numberOfBCnodes = para->getParH(level)->velocityBC.numberOfBCnodes;
+        para->getParH(level)->numberOfVeloBCnodesRead = para->getParH(level)->velocityBC.numberOfBCnodes * para->getD3Qxx();
+        para->getParD(level)->numberOfVeloBCnodesRead = para->getParH(level)->velocityBC.numberOfBCnodes * para->getD3Qxx();
     }
 
     for (uint level = 0; level < builder->getNumberOfGridLevels(); level++) {
@@ -340,66 +345,53 @@ void GridGenerator::allocArrays_BoundaryValues()
 
 
     if (builder->hasGeometryValues()) {
-        para->setGeometryValues(true);
-        for (uint i = 0; i < builder->getNumberOfGridLevels(); i++) {
-            int numberOfGeometryValues = builder->getGeometrySize(i);
-            std::cout << "size geometry values, Level " << i << " : " << numberOfGeometryValues << std::endl;
+        para->setUseGeometryValues(true);
+        for (uint level = 0; level < builder->getNumberOfGridLevels(); level++) {
+            int numberOfGeometryValues = builder->getGeometrySize(level);
+            std::cout << "size geometry values, Level " << level << " : " << numberOfGeometryValues << std::endl;
             ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-            para->getParH(i)->geometryBC.numberOfBCnodes = numberOfGeometryValues;
-            para->getParD(i)->geometryBC.numberOfBCnodes = numberOfGeometryValues;
+
+            para->getParH(level)->geometryBC.numberOfBCnodes = 0;
             if (numberOfGeometryValues > 0)
             {
-
+                blocks = (numberOfGeometryValues / para->getParH(level)->numberofthreads) + 1;
+                para->getParH(level)->geometryBC.numberOfBCnodes = blocks * para->getParH(level)->numberofthreads;
                 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-                cudaMemoryManager->cudaAllocGeomValuesBC(i);
+                cudaMemoryManager->cudaAllocGeomValuesBC(level);
                 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-                //Indexarray
 
-                builder->getGeometryValues(para->getParH(i)->geometryBC.Vx, para->getParH(i)->geometryBC.Vy, para->getParH(i)->geometryBC.Vz, i);
+                builder->getGeometryValues(para->getParH(level)->geometryBC.Vx, para->getParH(level)->geometryBC.Vy, para->getParH(level)->geometryBC.Vz, level);
 
                 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-                for (int m = 0; m < numberOfGeometryValues; m++)
+                for (uint m = 0; m < para->getParH(level)->geometryBC.numberOfBCnodes; m++)
                 {
-                    para->getParH(i)->geometryBC.Vx[m] = para->getParH(i)->geometryBC.Vx[m] / para->getVelocityRatio();
-                    para->getParH(i)->geometryBC.Vy[m] = para->getParH(i)->geometryBC.Vy[m] / para->getVelocityRatio();
-                    para->getParH(i)->geometryBC.Vz[m] = para->getParH(i)->geometryBC.Vz[m] / para->getVelocityRatio();
-                    //para->getParH(i)->geometryBC.Vx[m] = para->getParH(i)->geometryBC.Vx[m] / 100.0f;
-                    //para->getParH(i)->geometryBC.Vy[m] = para->getParH(i)->geometryBC.Vy[m] / 100.0f;
-                    //para->getParH(i)->geometryBC.Vz[m] = para->getParH(i)->geometryBC.Vz[m] / 100.0f;
-                    //para->getParH(i)->geometryBC.Vx[m] = 0.0f;
-                    //para->getParH(i)->geometryBC.Vy[m] = 0.0f;
-                    //para->getParH(i)->geometryBC.Vz[m] = 0.0f;
+                    para->getParH(level)->geometryBC.Vx[m] = para->getParH(level)->geometryBC.Vx[m] / para->getVelocityRatio();
+                    para->getParH(level)->geometryBC.Vy[m] = para->getParH(level)->geometryBC.Vy[m] / para->getVelocityRatio();
+                    para->getParH(level)->geometryBC.Vz[m] = para->getParH(level)->geometryBC.Vz[m] / para->getVelocityRatio();
                 }
-                //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-                ////T�st
-                //for (int m = 0; m < temp4; m++)
-                //{
-                //	para->getParH(i)->geometryBC.Vx[m] = para->getVelocity();//0.035f;
-                //	para->getParH(i)->geometryBC.Vy[m] = 0.0f;//para->getVelocity();//0.0f;
-                //	para->getParH(i)->geometryBC.Vz[m] = 0.0f;
-                //}
-                //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-                cudaMemoryManager->cudaCopyGeomValuesBC(i);
+                cudaMemoryManager->cudaCopyGeomValuesBC(level);
                 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
                 //// advection - diffusion stuff
                 //if (para->getDiffOn()==true){
-                //	//////////////////////////////////////////////////////////////////////////
-                //	para->getParH(i)->Temp.kTemp = temp4;
-                //	cout << "Groesse kTemp = " << para->getParH(i)->Temp.kTemp << std::endl;
-                //	//////////////////////////////////////////////////////////////////////////
-                //	para->cudaAllocTempNoSlipBC(i);
-                //	//////////////////////////////////////////////////////////////////////////
-                //	for (int m = 0; m < temp4; m++)
-                //	{
-                //		para->getParH(i)->Temp.temp[m] = para->getTemperatureInit();
-                //		para->getParH(i)->Temp.k[m]    = para->getParH(i)->geometryBC.k[m];
-                //	}
-                //	//////////////////////////////////////////////////////////////////////////
-                //	para->cudaCopyTempNoSlipBCHD(i);
-                //	//////////////////////////////////////////////////////////////////////////
+                //    //////////////////////////////////////////////////////////////////////////
+                //    para->getParH(i)->Temp.kTemp = temp4;
+                //    cout << "Groesse kTemp = " << para->getParH(i)->Temp.kTemp << std::endl;
+                //    //////////////////////////////////////////////////////////////////////////
+                //    para->cudaAllocTempNoSlipBC(i);
+                //    //////////////////////////////////////////////////////////////////////////
+                //    for (int m = 0; m < temp4; m++)
+                //    {
+                //        para->getParH(i)->Temp.temp[m] = para->getTemperatureInit();
+                //        para->getParH(i)->Temp.k[m]    = para->getParH(i)->geometryBC.k[m];
+                //    }
+                //    //////////////////////////////////////////////////////////////////////////
+                //    para->cudaCopyTempNoSlipBCHD(i);
+                //    //////////////////////////////////////////////////////////////////////////
                 //}
                 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
             }
+            para->getParD(level)->geometryBC.numberOfBCnodes = para->getParH(level)->geometryBC.numberOfBCnodes;
+
         }
     }//ende geo
 
@@ -837,11 +829,11 @@ void GridGenerator::initalValuesDomainDecompostion()
 
 void GridGenerator::allocArrays_BoundaryQs()
 {
-	std::cout << "------read BoundaryQs-------" << std::endl;
+    std::cout << "------read BoundaryQs-------" << std::endl;
 
 
     for (uint i = 0; i < builder->getNumberOfGridLevels(); i++) {
-        int numberOfPressureValues = (int)builder->getPressureSize(i);
+        const auto numberOfPressureValues = (int)builder->getPressureSize(i);
         if (numberOfPressureValues > 0)
         {
             std::cout << "size Pressure:  " << i << " : " << numberOfPressureValues << std::endl;
@@ -851,33 +843,7 @@ void GridGenerator::allocArrays_BoundaryQs()
             real* QQ = para->getParH(i)->pressureBC.q27[0];
             unsigned int sizeQ = para->getParH(i)->pressureBC.numberOfBCnodes;
             QforBoundaryConditions Q;
-            Q.q27[E] = &QQ[E   *sizeQ];
-            Q.q27[W] = &QQ[W   *sizeQ];
-            Q.q27[N] = &QQ[N   *sizeQ];
-            Q.q27[S] = &QQ[S   *sizeQ];
-            Q.q27[T] = &QQ[T   *sizeQ];
-            Q.q27[B] = &QQ[B   *sizeQ];
-            Q.q27[NE] = &QQ[NE  *sizeQ];
-            Q.q27[SW] = &QQ[SW  *sizeQ];
-            Q.q27[SE] = &QQ[SE  *sizeQ];
-            Q.q27[NW] = &QQ[NW  *sizeQ];
-            Q.q27[TE] = &QQ[TE  *sizeQ];
-            Q.q27[BW] = &QQ[BW  *sizeQ];
-            Q.q27[BE] = &QQ[BE  *sizeQ];
-            Q.q27[TW] = &QQ[TW  *sizeQ];
-            Q.q27[TN] = &QQ[TN  *sizeQ];
-            Q.q27[BS] = &QQ[BS  *sizeQ];
-            Q.q27[BN] = &QQ[BN  *sizeQ];
-            Q.q27[TS] = &QQ[TS  *sizeQ];
-            Q.q27[REST] = &QQ[REST*sizeQ];
-            Q.q27[TNE] = &QQ[TNE *sizeQ];
-            Q.q27[TSW] = &QQ[TSW *sizeQ];
-            Q.q27[TSE] = &QQ[TSE *sizeQ];
-            Q.q27[TNW] = &QQ[TNW *sizeQ];
-            Q.q27[BNE] = &QQ[BNE *sizeQ];
-            Q.q27[BSW] = &QQ[BSW *sizeQ];
-            Q.q27[BSE] = &QQ[BSE *sizeQ];
-            Q.q27[BNW] = &QQ[BNW *sizeQ];
+            getPointersToBoundaryConditions(Q, QQ, sizeQ);
             
             builder->getPressureQs(Q.q27, i);
 
@@ -924,33 +890,7 @@ void GridGenerator::allocArrays_BoundaryQs()
             real* QQ = para->getParH(i)->slipBC.q27[0];
             unsigned int sizeQ = para->getParH(i)->slipBC.numberOfBCnodes;
             QforBoundaryConditions Q;
-            Q.q27[E] = &QQ[E   *sizeQ];
-            Q.q27[W] = &QQ[W   *sizeQ];
-            Q.q27[N] = &QQ[N   *sizeQ];
-            Q.q27[S] = &QQ[S   *sizeQ];
-            Q.q27[T] = &QQ[T   *sizeQ];
-            Q.q27[B] = &QQ[B   *sizeQ];
-            Q.q27[NE] = &QQ[NE  *sizeQ];
-            Q.q27[SW] = &QQ[SW  *sizeQ];
-            Q.q27[SE] = &QQ[SE  *sizeQ];
-            Q.q27[NW] = &QQ[NW  *sizeQ];
-            Q.q27[TE] = &QQ[TE  *sizeQ];
-            Q.q27[BW] = &QQ[BW  *sizeQ];
-            Q.q27[BE] = &QQ[BE  *sizeQ];
-            Q.q27[TW] = &QQ[TW  *sizeQ];
-            Q.q27[TN] = &QQ[TN  *sizeQ];
-            Q.q27[BS] = &QQ[BS  *sizeQ];
-            Q.q27[BN] = &QQ[BN  *sizeQ];
-            Q.q27[TS] = &QQ[TS  *sizeQ];
-            Q.q27[REST] = &QQ[REST*sizeQ];
-            Q.q27[TNE] = &QQ[TNE *sizeQ];
-            Q.q27[TSW] = &QQ[TSW *sizeQ];
-            Q.q27[TSE] = &QQ[TSE *sizeQ];
-            Q.q27[TNW] = &QQ[TNW *sizeQ];
-            Q.q27[BNE] = &QQ[BNE *sizeQ];
-            Q.q27[BSW] = &QQ[BSW *sizeQ];
-            Q.q27[BSE] = &QQ[BSE *sizeQ];
-            Q.q27[BNW] = &QQ[BNW *sizeQ];
+            getPointersToBoundaryConditions(Q, QQ, sizeQ);
             
             builder->getSlipQs(Q.q27, i);
             ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -970,33 +910,7 @@ void GridGenerator::allocArrays_BoundaryQs()
             real* QQ = para->getParH(i)->stressBC.q27[0];
             unsigned int sizeQ = para->getParH(i)->stressBC.numberOfBCnodes;
             QforBoundaryConditions Q;
-            Q.q27[E] = &QQ[E   *sizeQ];
-            Q.q27[W] = &QQ[W   *sizeQ];
-            Q.q27[N] = &QQ[N   *sizeQ];
-            Q.q27[S] = &QQ[S   *sizeQ];
-            Q.q27[T] = &QQ[T   *sizeQ];
-            Q.q27[B] = &QQ[B   *sizeQ];
-            Q.q27[NE] = &QQ[NE  *sizeQ];
-            Q.q27[SW] = &QQ[SW  *sizeQ];
-            Q.q27[SE] = &QQ[SE  *sizeQ];
-            Q.q27[NW] = &QQ[NW  *sizeQ];
-            Q.q27[TE] = &QQ[TE  *sizeQ];
-            Q.q27[BW] = &QQ[BW  *sizeQ];
-            Q.q27[BE] = &QQ[BE  *sizeQ];
-            Q.q27[TW] = &QQ[TW  *sizeQ];
-            Q.q27[TN] = &QQ[TN  *sizeQ];
-            Q.q27[BS] = &QQ[BS  *sizeQ];
-            Q.q27[BN] = &QQ[BN  *sizeQ];
-            Q.q27[TS] = &QQ[TS  *sizeQ];
-            Q.q27[REST] = &QQ[REST*sizeQ];
-            Q.q27[TNE] = &QQ[TNE *sizeQ];
-            Q.q27[TSW] = &QQ[TSW *sizeQ];
-            Q.q27[TSE] = &QQ[TSE *sizeQ];
-            Q.q27[TNW] = &QQ[TNW *sizeQ];
-            Q.q27[BNE] = &QQ[BNE *sizeQ];
-            Q.q27[BSW] = &QQ[BSW *sizeQ];
-            Q.q27[BSE] = &QQ[BSE *sizeQ];
-            Q.q27[BNW] = &QQ[BNW *sizeQ];
+            getPointersToBoundaryConditions(Q, QQ, sizeQ);
             
             builder->getStressQs(Q.q27, i);
             ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -1016,34 +930,7 @@ void GridGenerator::allocArrays_BoundaryQs()
             real* QQ = para->getParH(i)->velocityBC.q27[0];
             unsigned int sizeQ = para->getParH(i)->velocityBC.numberOfBCnodes;
             QforBoundaryConditions Q;
-            Q.q27[E] = &QQ[E   *sizeQ];
-            Q.q27[W] = &QQ[W   *sizeQ];
-            Q.q27[N] = &QQ[N   *sizeQ];
-            Q.q27[S] = &QQ[S   *sizeQ];
-            Q.q27[T] = &QQ[T   *sizeQ];
-            Q.q27[B] = &QQ[B   *sizeQ];
-            Q.q27[NE] = &QQ[NE  *sizeQ];
-            Q.q27[SW] = &QQ[SW  *sizeQ];
-            Q.q27[SE] = &QQ[SE  *sizeQ];
-            Q.q27[NW] = &QQ[NW  *sizeQ];
-            Q.q27[TE] = &QQ[TE  *sizeQ];
-            Q.q27[BW] = &QQ[BW  *sizeQ];
-            Q.q27[BE] = &QQ[BE  *sizeQ];
-            Q.q27[TW] = &QQ[TW  *sizeQ];
-            Q.q27[TN] = &QQ[TN  *sizeQ];
-            Q.q27[BS] = &QQ[BS  *sizeQ];
-            Q.q27[BN] = &QQ[BN  *sizeQ];
-            Q.q27[TS] = &QQ[TS  *sizeQ];
-            Q.q27[REST] = &QQ[REST*sizeQ];
-            Q.q27[TNE] = &QQ[TNE *sizeQ];
-            Q.q27[TSW] = &QQ[TSW *sizeQ];
-            Q.q27[TSE] = &QQ[TSE *sizeQ];
-            Q.q27[TNW] = &QQ[TNW *sizeQ];
-            Q.q27[BNE] = &QQ[BNE *sizeQ];
-            Q.q27[BSW] = &QQ[BSW *sizeQ];
-            Q.q27[BSE] = &QQ[BSE *sizeQ];
-            Q.q27[BNW] = &QQ[BNW *sizeQ];
-
+            getPointersToBoundaryConditions(Q, QQ, sizeQ);
             builder->getVelocityQs(Q.q27, i);
 
             if (para->getDiffOn()) {
@@ -1169,48 +1056,22 @@ void GridGenerator::allocArrays_BoundaryQs()
             real* QQ = para->getParH(i)->geometryBC.q27[0];
             unsigned int sizeQ = para->getParH(i)->geometryBC.numberOfBCnodes;
             QforBoundaryConditions Q;
-            Q.q27[E] = &QQ[E   *sizeQ];
-            Q.q27[W] = &QQ[W   *sizeQ];
-            Q.q27[N] = &QQ[N   *sizeQ];
-            Q.q27[S] = &QQ[S   *sizeQ];
-            Q.q27[T] = &QQ[T   *sizeQ];
-            Q.q27[B] = &QQ[B   *sizeQ];
-            Q.q27[NE] = &QQ[NE  *sizeQ];
-            Q.q27[SW] = &QQ[SW  *sizeQ];
-            Q.q27[SE] = &QQ[SE  *sizeQ];
-            Q.q27[NW] = &QQ[NW  *sizeQ];
-            Q.q27[TE] = &QQ[TE  *sizeQ];
-            Q.q27[BW] = &QQ[BW  *sizeQ];
-            Q.q27[BE] = &QQ[BE  *sizeQ];
-            Q.q27[TW] = &QQ[TW  *sizeQ];
-            Q.q27[TN] = &QQ[TN  *sizeQ];
-            Q.q27[BS] = &QQ[BS  *sizeQ];
-            Q.q27[BN] = &QQ[BN  *sizeQ];
-            Q.q27[TS] = &QQ[TS  *sizeQ];
-            Q.q27[REST] = &QQ[REST*sizeQ];
-            Q.q27[TNE] = &QQ[TNE *sizeQ];
-            Q.q27[TSW] = &QQ[TSW *sizeQ];
-            Q.q27[TSE] = &QQ[TSE *sizeQ];
-            Q.q27[TNW] = &QQ[TNW *sizeQ];
-            Q.q27[BNE] = &QQ[BNE *sizeQ];
-            Q.q27[BSW] = &QQ[BSW *sizeQ];
-            Q.q27[BSE] = &QQ[BSE *sizeQ];
-            Q.q27[BNW] = &QQ[BNW *sizeQ];
+            getPointersToBoundaryConditions(Q, QQ, sizeQ);
             //////////////////////////////////////////////////////////////////
 
             builder->getGeometryQs(Q.q27, i);
-			//QDebugWriter::writeQValues(Q, para->getParH(i)->geometryBC.k, para->getParH(i)->geometryBC.numberOfBCnodes, "M:/TestGridGeneration/results/GeomGPU.dat");
+            //QDebugWriter::writeQValues(Q, para->getParH(i)->geometryBC.k, para->getParH(i)->geometryBC.numberOfBCnodes, "M:/TestGridGeneration/results/GeomGPU.dat");
             //////////////////////////////////////////////////////////////////
             for (int node_i = 0; node_i < numberOfGeometryNodes; node_i++)
             {
-                Q.q27[REST][node_i] = 0.0f;
+                Q.q27[DIR_000][node_i] = 0.0f;
             }
             //for(int test = 0; test < 3; test++)
             //{
-            //	for (int tmp = 0; tmp < 27; tmp++)
-            //	{
-            //		cout <<"Kuhs: " << Q.q27[tmp][test]  << std::endl;
-            //	}
+            //    for (int tmp = 0; tmp < 27; tmp++)
+            //    {
+            //        cout <<"Kuhs: " << Q.q27[tmp][test]  << std::endl;
+            //    }
             //}
 
             ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -1239,7 +1100,7 @@ void GridGenerator::allocArrays_BoundaryQs()
     }
 
 
-	std::cout << "-----finish BoundaryQs------" << std::endl;
+    std::cout << "-----finish BoundaryQs------" << std::endl;
 }
 
 void GridGenerator::allocArrays_OffsetScale()
@@ -1274,10 +1135,10 @@ void GridGenerator::allocArrays_OffsetScale()
         para->getParD(level)->mem_size_kFC_off = sizeof(real)* para->getParD(level)->K_FC;
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
         //alloc
-		cudaMemoryManager->cudaAllocInterfaceCF(level);
-		cudaMemoryManager->cudaAllocInterfaceFC(level);
-		cudaMemoryManager->cudaAllocInterfaceOffCF(level);
-		cudaMemoryManager->cudaAllocInterfaceOffFC(level);
+        cudaMemoryManager->cudaAllocInterfaceCF(level);
+        cudaMemoryManager->cudaAllocInterfaceFC(level);
+        cudaMemoryManager->cudaAllocInterfaceOffCF(level);
+        cudaMemoryManager->cudaAllocInterfaceOffFC(level);
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
         //init
         builder->getOffsetCF(para->getParH(level)->offCF.xOffCF, para->getParH(level)->offCF.yOffCF, para->getParH(level)->offCF.zOffCF, level);
@@ -1292,49 +1153,49 @@ void GridGenerator::allocArrays_OffsetScale()
         }
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
         //copy
-		cudaMemoryManager->cudaCopyInterfaceCF(level);
-		cudaMemoryManager->cudaCopyInterfaceFC(level);
-		cudaMemoryManager->cudaCopyInterfaceOffCF(level);
-		cudaMemoryManager->cudaCopyInterfaceOffFC(level);
+        cudaMemoryManager->cudaCopyInterfaceCF(level);
+        cudaMemoryManager->cudaCopyInterfaceFC(level);
+        cudaMemoryManager->cudaCopyInterfaceOffCF(level);
+        cudaMemoryManager->cudaCopyInterfaceOffFC(level);
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     }
 }
 
 void GridGenerator::setDimensions()
 {
-	//std::vector<int> localGridNX(1);
-	//std::vector<int> localGridNY(1);
-	//std::vector<int> localGridNZ(1);
+    //std::vector<int> localGridNX(1);
+    //std::vector<int> localGridNY(1);
+    //std::vector<int> localGridNZ(1);
 
-	//builder->getDimensions(localGridNX[0], localGridNY[0], localGridNZ[0], 0);
+    //builder->getDimensions(localGridNX[0], localGridNY[0], localGridNZ[0], 0);
 
-	//para->setGridX(localGridNX);
-	//para->setGridY(localGridNY);
-	//para->setGridZ(localGridNZ);
+    //para->setGridX(localGridNX);
+    //para->setGridY(localGridNY);
+    //para->setGridZ(localGridNZ);
 }
 
 void GridGenerator::setBoundingBox()
 {
-	std::vector<int> localGridNX(1);
-	std::vector<int> localGridNY(1);
-	std::vector<int> localGridNZ(1);
-	builder->getDimensions(localGridNX[0], localGridNY[0], localGridNZ[0], 0);
-
-	std::vector<real> minX, maxX, minY, maxY, minZ, maxZ;
-	minX.push_back(0);
-	minY.push_back(0);
-	minZ.push_back(0);
-
-	maxX.push_back((real)localGridNX[0]);
-	maxY.push_back((real)localGridNY[0]);
-	maxZ.push_back((real)localGridNZ[0]);
-
-	para->setMinCoordX(minX);
-	para->setMinCoordY(minY);
-	para->setMinCoordZ(minZ);
-	para->setMaxCoordX(maxX);
-	para->setMaxCoordY(maxY);
-	para->setMaxCoordZ(maxZ);
+    std::vector<int> localGridNX(1);
+    std::vector<int> localGridNY(1);
+    std::vector<int> localGridNZ(1);
+    builder->getDimensions(localGridNX[0], localGridNY[0], localGridNZ[0], 0);
+
+    std::vector<real> minX, maxX, minY, maxY, minZ, maxZ;
+    minX.push_back(0);
+    minY.push_back(0);
+    minZ.push_back(0);
+
+    maxX.push_back((real)localGridNX[0]);
+    maxY.push_back((real)localGridNY[0]);
+    maxZ.push_back((real)localGridNZ[0]);
+
+    para->setMinCoordX(minX);
+    para->setMinCoordY(minY);
+    para->setMinCoordZ(minZ);
+    para->setMaxCoordX(maxX);
+    para->setMaxCoordY(maxY);
+    para->setMaxCoordZ(maxZ);
 }
 
 void GridGenerator::initPeriodicNeigh(std::vector<std::vector<std::vector<uint> > > periodV, std::vector<std::vector<uint> > periodIndex, std::string way)
@@ -1434,3 +1295,33 @@ std::string GridGenerator::checkNeighbor(int level, real x, real y, real z, int
     }
     return oss.str();
 }
+
+void GridGenerator::getPointersToBoundaryConditions(QforBoundaryConditions& boundaryConditionStruct, real* subgridDistances, const unsigned int numberOfBCnodes){
+    boundaryConditionStruct.q27[DIR_P00] =    &subgridDistances[DIR_P00   * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_M00] =    &subgridDistances[DIR_M00   * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_0P0] =    &subgridDistances[DIR_0P0   * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_0M0] =    &subgridDistances[DIR_0M0   * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_00P] =    &subgridDistances[DIR_00P   * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_00M] =    &subgridDistances[DIR_00M   * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_PP0] =   &subgridDistances[DIR_PP0  * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_MM0] =   &subgridDistances[DIR_MM0  * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_PM0] =   &subgridDistances[DIR_PM0  * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_MP0] =   &subgridDistances[DIR_MP0  * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_P0P] =   &subgridDistances[DIR_P0P  * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_M0M] =   &subgridDistances[DIR_M0M  * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_P0M] =   &subgridDistances[DIR_P0M  * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_M0P] =   &subgridDistances[DIR_M0P  * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_0PP] =   &subgridDistances[DIR_0PP  * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_0MM] =   &subgridDistances[DIR_0MM  * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_0PM] =   &subgridDistances[DIR_0PM  * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_0MP] =   &subgridDistances[DIR_0MP  * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_000] = &subgridDistances[DIR_000* numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_PPP] =  &subgridDistances[DIR_PPP * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_MMP] =  &subgridDistances[DIR_MMP * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_PMP] =  &subgridDistances[DIR_PMP * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_MPP] =  &subgridDistances[DIR_MPP * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_PPM] =  &subgridDistances[DIR_PPM * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_MMM] =  &subgridDistances[DIR_MMM * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_PMM] =  &subgridDistances[DIR_PMM * numberOfBCnodes];
+    boundaryConditionStruct.q27[DIR_MPM] =  &subgridDistances[DIR_MPM * numberOfBCnodes];
+}
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h
index 991c59ca63dd77da974c044db363de16ebf6ff23..73cb0597067c63ba1ec40285dd9bede2349d5aa1 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/GridGenerator.h
@@ -1,3 +1,35 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __         
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |        
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |        
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |        
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____    
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|   
+//      \    \  |    |   ________________________________________________________________    
+//       \    \ |    |  |  ______________________________________________________________|   
+//        \    \|    |  |  |         __          __     __     __     ______      _______    
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)   
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______    
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/   
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can 
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of 
+//  the License, or (at your option) any later version.
+//  
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT 
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
+//  for more details.
+//  
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file GridGenerator.h
+//! \ingroup DataStructureInitializer
+//! \author Martin Schoenherr
+//=======================================================================================
 #ifndef GridReaderGenerator_H
 #define GridReaderGenerator_H
 
@@ -13,56 +45,62 @@ class Parameter;
 class GridBuilder;
 class IndexRearrangementForStreams;
 
+//! \class GridGenerator derived class of GridProvider
+//! \brief mapping the grid of grid generator to data structure for simulation
 class GridGenerator
-	: public GridProvider
+    : public GridProvider
 {
 private:
-	std::vector<std::string> channelDirections;
-	std::vector<std::string> channelBoundaryConditions;
+    //! \brief string vector with channel direction
+    std::vector<std::string> channelDirections;
+    //! \brief string vector with channel direction (boundary conditions)
+    std::vector<std::string> channelBoundaryConditions;
 
-	std::shared_ptr<GridBuilder> builder;
+    std::shared_ptr<GridBuilder> builder;
     std::unique_ptr<IndexRearrangementForStreams> indexRearrangement;
 
 public:
     VIRTUALFLUIDS_GPU_EXPORT GridGenerator(std::shared_ptr<GridBuilder> builder, std::shared_ptr<Parameter> para, std::shared_ptr<CudaMemoryManager> cudaMemoryManager, vf::gpu::Communicator& communicator);
-	VIRTUALFLUIDS_GPU_EXPORT virtual ~GridGenerator();
+    VIRTUALFLUIDS_GPU_EXPORT ~GridGenerator() override;
 
-	void allocArrays_CoordNeighborGeo() override;
+    //! \brief allocates and initializes the data structures for coordinates and node types
+    void allocArrays_CoordNeighborGeo() override;
+    //! \brief allocates and initializes the values at the boundary conditions
     void allocArrays_BoundaryValues() override;
-
-	void allocArrays_BoundaryQs() override;
+    //! \brief allocates and initializes the sub-grid distances at the boundary conditions
+    void allocArrays_BoundaryQs() override;
     void allocArrays_OffsetScale() override;
     void allocArrays_fluidNodeIndices() override;
     void allocArrays_fluidNodeIndicesBorder() override;
 
-	virtual void setDimensions() override;
-	virtual void setBoundingBox() override;
+    virtual void setDimensions() override;
+    virtual void setBoundingBox() override;
 
-	virtual void initPeriodicNeigh(std::vector<std::vector<std::vector<unsigned int> > > periodV, std::vector<std::vector<unsigned int> > periodIndex, std::string way) override;
-	
+    virtual void initPeriodicNeigh(std::vector<std::vector<std::vector<unsigned int> > > periodV, std::vector<std::vector<unsigned int> > periodIndex, std::string way) override;
+    
 private:
-	void setPressureValues(int channelSide) const;
-	void setPressRhoBC(int sizePerLevel, int level, int channelSide) const;
-
-	void setVelocityValues(int channelSide) const;
-	void setVelocity(int level, int sizePerLevel, int channelSide) const;
-
-	void setOutflowValues(int channelSide) const;
-	void setOutflow(int level, int sizePerLevel, int channelSide) const;
-
-	void setPressQs(int channelSide) const;
-	void setVelocityQs(int channelSide) const;
-	void setOutflowQs(int channelSide) const;
-	void setNoSlipQs(int channelSide) const;
-	void setGeoQs() const;
-	void modifyQElement(int channelSide, unsigned int level) const;
-
-	void initalQStruct(QforBoundaryConditions& Q,int channelSide, unsigned int level) const;
-	void printQSize(std::string bc,int channelSide, unsigned int level) const;
-	void setSizeNoSlip(int channelSide, unsigned int level) const;
-	void setSizeGeoQs(unsigned int level) const;
-	void setQ27Size(QforBoundaryConditions &Q, real* QQ, unsigned int sizeQ) const;
-	bool hasQs(int channelSide, unsigned int level) const;
+    void setPressureValues(int channelSide) const;
+    void setPressRhoBC(int sizePerLevel, int level, int channelSide) const;
+
+    void setVelocityValues(int channelSide) const;
+    void setVelocity(int level, int sizePerLevel, int channelSide) const;
+
+    void setOutflowValues(int channelSide) const;
+    void setOutflow(int level, int sizePerLevel, int channelSide) const;
+
+    void setPressQs(int channelSide) const;
+    void setVelocityQs(int channelSide) const;
+    void setOutflowQs(int channelSide) const;
+    void setNoSlipQs(int channelSide) const;
+    void setGeoQs() const;
+    void modifyQElement(int channelSide, unsigned int level) const;
+
+    void initalQStruct(QforBoundaryConditions& Q,int channelSide, unsigned int level) const;
+    void printQSize(std::string bc,int channelSide, unsigned int level) const;
+    void setSizeNoSlip(int channelSide, unsigned int level) const;
+    void setSizeGeoQs(unsigned int level) const;
+    void setQ27Size(QforBoundaryConditions &Q, real* QQ, unsigned int sizeQ) const;
+    bool hasQs(int channelSide, unsigned int level) const;
     
     void initalValuesDomainDecompostion();
 public:
@@ -70,10 +108,26 @@ public:
 
 
 private:
+    //! \brief verifies if there are invalid nodes, stopper nodes or wrong neighbors
     std::string verifyNeighborIndices(int level) const;
-    std::string verifyNeighborIndex(int level, int index, int &invalidNodes, int &stopperNodes, int &wrongNeighbors) const;
+    //! \brief verifies a single neighbor index
+    //! \param level,index integers selecting what is verified (NOTE(review): presumably grid level and node index on that level - confirm)
+    //! \param invalidNodes reference to invalid nodes
+    //! \param stopperNodes reference to stopper nodes
+    //! \param wrongNeighbors reference to wrong neighbors
+    std::string verifyNeighborIndex(int level, int index , int &invalidNodes, int &stopperNodes, int &wrongNeighbors) const;
+    //! \brief checks one neighbor of a lattice node
+    //! \param level,index integers passed alongside the node position (NOTE(review): presumably grid level and index of the node at x,y,z - confirm)
+    //! \param x,y,z lattice node position
+    //! \param numberOfWrongNeihgbors reference to the number of wrong neighbors (name spelled exactly as in the declaration so Doxygen can match it)
+    //! \param neighborIndex,neighborX,neighborY,neighborZ index and lattice node position of the neighbor node
+    //! \param direction the direction, given as a std::string
     std::string checkNeighbor(int level, real x, real y, real z, int index, int& numberOfWrongNeihgbors, int neighborIndex, real neighborX, real neighborY, real neighborZ, std::string direction) const;
-
+    //! \brief create the pointers in the struct for the BoundaryConditions from the boundary condition array
+    //! \param boundaryConditionStruct is a struct containing information about the boundary condition
+    //! \param subgridDistances is a pointer to an array containing the subgrid distances
+    //! \param numberOfBCnodes is the number of lattice nodes in the boundary condition
+    static void getPointersToBoundaryConditions(QforBoundaryConditions& boundaryConditionStruct, real* subgridDistances, const unsigned int numberOfBCnodes);
 };
 
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreamsTest.cpp b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreamsTest.cpp
index 405370c905adc9937badde2f6e54f2d54942056b..58fbe518aead65555b70245e1865869199b4b8d1 100644
--- a/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreamsTest.cpp
+++ b/src/gpu/VirtualFluids_GPU/DataStructureInitializer/GridReaderGenerator/IndexRearrangementForStreamsTest.cpp
@@ -132,7 +132,8 @@ private:
 
     void SetUp() override
     {
-        para = std::make_shared<Parameter>(1, 0);
+        para = std::make_shared<Parameter>();
+        para->initLBMSimulationParameter();
         testSubject = createTestSubjectCFBorderBulk();
     }
 };
diff --git a/src/gpu/VirtualFluids_GPU/FindInterface/FindInterface.h b/src/gpu/VirtualFluids_GPU/FindInterface/FindInterface.h
index 056c25d39489abc9ae9491f771f05d345a57d02a..3be49570b33d99f9517796b33934dee1e2f31221 100644
--- a/src/gpu/VirtualFluids_GPU/FindInterface/FindInterface.h
+++ b/src/gpu/VirtualFluids_GPU/FindInterface/FindInterface.h
@@ -5,7 +5,7 @@
 #include "lbm/constants/D3Q27.h"
 
 
-extern "C" void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC, 
+void interpolation(InterpolationCellCF &intCF, InterpolationCellFC &intFC, 
                                unsigned int LxCoarse, unsigned int LyCoarse, unsigned int LzCoarse, 
                                unsigned int LxFine, unsigned int LyFine, unsigned int LzFine, 
                                unsigned int dNx, unsigned int dNy, unsigned int dNz, 
diff --git a/src/gpu/VirtualFluids_GPU/FindQ/DefineBCs.cpp b/src/gpu/VirtualFluids_GPU/FindQ/DefineBCs.cpp
index a67f1d987cb9636ee447c5f5acd1410c44cb6a62..d36d3f9a58246eb344a90a21709314ade414378c 100644
--- a/src/gpu/VirtualFluids_GPU/FindQ/DefineBCs.cpp
+++ b/src/gpu/VirtualFluids_GPU/FindQ/DefineBCs.cpp
@@ -33,13 +33,13 @@ void findQ27(Parameter* para, CudaMemoryManager* cudaMemoryManager)
 	   para->getParD(lev)->noSlipBC.numberOfBCnodes = para->getParH(lev)->noSlipBC.numberOfBCnodes;
       printf("numberOfBCnodes= %d\n", para->getParH(lev)->noSlipBC.numberOfBCnodes);
 
-	  cudaMemoryManager->cudaAllocWallBC(lev);
+	  cudaMemoryManager->cudaAllocNoSlipBC(lev);
 
       findQ(para, lev);
  	  para->getParD(lev)->noSlipBC.numberOfBCnodes = para->getParH(lev)->noSlipBC.numberOfBCnodes;
       printf("numberOfBCnodes= %d\n", para->getParH(lev)->noSlipBC.numberOfBCnodes);
 
-	  cudaMemoryManager->cudaCopyWallBC(lev);
+	  cudaMemoryManager->cudaCopyNoSlipBC(lev);
    }
 }
 
@@ -48,7 +48,7 @@ void findQ27(Parameter* para, CudaMemoryManager* cudaMemoryManager)
 
 void findBC27(Parameter* para, CudaMemoryManager* cudaMemoryManager)
 {
-   if ( para->getMyID() == 0)
+   if ( para->getMyProcessID() == 0)
    {
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Inflow
diff --git a/src/gpu/VirtualFluids_GPU/FindQ/DefineBCs.h b/src/gpu/VirtualFluids_GPU/FindQ/DefineBCs.h
index 92934f85198f9141c97e78a433049d01f3c4b075..045d77f25cab213dfd130fe068f0724b6955bc4d 100644
--- a/src/gpu/VirtualFluids_GPU/FindQ/DefineBCs.h
+++ b/src/gpu/VirtualFluids_GPU/FindQ/DefineBCs.h
@@ -6,10 +6,10 @@
 #include "Parameter/Parameter.h"
 #include "GPU/CudaMemoryManager.h"
 
-extern "C" void findQ27(Parameter* para, CudaMemoryManager* cudaMemoryManager);
+void findQ27(Parameter* para, CudaMemoryManager* cudaMemoryManager);
 
-extern "C" void findBC27(Parameter* para, CudaMemoryManager* cudaMemoryManager);
+void findBC27(Parameter* para, CudaMemoryManager* cudaMemoryManager);
 
-extern "C" void findPressQShip(Parameter* para, CudaMemoryManager* cudaMemoryManager);
+void findPressQShip(Parameter* para, CudaMemoryManager* cudaMemoryManager);
 
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/FindQ/FindQ.cpp b/src/gpu/VirtualFluids_GPU/FindQ/FindQ.cpp
index 7932ddf948fa1d1a8941a26821ab58577ae0e70c..19be37ed5324f48627506bb3e2508a9a1b97cf52 100644
--- a/src/gpu/VirtualFluids_GPU/FindQ/FindQ.cpp
+++ b/src/gpu/VirtualFluids_GPU/FindQ/FindQ.cpp
@@ -14,7 +14,7 @@ void findQ(Parameter* para, int lev)
    VF_LOG_CRITICAL("findQ() is deprecated! - see comment above for more information");
 
    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-   //////////////  E   W   N   S   T   B  NE  SW  SE  NW  TE  BW  BE  TW  TN  BS  BN  TS ZERO TNE BNE TSE BSE TNW BNW TSW BSW  ////////////////////////
+   //////////////  DIR_P00  DIR_M00  DIR_0P0  DIR_0M0  DIR_00P  DIR_00M  DIR_PP0  DIR_MM0  DIR_PM0  DIR_MP0  DIR_P0P  DIR_M0M  DIR_P0M  DIR_M0P  DIR_0PP  DIR_0MM  DIR_0PM  DIR_0MP  DIR_000  DIR_PPP  DIR_PPM  DIR_PMP  DIR_PMM  DIR_MPP  DIR_MPM  DIR_MMP  DIR_MMM  ////////////////////////
    int   ex[27]={  1, -1,  0,  0,  0,  0,  1, -1,  1, -1,  1, -1,  1, -1,  0,  0,  0,  0,   0,  1,  1,  1,  1, -1, -1, -1, -1};
    int   ey[27]={  0,  0,  1, -1,  0,  0,  1, -1, -1,  1,  0,  0,  0,  0,  1, -1,  1, -1,   0,  1,  1, -1, -1,  1,  1, -1, -1};
    int   ez[27]={  0,  0,  0,  0,  1, -1,  0,  0,  0,  0,  1, -1, -1,  1,  1, -1, -1,  1,   0,  1, -1,  1, -1,  1, -1,  1, -1};
@@ -41,33 +41,33 @@ void findQ(Parameter* para, int lev)
    //real        radius  = nny / 5.f;//2.56f;
    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    QforBoundaryConditions Q;
-   Q.q27[E   ] = &QQ[E   *sizeQ];
-   Q.q27[W   ] = &QQ[W   *sizeQ];
-   Q.q27[N   ] = &QQ[N   *sizeQ];
-   Q.q27[S   ] = &QQ[S   *sizeQ];
-   Q.q27[T   ] = &QQ[T   *sizeQ];
-   Q.q27[B   ] = &QQ[B   *sizeQ];
-   Q.q27[NE  ] = &QQ[NE  *sizeQ];
-   Q.q27[SW  ] = &QQ[SW  *sizeQ];
-   Q.q27[SE  ] = &QQ[SE  *sizeQ];
-   Q.q27[NW  ] = &QQ[NW  *sizeQ];
-   Q.q27[TE  ] = &QQ[TE  *sizeQ];
-   Q.q27[BW  ] = &QQ[BW  *sizeQ];
-   Q.q27[BE  ] = &QQ[BE  *sizeQ];
-   Q.q27[TW  ] = &QQ[TW  *sizeQ];
-   Q.q27[TN  ] = &QQ[TN  *sizeQ];
-   Q.q27[BS  ] = &QQ[BS  *sizeQ];
-   Q.q27[BN  ] = &QQ[BN  *sizeQ];
-   Q.q27[TS  ] = &QQ[TS  *sizeQ];
-   Q.q27[REST] = &QQ[REST*sizeQ];
-   Q.q27[TNE ] = &QQ[TNE *sizeQ];
-   Q.q27[TSW ] = &QQ[TSW *sizeQ];
-   Q.q27[TSE ] = &QQ[TSE *sizeQ];
-   Q.q27[TNW ] = &QQ[TNW *sizeQ];
-   Q.q27[BNE ] = &QQ[BNE *sizeQ];
-   Q.q27[BSW ] = &QQ[BSW *sizeQ];
-   Q.q27[BSE ] = &QQ[BSE *sizeQ];
-   Q.q27[BNW ] = &QQ[BNW *sizeQ];
+   Q.q27[DIR_P00   ] = &QQ[DIR_P00   *sizeQ];
+   Q.q27[DIR_M00   ] = &QQ[DIR_M00   *sizeQ];
+   Q.q27[DIR_0P0   ] = &QQ[DIR_0P0   *sizeQ];
+   Q.q27[DIR_0M0   ] = &QQ[DIR_0M0   *sizeQ];
+   Q.q27[DIR_00P   ] = &QQ[DIR_00P   *sizeQ];
+   Q.q27[DIR_00M   ] = &QQ[DIR_00M   *sizeQ];
+   Q.q27[DIR_PP0  ] = &QQ[DIR_PP0  *sizeQ];
+   Q.q27[DIR_MM0  ] = &QQ[DIR_MM0  *sizeQ];
+   Q.q27[DIR_PM0  ] = &QQ[DIR_PM0  *sizeQ];
+   Q.q27[DIR_MP0  ] = &QQ[DIR_MP0  *sizeQ];
+   Q.q27[DIR_P0P  ] = &QQ[DIR_P0P  *sizeQ];
+   Q.q27[DIR_M0M  ] = &QQ[DIR_M0M  *sizeQ];
+   Q.q27[DIR_P0M  ] = &QQ[DIR_P0M  *sizeQ];
+   Q.q27[DIR_M0P  ] = &QQ[DIR_M0P  *sizeQ];
+   Q.q27[DIR_0PP  ] = &QQ[DIR_0PP  *sizeQ];
+   Q.q27[DIR_0MM  ] = &QQ[DIR_0MM  *sizeQ];
+   Q.q27[DIR_0PM  ] = &QQ[DIR_0PM  *sizeQ];
+   Q.q27[DIR_0MP  ] = &QQ[DIR_0MP  *sizeQ];
+   Q.q27[DIR_000] = &QQ[DIR_000*sizeQ];
+   Q.q27[DIR_PPP ] = &QQ[DIR_PPP *sizeQ];
+   Q.q27[DIR_MMP ] = &QQ[DIR_MMP *sizeQ];
+   Q.q27[DIR_PMP ] = &QQ[DIR_PMP *sizeQ];
+   Q.q27[DIR_MPP ] = &QQ[DIR_MPP *sizeQ];
+   Q.q27[DIR_PPM ] = &QQ[DIR_PPM *sizeQ];
+   Q.q27[DIR_MMM ] = &QQ[DIR_MMM *sizeQ];
+   Q.q27[DIR_PMM ] = &QQ[DIR_PMM *sizeQ];
+   Q.q27[DIR_MPM ] = &QQ[DIR_MPM *sizeQ];
    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    for(k=STARTOFFZ + 1 ; k<=nnz+STARTOFFZ-2 ; k++){
       for(j=STARTOFFY + 1 ; j<=nny+STARTOFFY-2 ; j++){          //j<=nny/2+STARTOFFY     //j<=STARTOFFY+1
@@ -97,89 +97,89 @@ void findQ(Parameter* para, int lev)
                {
                   QIN.k[QIN.numberOfBCnodes]          = kk[m];
 
-                  //Q.q27[E   ][QIN.numberOfBCnodes] = -1.f;
-                  //Q.q27[W   ][QIN.numberOfBCnodes] = -1.f;
-                  //Q.q27[N   ][QIN.numberOfBCnodes] = 0.f;
-                  //Q.q27[S   ][QIN.numberOfBCnodes] = -1.f;
-                  //Q.q27[T   ][QIN.numberOfBCnodes] = -1.f;
-                  //Q.q27[B   ][QIN.numberOfBCnodes] = -1.f;
-                  //Q.q27[NE  ][QIN.numberOfBCnodes] = 0.f;
-                  //Q.q27[SW  ][QIN.numberOfBCnodes] = -1.f;
-                  //Q.q27[SE  ][QIN.numberOfBCnodes] = -1.f;
-                  //Q.q27[NW  ][QIN.numberOfBCnodes] = 0.f;
-                  //Q.q27[TE  ][QIN.numberOfBCnodes] = -1.f;
-                  //Q.q27[BW  ][QIN.numberOfBCnodes] = -1.f;
-                  //Q.q27[BE  ][QIN.numberOfBCnodes] = -1.f;
-                  //Q.q27[TW  ][QIN.numberOfBCnodes] = -1.f;
-                  //Q.q27[TN  ][QIN.numberOfBCnodes] = 0.f;
-                  //Q.q27[BS  ][QIN.numberOfBCnodes] = -1.f;
-                  //Q.q27[BN  ][QIN.numberOfBCnodes] = 0.f;
-                  //Q.q27[TS  ][QIN.numberOfBCnodes] = -1.f;
-                  //Q.q27[REST][QIN.numberOfBCnodes] = -1.f;
-                  //Q.q27[TNE ][QIN.numberOfBCnodes] = 0.f;
-                  //Q.q27[TSW ][QIN.numberOfBCnodes] = -1.f;
-                  //Q.q27[TSE ][QIN.numberOfBCnodes] = -1.f;
-                  //Q.q27[TNW ][QIN.numberOfBCnodes] = 0.f;
-                  //Q.q27[BNE ][QIN.numberOfBCnodes] = 0.f;
-                  //Q.q27[BSW ][QIN.numberOfBCnodes] = -1.f;
-                  //Q.q27[BSE ][QIN.numberOfBCnodes] = -1.f;
-                  //Q.q27[BNW ][QIN.numberOfBCnodes] = 0.f;
-
-                  //Q.q27[E   ][QIN.numberOfBCnodes] = ON[W   ];
-                  //Q.q27[W   ][QIN.numberOfBCnodes] = ON[E   ];
-                  //Q.q27[N   ][QIN.numberOfBCnodes] = ON[S   ];
-                  //Q.q27[S   ][QIN.numberOfBCnodes] = ON[N   ];
-                  //Q.q27[T   ][QIN.numberOfBCnodes] = ON[B   ];
-                  //Q.q27[B   ][QIN.numberOfBCnodes] = ON[T   ];
-                  //Q.q27[NE  ][QIN.numberOfBCnodes] = ON[SW  ];
-                  //Q.q27[SW  ][QIN.numberOfBCnodes] = ON[NE  ];
-                  //Q.q27[SE  ][QIN.numberOfBCnodes] = ON[NW  ];
-                  //Q.q27[NW  ][QIN.numberOfBCnodes] = ON[SE  ];
-                  //Q.q27[TE  ][QIN.numberOfBCnodes] = ON[BW  ];
-                  //Q.q27[BW  ][QIN.numberOfBCnodes] = ON[TE  ];
-                  //Q.q27[BE  ][QIN.numberOfBCnodes] = ON[TW  ];
-                  //Q.q27[TW  ][QIN.numberOfBCnodes] = ON[BE  ];
-                  //Q.q27[TN  ][QIN.numberOfBCnodes] = ON[BS  ];
-                  //Q.q27[BS  ][QIN.numberOfBCnodes] = ON[TN  ];
-                  //Q.q27[BN  ][QIN.numberOfBCnodes] = ON[TS  ];
-                  //Q.q27[TS  ][QIN.numberOfBCnodes] = ON[BN  ];
-                  //Q.q27[REST][QIN.numberOfBCnodes] = ON[REST];
-                  //Q.q27[TNE ][QIN.numberOfBCnodes] = ON[BSW ];
-                  //Q.q27[TSW ][QIN.numberOfBCnodes] = ON[BNE ];
-                  //Q.q27[TSE ][QIN.numberOfBCnodes] = ON[BNW ];
-                  //Q.q27[TNW ][QIN.numberOfBCnodes] = ON[BSE ];
-                  //Q.q27[BNE ][QIN.numberOfBCnodes] = ON[TSW ];
-                  //Q.q27[BSW ][QIN.numberOfBCnodes] = ON[TNE ];
-                  //Q.q27[BSE ][QIN.numberOfBCnodes] = ON[TNW ];
-                  //Q.q27[BNW ][QIN.numberOfBCnodes] = ON[TSE ];
-
-                  Q.q27[E   ][QIN.numberOfBCnodes] = ON[E   ];
-                  Q.q27[W   ][QIN.numberOfBCnodes] = ON[W   ];
-                  Q.q27[N   ][QIN.numberOfBCnodes] = ON[N   ];
-                  Q.q27[S   ][QIN.numberOfBCnodes] = ON[S   ];
-                  Q.q27[T   ][QIN.numberOfBCnodes] = ON[T   ];
-                  Q.q27[B   ][QIN.numberOfBCnodes] = ON[B   ];
-                  Q.q27[NE  ][QIN.numberOfBCnodes] = ON[NE  ];
-                  Q.q27[SW  ][QIN.numberOfBCnodes] = ON[SW  ];
-                  Q.q27[SE  ][QIN.numberOfBCnodes] = ON[SE  ];
-                  Q.q27[NW  ][QIN.numberOfBCnodes] = ON[NW  ];
-                  Q.q27[TE  ][QIN.numberOfBCnodes] = ON[TE  ];
-                  Q.q27[BW  ][QIN.numberOfBCnodes] = ON[BW  ];
-                  Q.q27[BE  ][QIN.numberOfBCnodes] = ON[BE  ];
-                  Q.q27[TW  ][QIN.numberOfBCnodes] = ON[TW  ];
-                  Q.q27[TN  ][QIN.numberOfBCnodes] = ON[TN  ];
-                  Q.q27[BS  ][QIN.numberOfBCnodes] = ON[BS  ];
-                  Q.q27[BN  ][QIN.numberOfBCnodes] = ON[BN  ];
-                  Q.q27[TS  ][QIN.numberOfBCnodes] = ON[TS  ];
-                  Q.q27[REST][QIN.numberOfBCnodes] = ON[REST];
-                  Q.q27[TNE ][QIN.numberOfBCnodes] = ON[TNE ];
-                  Q.q27[TSW ][QIN.numberOfBCnodes] = ON[TSW ];
-                  Q.q27[TSE ][QIN.numberOfBCnodes] = ON[TSE ];
-                  Q.q27[TNW ][QIN.numberOfBCnodes] = ON[TNW ];
-                  Q.q27[BNE ][QIN.numberOfBCnodes] = ON[BNE ];
-                  Q.q27[BSW ][QIN.numberOfBCnodes] = ON[BSW ];
-                  Q.q27[BSE ][QIN.numberOfBCnodes] = ON[BSE ];
-                  Q.q27[BNW ][QIN.numberOfBCnodes] = ON[BNW ];
+                  //Q.q27[DIR_P00   ][QIN.numberOfBCnodes] = -1.f;
+                  //Q.q27[DIR_M00   ][QIN.numberOfBCnodes] = -1.f;
+                  //Q.q27[DIR_0P0   ][QIN.numberOfBCnodes] = 0.f;
+                  //Q.q27[DIR_0M0   ][QIN.numberOfBCnodes] = -1.f;
+                  //Q.q27[DIR_00P   ][QIN.numberOfBCnodes] = -1.f;
+                  //Q.q27[DIR_00M   ][QIN.numberOfBCnodes] = -1.f;
+                  //Q.q27[DIR_PP0  ][QIN.numberOfBCnodes] = 0.f;
+                  //Q.q27[DIR_MM0  ][QIN.numberOfBCnodes] = -1.f;
+                  //Q.q27[DIR_PM0  ][QIN.numberOfBCnodes] = -1.f;
+                  //Q.q27[DIR_MP0  ][QIN.numberOfBCnodes] = 0.f;
+                  //Q.q27[DIR_P0P  ][QIN.numberOfBCnodes] = -1.f;
+                  //Q.q27[DIR_M0M  ][QIN.numberOfBCnodes] = -1.f;
+                  //Q.q27[DIR_P0M  ][QIN.numberOfBCnodes] = -1.f;
+                  //Q.q27[DIR_M0P  ][QIN.numberOfBCnodes] = -1.f;
+                  //Q.q27[DIR_0PP  ][QIN.numberOfBCnodes] = 0.f;
+                  //Q.q27[DIR_0MM  ][QIN.numberOfBCnodes] = -1.f;
+                  //Q.q27[DIR_0PM  ][QIN.numberOfBCnodes] = 0.f;
+                  //Q.q27[DIR_0MP  ][QIN.numberOfBCnodes] = -1.f;
+                  //Q.q27[DIR_000][QIN.numberOfBCnodes] = -1.f;
+                  //Q.q27[DIR_PPP ][QIN.numberOfBCnodes] = 0.f;
+                  //Q.q27[DIR_MMP ][QIN.numberOfBCnodes] = -1.f;
+                  //Q.q27[DIR_PMP ][QIN.numberOfBCnodes] = -1.f;
+                  //Q.q27[DIR_MPP ][QIN.numberOfBCnodes] = 0.f;
+                  //Q.q27[DIR_PPM ][QIN.numberOfBCnodes] = 0.f;
+                  //Q.q27[DIR_MMM ][QIN.numberOfBCnodes] = -1.f;
+                  //Q.q27[DIR_PMM ][QIN.numberOfBCnodes] = -1.f;
+                  //Q.q27[DIR_MPM ][QIN.numberOfBCnodes] = 0.f;
+
+                  //Q.q27[DIR_P00   ][QIN.numberOfBCnodes] = ON[DIR_M00   ];
+                  //Q.q27[DIR_M00   ][QIN.numberOfBCnodes] = ON[DIR_P00   ];
+                  //Q.q27[DIR_0P0   ][QIN.numberOfBCnodes] = ON[DIR_0M0   ];
+                  //Q.q27[DIR_0M0   ][QIN.numberOfBCnodes] = ON[DIR_0P0   ];
+                  //Q.q27[DIR_00P   ][QIN.numberOfBCnodes] = ON[DIR_00M   ];
+                  //Q.q27[DIR_00M   ][QIN.numberOfBCnodes] = ON[DIR_00P   ];
+                  //Q.q27[DIR_PP0  ][QIN.numberOfBCnodes] = ON[DIR_MM0  ];
+                  //Q.q27[DIR_MM0  ][QIN.numberOfBCnodes] = ON[DIR_PP0  ];
+                  //Q.q27[DIR_PM0  ][QIN.numberOfBCnodes] = ON[DIR_MP0  ];
+                  //Q.q27[DIR_MP0  ][QIN.numberOfBCnodes] = ON[DIR_PM0  ];
+                  //Q.q27[DIR_P0P  ][QIN.numberOfBCnodes] = ON[DIR_M0M  ];
+                  //Q.q27[DIR_M0M  ][QIN.numberOfBCnodes] = ON[DIR_P0P  ];
+                  //Q.q27[DIR_P0M  ][QIN.numberOfBCnodes] = ON[DIR_M0P  ];
+                  //Q.q27[DIR_M0P  ][QIN.numberOfBCnodes] = ON[DIR_P0M  ];
+                  //Q.q27[DIR_0PP  ][QIN.numberOfBCnodes] = ON[DIR_0MM  ];
+                  //Q.q27[DIR_0MM  ][QIN.numberOfBCnodes] = ON[DIR_0PP  ];
+                  //Q.q27[DIR_0PM  ][QIN.numberOfBCnodes] = ON[DIR_0MP  ];
+                  //Q.q27[DIR_0MP  ][QIN.numberOfBCnodes] = ON[DIR_0PM  ];
+                  //Q.q27[DIR_000][QIN.numberOfBCnodes] = ON[DIR_000];
+                  //Q.q27[DIR_PPP ][QIN.numberOfBCnodes] = ON[DIR_MMM ];
+                  //Q.q27[DIR_MMP ][QIN.numberOfBCnodes] = ON[DIR_PPM ];
+                  //Q.q27[DIR_PMP ][QIN.numberOfBCnodes] = ON[DIR_MPM ];
+                  //Q.q27[DIR_MPP ][QIN.numberOfBCnodes] = ON[DIR_PMM ];
+                  //Q.q27[DIR_PPM ][QIN.numberOfBCnodes] = ON[DIR_MMP ];
+                  //Q.q27[DIR_MMM ][QIN.numberOfBCnodes] = ON[DIR_PPP ];
+                  //Q.q27[DIR_PMM ][QIN.numberOfBCnodes] = ON[DIR_MPP ];
+                  //Q.q27[DIR_MPM ][QIN.numberOfBCnodes] = ON[DIR_PMP ];
+
+                  Q.q27[DIR_P00   ][QIN.numberOfBCnodes] = ON[DIR_P00   ];
+                  Q.q27[DIR_M00   ][QIN.numberOfBCnodes] = ON[DIR_M00   ];
+                  Q.q27[DIR_0P0   ][QIN.numberOfBCnodes] = ON[DIR_0P0   ];
+                  Q.q27[DIR_0M0   ][QIN.numberOfBCnodes] = ON[DIR_0M0   ];
+                  Q.q27[DIR_00P   ][QIN.numberOfBCnodes] = ON[DIR_00P   ];
+                  Q.q27[DIR_00M   ][QIN.numberOfBCnodes] = ON[DIR_00M   ];
+                  Q.q27[DIR_PP0  ][QIN.numberOfBCnodes] = ON[DIR_PP0  ];
+                  Q.q27[DIR_MM0  ][QIN.numberOfBCnodes] = ON[DIR_MM0  ];
+                  Q.q27[DIR_PM0  ][QIN.numberOfBCnodes] = ON[DIR_PM0  ];
+                  Q.q27[DIR_MP0  ][QIN.numberOfBCnodes] = ON[DIR_MP0  ];
+                  Q.q27[DIR_P0P  ][QIN.numberOfBCnodes] = ON[DIR_P0P  ];
+                  Q.q27[DIR_M0M  ][QIN.numberOfBCnodes] = ON[DIR_M0M  ];
+                  Q.q27[DIR_P0M  ][QIN.numberOfBCnodes] = ON[DIR_P0M  ];
+                  Q.q27[DIR_M0P  ][QIN.numberOfBCnodes] = ON[DIR_M0P  ];
+                  Q.q27[DIR_0PP  ][QIN.numberOfBCnodes] = ON[DIR_0PP  ];
+                  Q.q27[DIR_0MM  ][QIN.numberOfBCnodes] = ON[DIR_0MM  ];
+                  Q.q27[DIR_0PM  ][QIN.numberOfBCnodes] = ON[DIR_0PM  ];
+                  Q.q27[DIR_0MP  ][QIN.numberOfBCnodes] = ON[DIR_0MP  ];
+                  Q.q27[DIR_000][QIN.numberOfBCnodes] = ON[DIR_000];
+                  Q.q27[DIR_PPP ][QIN.numberOfBCnodes] = ON[DIR_PPP ];
+                  Q.q27[DIR_MMP ][QIN.numberOfBCnodes] = ON[DIR_MMP ];
+                  Q.q27[DIR_PMP ][QIN.numberOfBCnodes] = ON[DIR_PMP ];
+                  Q.q27[DIR_MPP ][QIN.numberOfBCnodes] = ON[DIR_MPP ];
+                  Q.q27[DIR_PPM ][QIN.numberOfBCnodes] = ON[DIR_PPM ];
+                  Q.q27[DIR_MMM ][QIN.numberOfBCnodes] = ON[DIR_MMM ];
+                  Q.q27[DIR_PMM ][QIN.numberOfBCnodes] = ON[DIR_PMM ];
+                  Q.q27[DIR_MPM ][QIN.numberOfBCnodes] = ON[DIR_MPM ];
 
                   QIN.numberOfBCnodes++;
                }
@@ -199,7 +199,7 @@ void findKforQ(Parameter* para, int lev)
    VF_LOG_CRITICAL("findKforQ() is deprecated! - see comment above for more information");
 
    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-   //////////////  E   W   N   S   T   B  NE  SW  SE  NW  TE  BW  BE  TW  TN  BS  BN  TS ZERO TNE BNE TSE BSE TNW BNW TSW BSW  ////////////////////////
+   //////////////  DIR_P00  DIR_M00  DIR_0P0  DIR_0M0  DIR_00P  DIR_00M  DIR_PP0  DIR_MM0  DIR_PM0  DIR_MP0  DIR_P0P  DIR_M0M  DIR_P0M  DIR_M0P  DIR_0PP  DIR_0MM  DIR_0PM  DIR_0MP  DIR_000  DIR_PPP  DIR_PPM  DIR_PMP  DIR_PMM  DIR_MPP  DIR_MPM  DIR_MMP  DIR_MMM  ////////////////////////
    int   ex[27]={  1, -1,  0,  0,  0,  0,  1, -1,  1, -1,  1, -1,  1, -1,  0,  0,  0,  0,   0,  1,  1,  1,  1, -1, -1, -1, -1};
    int   ey[27]={  0,  0,  1, -1,  0,  0,  1, -1, -1,  1,  0,  0,  0,  0,  1, -1,  1, -1,   0,  1,  1, -1, -1,  1,  1, -1, -1};
    int   ez[27]={  0,  0,  0,  0,  1, -1,  0,  0,  0,  0,  1, -1, -1,  1,  1, -1, -1,  1,   0,  1, -1,  1, -1,  1, -1,  1, -1};
@@ -247,33 +247,33 @@ void findKforQ(Parameter* para, int lev)
 void findQ_MG( int nx, int ny, unsigned int nnx, unsigned int nny, unsigned int nnz, int* geo_mat, unsigned int* kk, unsigned int sizeQ, real* QQ, QforBoundaryConditions &QIN)
 {
    QforBoundaryConditions Q;
-   Q.q27[E   ] = &QQ[E   *sizeQ];
-   Q.q27[W   ] = &QQ[W   *sizeQ];
-   Q.q27[N   ] = &QQ[N   *sizeQ];
-   Q.q27[S   ] = &QQ[S   *sizeQ];
-   Q.q27[T   ] = &QQ[T   *sizeQ];
-   Q.q27[B   ] = &QQ[B   *sizeQ];
-   Q.q27[NE  ] = &QQ[NE  *sizeQ];
-   Q.q27[SW  ] = &QQ[SW  *sizeQ];
-   Q.q27[SE  ] = &QQ[SE  *sizeQ];
-   Q.q27[NW  ] = &QQ[NW  *sizeQ];
-   Q.q27[TE  ] = &QQ[TE  *sizeQ];
-   Q.q27[BW  ] = &QQ[BW  *sizeQ];
-   Q.q27[BE  ] = &QQ[BE  *sizeQ];
-   Q.q27[TW  ] = &QQ[TW  *sizeQ];
-   Q.q27[TN  ] = &QQ[TN  *sizeQ];
-   Q.q27[BS  ] = &QQ[BS  *sizeQ];
-   Q.q27[BN  ] = &QQ[BN  *sizeQ];
-   Q.q27[TS  ] = &QQ[TS  *sizeQ];
-   Q.q27[REST] = &QQ[REST*sizeQ];
-   Q.q27[TNE ] = &QQ[TNE *sizeQ];
-   Q.q27[TSW ] = &QQ[TSW *sizeQ];
-   Q.q27[TSE ] = &QQ[TSE *sizeQ];
-   Q.q27[TNW ] = &QQ[TNW *sizeQ];
-   Q.q27[BNE ] = &QQ[BNE *sizeQ];
-   Q.q27[BSW ] = &QQ[BSW *sizeQ];
-   Q.q27[BSE ] = &QQ[BSE *sizeQ];
-   Q.q27[BNW ] = &QQ[BNW *sizeQ];
+   Q.q27[DIR_P00   ] = &QQ[DIR_P00   *sizeQ];
+   Q.q27[DIR_M00   ] = &QQ[DIR_M00   *sizeQ];
+   Q.q27[DIR_0P0   ] = &QQ[DIR_0P0   *sizeQ];
+   Q.q27[DIR_0M0   ] = &QQ[DIR_0M0   *sizeQ];
+   Q.q27[DIR_00P   ] = &QQ[DIR_00P   *sizeQ];
+   Q.q27[DIR_00M   ] = &QQ[DIR_00M   *sizeQ];
+   Q.q27[DIR_PP0  ] = &QQ[DIR_PP0  *sizeQ];
+   Q.q27[DIR_MM0  ] = &QQ[DIR_MM0  *sizeQ];
+   Q.q27[DIR_PM0  ] = &QQ[DIR_PM0  *sizeQ];
+   Q.q27[DIR_MP0  ] = &QQ[DIR_MP0  *sizeQ];
+   Q.q27[DIR_P0P  ] = &QQ[DIR_P0P  *sizeQ];
+   Q.q27[DIR_M0M  ] = &QQ[DIR_M0M  *sizeQ];
+   Q.q27[DIR_P0M  ] = &QQ[DIR_P0M  *sizeQ];
+   Q.q27[DIR_M0P  ] = &QQ[DIR_M0P  *sizeQ];
+   Q.q27[DIR_0PP  ] = &QQ[DIR_0PP  *sizeQ];
+   Q.q27[DIR_0MM  ] = &QQ[DIR_0MM  *sizeQ];
+   Q.q27[DIR_0PM  ] = &QQ[DIR_0PM  *sizeQ];
+   Q.q27[DIR_0MP  ] = &QQ[DIR_0MP  *sizeQ];
+   Q.q27[DIR_000] = &QQ[DIR_000*sizeQ];
+   Q.q27[DIR_PPP ] = &QQ[DIR_PPP *sizeQ];
+   Q.q27[DIR_MMP ] = &QQ[DIR_MMP *sizeQ];
+   Q.q27[DIR_PMP ] = &QQ[DIR_PMP *sizeQ];
+   Q.q27[DIR_MPP ] = &QQ[DIR_MPP *sizeQ];
+   Q.q27[DIR_PPM ] = &QQ[DIR_PPM *sizeQ];
+   Q.q27[DIR_MMM ] = &QQ[DIR_MMM *sizeQ];
+   Q.q27[DIR_PMM ] = &QQ[DIR_PMM *sizeQ];
+   Q.q27[DIR_MPM ] = &QQ[DIR_MPM *sizeQ];
 
    // ! CAUTION ! Do not use this function!
    // As the order of the distributions was changed in July 2022, this does not work anymore.
@@ -282,7 +282,7 @@ void findQ_MG( int nx, int ny, unsigned int nnx, unsigned int nny, unsigned int
     VF_LOG_CRITICAL("findQ_MG() is deprecated! - see comment above for more information");
 
    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-   //////////////  E   W   N   S   T   B  NE  SW  SE  NW  TE  BW  BE  TW  TN  BS  BN  TS ZERO TNE BNE TSE BSE TNW BNW TSW BSW  ////////////////////////
+   //////////////  DIR_P00  DIR_M00  DIR_0P0  DIR_0M0  DIR_00P  DIR_00M  DIR_PP0  DIR_MM0  DIR_PM0  DIR_MP0  DIR_P0P  DIR_M0M  DIR_P0M  DIR_M0P  DIR_0PP  DIR_0MM  DIR_0PM  DIR_0MP  DIR_000  DIR_PPP  DIR_PPM  DIR_PMP  DIR_PMM  DIR_MPP  DIR_MPM  DIR_MMP  DIR_MMM  ////////////////////////
    int   ex[27]={  1, -1,  0,  0,  0,  0,  1, -1,  1, -1,  1, -1,  1, -1,  0,  0,  0,  0,   0,  1,  1,  1,  1, -1, -1, -1, -1};
    int   ey[27]={  0,  0,  1, -1,  0,  0,  1, -1, -1,  1,  0,  0,  0,  0,  1, -1,  1, -1,   0,  1,  1, -1, -1,  1,  1, -1, -1};
    int   ez[27]={  0,  0,  0,  0,  1, -1,  0,  0,  0,  0,  1, -1, -1,  1,  1, -1, -1,  1,   0,  1, -1,  1, -1,  1, -1,  1, -1};
@@ -327,33 +327,33 @@ void findQ_MG( int nx, int ny, unsigned int nnx, unsigned int nny, unsigned int
                {
                   QIN.k[QIN.numberOfBCnodes]          = kk[m];
 
-                  Q.q27[E   ][QIN.numberOfBCnodes] = ON[E   ];
-                  Q.q27[W   ][QIN.numberOfBCnodes] = ON[W   ];
-                  Q.q27[N   ][QIN.numberOfBCnodes] = ON[N   ];
-                  Q.q27[S   ][QIN.numberOfBCnodes] = ON[S   ];
-                  Q.q27[T   ][QIN.numberOfBCnodes] = ON[T   ];
-                  Q.q27[B   ][QIN.numberOfBCnodes] = ON[B   ];
-                  Q.q27[NE  ][QIN.numberOfBCnodes] = ON[NE  ];
-                  Q.q27[SW  ][QIN.numberOfBCnodes] = ON[SW  ];
-                  Q.q27[SE  ][QIN.numberOfBCnodes] = ON[SE  ];
-                  Q.q27[NW  ][QIN.numberOfBCnodes] = ON[NW  ];
-                  Q.q27[TE  ][QIN.numberOfBCnodes] = ON[TE  ];
-                  Q.q27[BW  ][QIN.numberOfBCnodes] = ON[BW  ];
-                  Q.q27[BE  ][QIN.numberOfBCnodes] = ON[BE  ];
-                  Q.q27[TW  ][QIN.numberOfBCnodes] = ON[TW  ];
-                  Q.q27[TN  ][QIN.numberOfBCnodes] = ON[TN  ];
-                  Q.q27[BS  ][QIN.numberOfBCnodes] = ON[BS  ];
-                  Q.q27[BN  ][QIN.numberOfBCnodes] = ON[BN  ];
-                  Q.q27[TS  ][QIN.numberOfBCnodes] = ON[TS  ];
-                  Q.q27[REST][QIN.numberOfBCnodes] = ON[REST];
-                  Q.q27[TNE ][QIN.numberOfBCnodes] = ON[TNE ];
-                  Q.q27[TSW ][QIN.numberOfBCnodes] = ON[TSW ];
-                  Q.q27[TSE ][QIN.numberOfBCnodes] = ON[TSE ];
-                  Q.q27[TNW ][QIN.numberOfBCnodes] = ON[TNW ];
-                  Q.q27[BNE ][QIN.numberOfBCnodes] = ON[BNE ];
-                  Q.q27[BSW ][QIN.numberOfBCnodes] = ON[BSW ];
-                  Q.q27[BSE ][QIN.numberOfBCnodes] = ON[BSE ];
-                  Q.q27[BNW ][QIN.numberOfBCnodes] = ON[BNW ];
+                  Q.q27[DIR_P00   ][QIN.numberOfBCnodes] = ON[DIR_P00   ];
+                  Q.q27[DIR_M00   ][QIN.numberOfBCnodes] = ON[DIR_M00   ];
+                  Q.q27[DIR_0P0   ][QIN.numberOfBCnodes] = ON[DIR_0P0   ];
+                  Q.q27[DIR_0M0   ][QIN.numberOfBCnodes] = ON[DIR_0M0   ];
+                  Q.q27[DIR_00P   ][QIN.numberOfBCnodes] = ON[DIR_00P   ];
+                  Q.q27[DIR_00M   ][QIN.numberOfBCnodes] = ON[DIR_00M   ];
+                  Q.q27[DIR_PP0  ][QIN.numberOfBCnodes] = ON[DIR_PP0  ];
+                  Q.q27[DIR_MM0  ][QIN.numberOfBCnodes] = ON[DIR_MM0  ];
+                  Q.q27[DIR_PM0  ][QIN.numberOfBCnodes] = ON[DIR_PM0  ];
+                  Q.q27[DIR_MP0  ][QIN.numberOfBCnodes] = ON[DIR_MP0  ];
+                  Q.q27[DIR_P0P  ][QIN.numberOfBCnodes] = ON[DIR_P0P  ];
+                  Q.q27[DIR_M0M  ][QIN.numberOfBCnodes] = ON[DIR_M0M  ];
+                  Q.q27[DIR_P0M  ][QIN.numberOfBCnodes] = ON[DIR_P0M  ];
+                  Q.q27[DIR_M0P  ][QIN.numberOfBCnodes] = ON[DIR_M0P  ];
+                  Q.q27[DIR_0PP  ][QIN.numberOfBCnodes] = ON[DIR_0PP  ];
+                  Q.q27[DIR_0MM  ][QIN.numberOfBCnodes] = ON[DIR_0MM  ];
+                  Q.q27[DIR_0PM  ][QIN.numberOfBCnodes] = ON[DIR_0PM  ];
+                  Q.q27[DIR_0MP  ][QIN.numberOfBCnodes] = ON[DIR_0MP  ];
+                  Q.q27[DIR_000][QIN.numberOfBCnodes] = ON[DIR_000];
+                  Q.q27[DIR_PPP ][QIN.numberOfBCnodes] = ON[DIR_PPP ];
+                  Q.q27[DIR_MMP ][QIN.numberOfBCnodes] = ON[DIR_MMP ];
+                  Q.q27[DIR_PMP ][QIN.numberOfBCnodes] = ON[DIR_PMP ];
+                  Q.q27[DIR_MPP ][QIN.numberOfBCnodes] = ON[DIR_MPP ];
+                  Q.q27[DIR_PPM ][QIN.numberOfBCnodes] = ON[DIR_PPM ];
+                  Q.q27[DIR_MMM ][QIN.numberOfBCnodes] = ON[DIR_MMM ];
+                  Q.q27[DIR_PMM ][QIN.numberOfBCnodes] = ON[DIR_PMM ];
+                  Q.q27[DIR_MPM ][QIN.numberOfBCnodes] = ON[DIR_MPM ];
 
                   QIN.numberOfBCnodes++;
                }
@@ -373,7 +373,7 @@ void findKforQ_MG(int nx, int ny, unsigned int nnx, unsigned int nny, unsigned i
     VF_LOG_CRITICAL("findKforQ_MG() is deprecated! - see comment above for more information");
 
    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-   //////////////  E   W   N   S   T   B  NE  SW  SE  NW  TE  BW  BE  TW  TN  BS  BN  TS ZERO TNE BNE TSE BSE TNW BNW TSW BSW  ////////////////////////
+   //////////////  DIR_P00  DIR_M00  DIR_0P0  DIR_0M0  DIR_00P  DIR_00M  DIR_PP0  DIR_MM0  DIR_PM0  DIR_MP0  DIR_P0P  DIR_M0M  DIR_P0M  DIR_M0P  DIR_0PP  DIR_0MM  DIR_0PM  DIR_0MP  DIR_000  DIR_PPP  DIR_PPM  DIR_PMP  DIR_PMM  DIR_MPP  DIR_MPM  DIR_MMP  DIR_MMM  ////////////////////////
    int   ex[27]={  1, -1,  0,  0,  0,  0,  1, -1,  1, -1,  1, -1,  1, -1,  0,  0,  0,  0,   0,  1,  1,  1,  1, -1, -1, -1, -1};
    int   ey[27]={  0,  0,  1, -1,  0,  0,  1, -1, -1,  1,  0,  0,  0,  0,  1, -1,  1, -1,   0,  1,  1, -1, -1,  1,  1, -1, -1};
    int   ez[27]={  0,  0,  0,  0,  1, -1,  0,  0,  0,  0,  1, -1, -1,  1,  1, -1, -1,  1,   0,  1, -1,  1, -1,  1, -1,  1, -1};
@@ -448,33 +448,33 @@ void findQInflow(Parameter* para)
    QIN.numberOfBCnodes = 0;
    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    QforBoundaryConditions Q;
-   Q.q27[E   ] = &QQ[E   *sizeQ];
-   Q.q27[W   ] = &QQ[W   *sizeQ];
-   Q.q27[N   ] = &QQ[N   *sizeQ];
-   Q.q27[S   ] = &QQ[S   *sizeQ];
-   Q.q27[T   ] = &QQ[T   *sizeQ];
-   Q.q27[B   ] = &QQ[B   *sizeQ];
-   Q.q27[NE  ] = &QQ[NE  *sizeQ];
-   Q.q27[SW  ] = &QQ[SW  *sizeQ];
-   Q.q27[SE  ] = &QQ[SE  *sizeQ];
-   Q.q27[NW  ] = &QQ[NW  *sizeQ];
-   Q.q27[TE  ] = &QQ[TE  *sizeQ];
-   Q.q27[BW  ] = &QQ[BW  *sizeQ];
-   Q.q27[BE  ] = &QQ[BE  *sizeQ];
-   Q.q27[TW  ] = &QQ[TW  *sizeQ];
-   Q.q27[TN  ] = &QQ[TN  *sizeQ];
-   Q.q27[BS  ] = &QQ[BS  *sizeQ];
-   Q.q27[BN  ] = &QQ[BN  *sizeQ];
-   Q.q27[TS  ] = &QQ[TS  *sizeQ];
-   Q.q27[REST] = &QQ[REST*sizeQ];
-   Q.q27[TNE ] = &QQ[TNE *sizeQ];
-   Q.q27[TSW ] = &QQ[TSW *sizeQ];
-   Q.q27[TSE ] = &QQ[TSE *sizeQ];
-   Q.q27[TNW ] = &QQ[TNW *sizeQ];
-   Q.q27[BNE ] = &QQ[BNE *sizeQ];
-   Q.q27[BSW ] = &QQ[BSW *sizeQ];
-   Q.q27[BSE ] = &QQ[BSE *sizeQ];
-   Q.q27[BNW ] = &QQ[BNW *sizeQ];
+   Q.q27[DIR_P00   ] = &QQ[DIR_P00   *sizeQ];
+   Q.q27[DIR_M00   ] = &QQ[DIR_M00   *sizeQ];
+   Q.q27[DIR_0P0   ] = &QQ[DIR_0P0   *sizeQ];
+   Q.q27[DIR_0M0   ] = &QQ[DIR_0M0   *sizeQ];
+   Q.q27[DIR_00P   ] = &QQ[DIR_00P   *sizeQ];
+   Q.q27[DIR_00M   ] = &QQ[DIR_00M   *sizeQ];
+   Q.q27[DIR_PP0  ] = &QQ[DIR_PP0  *sizeQ];
+   Q.q27[DIR_MM0  ] = &QQ[DIR_MM0  *sizeQ];
+   Q.q27[DIR_PM0  ] = &QQ[DIR_PM0  *sizeQ];
+   Q.q27[DIR_MP0  ] = &QQ[DIR_MP0  *sizeQ];
+   Q.q27[DIR_P0P  ] = &QQ[DIR_P0P  *sizeQ];
+   Q.q27[DIR_M0M  ] = &QQ[DIR_M0M  *sizeQ];
+   Q.q27[DIR_P0M  ] = &QQ[DIR_P0M  *sizeQ];
+   Q.q27[DIR_M0P  ] = &QQ[DIR_M0P  *sizeQ];
+   Q.q27[DIR_0PP  ] = &QQ[DIR_0PP  *sizeQ];
+   Q.q27[DIR_0MM  ] = &QQ[DIR_0MM  *sizeQ];
+   Q.q27[DIR_0PM  ] = &QQ[DIR_0PM  *sizeQ];
+   Q.q27[DIR_0MP  ] = &QQ[DIR_0MP  *sizeQ];
+   Q.q27[DIR_000] = &QQ[DIR_000*sizeQ];
+   Q.q27[DIR_PPP ] = &QQ[DIR_PPP *sizeQ];
+   Q.q27[DIR_MMP ] = &QQ[DIR_MMP *sizeQ];
+   Q.q27[DIR_PMP ] = &QQ[DIR_PMP *sizeQ];
+   Q.q27[DIR_MPP ] = &QQ[DIR_MPP *sizeQ];
+   Q.q27[DIR_PPM ] = &QQ[DIR_PPM *sizeQ];
+   Q.q27[DIR_MMM ] = &QQ[DIR_MMM *sizeQ];
+   Q.q27[DIR_PMM ] = &QQ[DIR_PMM *sizeQ];
+   Q.q27[DIR_MPM ] = &QQ[DIR_MPM *sizeQ];
    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    //unsigned int li = ((nnx+STARTOFFX-2)-(STARTOFFX+1)-1);
    //unsigned int lj = ((nny+STARTOFFY-2)-(STARTOFFY+1)-1);
@@ -497,33 +497,33 @@ void findQInflow(Parameter* para)
                //vz[QIN.numberOfBCnodes]             = (real)(16.f*(u0*2.f)*i*j*(nx-i)*(ny-j))/(nx*nx*ny*ny);
                deltaVz[QIN.numberOfBCnodes]        = (real)0.f;
 			   //////////////////////////////////////////////////////////////////////////
-               //Q.q27[E   ][QIN.numberOfBCnodes] = (real)-1.f;
-               //Q.q27[W   ][QIN.numberOfBCnodes] = (real)-1.f;
-               //Q.q27[N   ][QIN.numberOfBCnodes] = (real)-1.f;
-               //Q.q27[S   ][QIN.numberOfBCnodes] = (real)-1.f;
-               //Q.q27[T   ][QIN.numberOfBCnodes] = (real)-1.f;
-               //Q.q27[B   ][QIN.numberOfBCnodes] = (real)1.f;
-               //Q.q27[NE  ][QIN.numberOfBCnodes] = (real)-1.f;
-               //Q.q27[SW  ][QIN.numberOfBCnodes] = (real)-1.f;
-               //Q.q27[SE  ][QIN.numberOfBCnodes] = (real)-1.f;
-               //Q.q27[NW  ][QIN.numberOfBCnodes] = (real)-1.f;
-               //Q.q27[TE  ][QIN.numberOfBCnodes] = (real)-1.f;
-               //Q.q27[BW  ][QIN.numberOfBCnodes] = (real)1.f;
-               //Q.q27[BE  ][QIN.numberOfBCnodes] = (real)1.f;
-               //Q.q27[TW  ][QIN.numberOfBCnodes] = (real)-1.f;
-               //Q.q27[TN  ][QIN.numberOfBCnodes] = (real)-1.f;
-               //Q.q27[BS  ][QIN.numberOfBCnodes] = (real)1.f;
-               //Q.q27[BN  ][QIN.numberOfBCnodes] = (real)1.f;
-               //Q.q27[TS  ][QIN.numberOfBCnodes] = (real)-1.f;
-               //Q.q27[REST][QIN.numberOfBCnodes] = (real)-1.f;
-               //Q.q27[TNE ][QIN.numberOfBCnodes] = (real)-1.f;
-               //Q.q27[TSW ][QIN.numberOfBCnodes] = (real)-1.f;
-               //Q.q27[TSE ][QIN.numberOfBCnodes] = (real)-1.f;
-               //Q.q27[TNW ][QIN.numberOfBCnodes] = (real)-1.f;
-               //Q.q27[BNE ][QIN.numberOfBCnodes] = (real)1.f;
-               //Q.q27[BSW ][QIN.numberOfBCnodes] = (real)1.f;
-               //Q.q27[BSE ][QIN.numberOfBCnodes] = (real)1.f;
-               //Q.q27[BNW ][QIN.numberOfBCnodes] = (real)1.f;
+               //Q.q27[DIR_P00   ][QIN.numberOfBCnodes] = (real)-1.f;
+               //Q.q27[DIR_M00   ][QIN.numberOfBCnodes] = (real)-1.f;
+               //Q.q27[DIR_0P0   ][QIN.numberOfBCnodes] = (real)-1.f;
+               //Q.q27[DIR_0M0   ][QIN.numberOfBCnodes] = (real)-1.f;
+               //Q.q27[DIR_00P   ][QIN.numberOfBCnodes] = (real)-1.f;
+               //Q.q27[DIR_00M   ][QIN.numberOfBCnodes] = (real)1.f;
+               //Q.q27[DIR_PP0  ][QIN.numberOfBCnodes] = (real)-1.f;
+               //Q.q27[DIR_MM0  ][QIN.numberOfBCnodes] = (real)-1.f;
+               //Q.q27[DIR_PM0  ][QIN.numberOfBCnodes] = (real)-1.f;
+               //Q.q27[DIR_MP0  ][QIN.numberOfBCnodes] = (real)-1.f;
+               //Q.q27[DIR_P0P  ][QIN.numberOfBCnodes] = (real)-1.f;
+               //Q.q27[DIR_M0M  ][QIN.numberOfBCnodes] = (real)1.f;
+               //Q.q27[DIR_P0M  ][QIN.numberOfBCnodes] = (real)1.f;
+               //Q.q27[DIR_M0P  ][QIN.numberOfBCnodes] = (real)-1.f;
+               //Q.q27[DIR_0PP  ][QIN.numberOfBCnodes] = (real)-1.f;
+               //Q.q27[DIR_0MM  ][QIN.numberOfBCnodes] = (real)1.f;
+               //Q.q27[DIR_0PM  ][QIN.numberOfBCnodes] = (real)1.f;
+               //Q.q27[DIR_0MP  ][QIN.numberOfBCnodes] = (real)-1.f;
+               //Q.q27[DIR_000][QIN.numberOfBCnodes] = (real)-1.f;
+               //Q.q27[DIR_PPP ][QIN.numberOfBCnodes] = (real)-1.f;
+               //Q.q27[DIR_MMP ][QIN.numberOfBCnodes] = (real)-1.f;
+               //Q.q27[DIR_PMP ][QIN.numberOfBCnodes] = (real)-1.f;
+               //Q.q27[DIR_MPP ][QIN.numberOfBCnodes] = (real)-1.f;
+               //Q.q27[DIR_PPM ][QIN.numberOfBCnodes] = (real)1.f;
+               //Q.q27[DIR_MMM ][QIN.numberOfBCnodes] = (real)1.f;
+               //Q.q27[DIR_PMM ][QIN.numberOfBCnodes] = (real)1.f;
+               //Q.q27[DIR_MPM ][QIN.numberOfBCnodes] = (real)1.f;
 			   //////////////////////////////////////////////////////////////////////////
 
 
@@ -531,33 +531,33 @@ void findQInflow(Parameter* para)
    // As the order of the distributions was changed in July 2022, this does not work anymore.
    // https://git.rz.tu-bs.de/irmb/VirtualFluids_dev/-/issues/14
 
-			   Q.q27[E   ][QIN.numberOfBCnodes] = (real)-1.f;
-			   Q.q27[W   ][QIN.numberOfBCnodes] = (real)-1.f;
-			   Q.q27[N   ][QIN.numberOfBCnodes] = (real)-1.f;
-			   Q.q27[S   ][QIN.numberOfBCnodes] = (real)-1.f;
-			   Q.q27[T   ][QIN.numberOfBCnodes] = (real)1.f;
-			   Q.q27[B   ][QIN.numberOfBCnodes] = (real)-1.f;
-			   Q.q27[NE  ][QIN.numberOfBCnodes] = (real)-1.f;
-			   Q.q27[SW  ][QIN.numberOfBCnodes] = (real)-1.f;
-			   Q.q27[SE  ][QIN.numberOfBCnodes] = (real)-1.f;
-			   Q.q27[NW  ][QIN.numberOfBCnodes] = (real)-1.f;
-			   Q.q27[TE  ][QIN.numberOfBCnodes] = (real)1.f;
-			   Q.q27[BW  ][QIN.numberOfBCnodes] = (real)-1.f;
-			   Q.q27[BE  ][QIN.numberOfBCnodes] = (real)-1.f;
-			   Q.q27[TW  ][QIN.numberOfBCnodes] = (real)1.f;
-			   Q.q27[TN  ][QIN.numberOfBCnodes] = (real)1.f;
-			   Q.q27[BS  ][QIN.numberOfBCnodes] = (real)-1.f;
-			   Q.q27[BN  ][QIN.numberOfBCnodes] = (real)-1.f;
-			   Q.q27[TS  ][QIN.numberOfBCnodes] = (real)1.f;
-			   Q.q27[REST][QIN.numberOfBCnodes] = (real)-1.f;
-			   Q.q27[TNE ][QIN.numberOfBCnodes] = (real)1.f;
-			   Q.q27[TSW ][QIN.numberOfBCnodes] = (real)1.f;
-			   Q.q27[TSE ][QIN.numberOfBCnodes] = (real)1.f;
-			   Q.q27[TNW ][QIN.numberOfBCnodes] = (real)1.f;
-			   Q.q27[BNE ][QIN.numberOfBCnodes] = (real)-1.f;
-			   Q.q27[BSW ][QIN.numberOfBCnodes] = (real)-1.f;
-			   Q.q27[BSE ][QIN.numberOfBCnodes] = (real)-1.f;
-			   Q.q27[BNW ][QIN.numberOfBCnodes] = (real)-1.f;
+			   Q.q27[DIR_P00   ][QIN.numberOfBCnodes] = (real)-1.f;
+			   Q.q27[DIR_M00   ][QIN.numberOfBCnodes] = (real)-1.f;
+			   Q.q27[DIR_0P0   ][QIN.numberOfBCnodes] = (real)-1.f;
+			   Q.q27[DIR_0M0   ][QIN.numberOfBCnodes] = (real)-1.f;
+			   Q.q27[DIR_00P   ][QIN.numberOfBCnodes] = (real)1.f;
+			   Q.q27[DIR_00M   ][QIN.numberOfBCnodes] = (real)-1.f;
+			   Q.q27[DIR_PP0  ][QIN.numberOfBCnodes] = (real)-1.f;
+			   Q.q27[DIR_MM0  ][QIN.numberOfBCnodes] = (real)-1.f;
+			   Q.q27[DIR_PM0  ][QIN.numberOfBCnodes] = (real)-1.f;
+			   Q.q27[DIR_MP0  ][QIN.numberOfBCnodes] = (real)-1.f;
+			   Q.q27[DIR_P0P  ][QIN.numberOfBCnodes] = (real)1.f;
+			   Q.q27[DIR_M0M  ][QIN.numberOfBCnodes] = (real)-1.f;
+			   Q.q27[DIR_P0M  ][QIN.numberOfBCnodes] = (real)-1.f;
+			   Q.q27[DIR_M0P  ][QIN.numberOfBCnodes] = (real)1.f;
+			   Q.q27[DIR_0PP  ][QIN.numberOfBCnodes] = (real)1.f;
+			   Q.q27[DIR_0MM  ][QIN.numberOfBCnodes] = (real)-1.f;
+			   Q.q27[DIR_0PM  ][QIN.numberOfBCnodes] = (real)-1.f;
+			   Q.q27[DIR_0MP  ][QIN.numberOfBCnodes] = (real)1.f;
+			   Q.q27[DIR_000][QIN.numberOfBCnodes] = (real)-1.f;
+			   Q.q27[DIR_PPP ][QIN.numberOfBCnodes] = (real)1.f;
+			   Q.q27[DIR_MMP ][QIN.numberOfBCnodes] = (real)1.f;
+			   Q.q27[DIR_PMP ][QIN.numberOfBCnodes] = (real)1.f;
+			   Q.q27[DIR_MPP ][QIN.numberOfBCnodes] = (real)1.f;
+			   Q.q27[DIR_PPM ][QIN.numberOfBCnodes] = (real)-1.f;
+			   Q.q27[DIR_MMM ][QIN.numberOfBCnodes] = (real)-1.f;
+			   Q.q27[DIR_PMM ][QIN.numberOfBCnodes] = (real)-1.f;
+			   Q.q27[DIR_MPM ][QIN.numberOfBCnodes] = (real)-1.f;
 			   //////////////////////////////////////////////////////////////////////////
 			   QIN.numberOfBCnodes++;
             }
@@ -588,33 +588,33 @@ void findQInflow(Parameter* para)
    //               vy[QIN.numberOfBCnodes]             = 0.f;
    //               vz[QIN.numberOfBCnodes]             = u0;
 
-   //               Q.q27[E   ][QIN.numberOfBCnodes] = ON[E   ];
-   //               Q.q27[W   ][QIN.numberOfBCnodes] = ON[W   ];
-   //               Q.q27[N   ][QIN.numberOfBCnodes] = ON[N   ];
-   //               Q.q27[S   ][QIN.numberOfBCnodes] = ON[S   ];
-   //               Q.q27[T   ][QIN.numberOfBCnodes] = ON[T   ];
-   //               Q.q27[B   ][QIN.numberOfBCnodes] = ON[B   ];
-   //               Q.q27[NE  ][QIN.numberOfBCnodes] = ON[NE  ];
-   //               Q.q27[SW  ][QIN.numberOfBCnodes] = ON[SW  ];
-   //               Q.q27[SE  ][QIN.numberOfBCnodes] = ON[SE  ];
-   //               Q.q27[NW  ][QIN.numberOfBCnodes] = ON[NW  ];
-   //               Q.q27[TE  ][QIN.numberOfBCnodes] = ON[TE  ];
-   //               Q.q27[BW  ][QIN.numberOfBCnodes] = ON[BW  ];
-   //               Q.q27[BE  ][QIN.numberOfBCnodes] = ON[BE  ];
-   //               Q.q27[TW  ][QIN.numberOfBCnodes] = ON[TW  ];
-   //               Q.q27[TN  ][QIN.numberOfBCnodes] = ON[TN  ];
-   //               Q.q27[BS  ][QIN.numberOfBCnodes] = ON[BS  ];
-   //               Q.q27[BN  ][QIN.numberOfBCnodes] = ON[BN  ];
-   //               Q.q27[TS  ][QIN.numberOfBCnodes] = ON[TS  ];
-   //               Q.q27[REST][QIN.numberOfBCnodes] = ON[REST];
-   //               Q.q27[TNE ][QIN.numberOfBCnodes] = ON[TNE ];
-   //               Q.q27[TSW ][QIN.numberOfBCnodes] = ON[TSW ];
-   //               Q.q27[TSE ][QIN.numberOfBCnodes] = ON[TSE ];
-   //               Q.q27[TNW ][QIN.numberOfBCnodes] = ON[TNW ];
-   //               Q.q27[BNE ][QIN.numberOfBCnodes] = ON[BNE ];
-   //               Q.q27[BSW ][QIN.numberOfBCnodes] = ON[BSW ];
-   //               Q.q27[BSE ][QIN.numberOfBCnodes] = ON[BSE ];
-   //               Q.q27[BNW ][QIN.numberOfBCnodes] = ON[BNW ];
+   //               Q.q27[DIR_P00   ][QIN.numberOfBCnodes] = ON[DIR_P00   ];
+   //               Q.q27[DIR_M00   ][QIN.numberOfBCnodes] = ON[DIR_M00   ];
+   //               Q.q27[DIR_0P0   ][QIN.numberOfBCnodes] = ON[DIR_0P0   ];
+   //               Q.q27[DIR_0M0   ][QIN.numberOfBCnodes] = ON[DIR_0M0   ];
+   //               Q.q27[DIR_00P   ][QIN.numberOfBCnodes] = ON[DIR_00P   ];
+   //               Q.q27[DIR_00M   ][QIN.numberOfBCnodes] = ON[DIR_00M   ];
+   //               Q.q27[DIR_PP0  ][QIN.numberOfBCnodes] = ON[DIR_PP0  ];
+   //               Q.q27[DIR_MM0  ][QIN.numberOfBCnodes] = ON[DIR_MM0  ];
+   //               Q.q27[DIR_PM0  ][QIN.numberOfBCnodes] = ON[DIR_PM0  ];
+   //               Q.q27[DIR_MP0  ][QIN.numberOfBCnodes] = ON[DIR_MP0  ];
+   //               Q.q27[DIR_P0P  ][QIN.numberOfBCnodes] = ON[DIR_P0P  ];
+   //               Q.q27[DIR_M0M  ][QIN.numberOfBCnodes] = ON[DIR_M0M  ];
+   //               Q.q27[DIR_P0M  ][QIN.numberOfBCnodes] = ON[DIR_P0M  ];
+   //               Q.q27[DIR_M0P  ][QIN.numberOfBCnodes] = ON[DIR_M0P  ];
+   //               Q.q27[DIR_0PP  ][QIN.numberOfBCnodes] = ON[DIR_0PP  ];
+   //               Q.q27[DIR_0MM  ][QIN.numberOfBCnodes] = ON[DIR_0MM  ];
+   //               Q.q27[DIR_0PM  ][QIN.numberOfBCnodes] = ON[DIR_0PM  ];
+   //               Q.q27[DIR_0MP  ][QIN.numberOfBCnodes] = ON[DIR_0MP  ];
+   //               Q.q27[DIR_000][QIN.numberOfBCnodes] = ON[DIR_000];
+   //               Q.q27[DIR_PPP ][QIN.numberOfBCnodes] = ON[DIR_PPP ];
+   //               Q.q27[DIR_MMP ][QIN.numberOfBCnodes] = ON[DIR_MMP ];
+   //               Q.q27[DIR_PMP ][QIN.numberOfBCnodes] = ON[DIR_PMP ];
+   //               Q.q27[DIR_MPP ][QIN.numberOfBCnodes] = ON[DIR_MPP ];
+   //               Q.q27[DIR_PPM ][QIN.numberOfBCnodes] = ON[DIR_PPM ];
+   //               Q.q27[DIR_MMM ][QIN.numberOfBCnodes] = ON[DIR_MMM ];
+   //               Q.q27[DIR_PMM ][QIN.numberOfBCnodes] = ON[DIR_PMM ];
+   //               Q.q27[DIR_MPM ][QIN.numberOfBCnodes] = ON[DIR_MPM ];
 
    //               QIN.numberOfBCnodes++;
    //            }
@@ -647,33 +647,33 @@ void findQInflow(Parameter* para)
    //               vy[QIN.numberOfBCnodes]             = 0.f;
    //               vz[QIN.numberOfBCnodes]             = 0.f;//u0;
 
-   //               Q.q27[E   ][QIN.numberOfBCnodes] = ON[E   ];
-   //               Q.q27[W   ][QIN.numberOfBCnodes] = ON[W   ];
-   //               Q.q27[N   ][QIN.numberOfBCnodes] = ON[N   ];
-   //               Q.q27[S   ][QIN.numberOfBCnodes] = ON[S   ];
-   //               Q.q27[T   ][QIN.numberOfBCnodes] = ON[T   ];
-   //               Q.q27[B   ][QIN.numberOfBCnodes] = ON[B   ];
-   //               Q.q27[NE  ][QIN.numberOfBCnodes] = ON[NE  ];
-   //               Q.q27[SW  ][QIN.numberOfBCnodes] = ON[SW  ];
-   //               Q.q27[SE  ][QIN.numberOfBCnodes] = ON[SE  ];
-   //               Q.q27[NW  ][QIN.numberOfBCnodes] = ON[NW  ];
-   //               Q.q27[TE  ][QIN.numberOfBCnodes] = ON[TE  ];
-   //               Q.q27[BW  ][QIN.numberOfBCnodes] = ON[BW  ];
-   //               Q.q27[BE  ][QIN.numberOfBCnodes] = ON[BE  ];
-   //               Q.q27[TW  ][QIN.numberOfBCnodes] = ON[TW  ];
-   //               Q.q27[TN  ][QIN.numberOfBCnodes] = ON[TN  ];
-   //               Q.q27[BS  ][QIN.numberOfBCnodes] = ON[BS  ];
-   //               Q.q27[BN  ][QIN.numberOfBCnodes] = ON[BN  ];
-   //               Q.q27[TS  ][QIN.numberOfBCnodes] = ON[TS  ];
-   //               Q.q27[REST][QIN.numberOfBCnodes] = ON[REST];
-   //               Q.q27[TNE ][QIN.numberOfBCnodes] = ON[TNE ];
-   //               Q.q27[TSW ][QIN.numberOfBCnodes] = ON[TSW ];
-   //               Q.q27[TSE ][QIN.numberOfBCnodes] = ON[TSE ];
-   //               Q.q27[TNW ][QIN.numberOfBCnodes] = ON[TNW ];
-   //               Q.q27[BNE ][QIN.numberOfBCnodes] = ON[BNE ];
-   //               Q.q27[BSW ][QIN.numberOfBCnodes] = ON[BSW ];
-   //               Q.q27[BSE ][QIN.numberOfBCnodes] = ON[BSE ];
-   //               Q.q27[BNW ][QIN.numberOfBCnodes] = ON[BNW ];
+   //               Q.q27[DIR_P00   ][QIN.numberOfBCnodes] = ON[DIR_P00   ];
+   //               Q.q27[DIR_M00   ][QIN.numberOfBCnodes] = ON[DIR_M00   ];
+   //               Q.q27[DIR_0P0   ][QIN.numberOfBCnodes] = ON[DIR_0P0   ];
+   //               Q.q27[DIR_0M0   ][QIN.numberOfBCnodes] = ON[DIR_0M0   ];
+   //               Q.q27[DIR_00P   ][QIN.numberOfBCnodes] = ON[DIR_00P   ];
+   //               Q.q27[DIR_00M   ][QIN.numberOfBCnodes] = ON[DIR_00M   ];
+   //               Q.q27[DIR_PP0  ][QIN.numberOfBCnodes] = ON[DIR_PP0  ];
+   //               Q.q27[DIR_MM0  ][QIN.numberOfBCnodes] = ON[DIR_MM0  ];
+   //               Q.q27[DIR_PM0  ][QIN.numberOfBCnodes] = ON[DIR_PM0  ];
+   //               Q.q27[DIR_MP0  ][QIN.numberOfBCnodes] = ON[DIR_MP0  ];
+   //               Q.q27[DIR_P0P  ][QIN.numberOfBCnodes] = ON[DIR_P0P  ];
+   //               Q.q27[DIR_M0M  ][QIN.numberOfBCnodes] = ON[DIR_M0M  ];
+   //               Q.q27[DIR_P0M  ][QIN.numberOfBCnodes] = ON[DIR_P0M  ];
+   //               Q.q27[DIR_M0P  ][QIN.numberOfBCnodes] = ON[DIR_M0P  ];
+   //               Q.q27[DIR_0PP  ][QIN.numberOfBCnodes] = ON[DIR_0PP  ];
+   //               Q.q27[DIR_0MM  ][QIN.numberOfBCnodes] = ON[DIR_0MM  ];
+   //               Q.q27[DIR_0PM  ][QIN.numberOfBCnodes] = ON[DIR_0PM  ];
+   //               Q.q27[DIR_0MP  ][QIN.numberOfBCnodes] = ON[DIR_0MP  ];
+   //               Q.q27[DIR_000][QIN.numberOfBCnodes] = ON[DIR_000];
+   //               Q.q27[DIR_PPP ][QIN.numberOfBCnodes] = ON[DIR_PPP ];
+   //               Q.q27[DIR_MMP ][QIN.numberOfBCnodes] = ON[DIR_MMP ];
+   //               Q.q27[DIR_PMP ][QIN.numberOfBCnodes] = ON[DIR_PMP ];
+   //               Q.q27[DIR_MPP ][QIN.numberOfBCnodes] = ON[DIR_MPP ];
+   //               Q.q27[DIR_PPM ][QIN.numberOfBCnodes] = ON[DIR_PPM ];
+   //               Q.q27[DIR_MMM ][QIN.numberOfBCnodes] = ON[DIR_MMM ];
+   //               Q.q27[DIR_PMM ][QIN.numberOfBCnodes] = ON[DIR_PMM ];
+   //               Q.q27[DIR_MPM ][QIN.numberOfBCnodes] = ON[DIR_MPM ];
 
    //               QIN.numberOfBCnodes++;
    //            }
@@ -692,7 +692,7 @@ void findKforQInflow(Parameter* para)
     VF_LOG_CRITICAL("findKforQInflow() is deprecated! - see comment above for more information");
 
    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-   //////////////  E   W   N   S   T   B  NE  SW  SE  NW  TE  BW  BE  TW  TN  BS  BN  TS ZERO TNE BNE TSE BSE TNW BNW TSW BSW  ////////////////////////
+   //////////////  DIR_P00   DIR_M00   DIR_0P0   DIR_0M0   DIR_00P   DIR_00M  DIR_PP0  DIR_MM0  DIR_PM0  DIR_MP0  DIR_P0P  DIR_M0M  DIR_P0M  DIR_M0P  DIR_0PP  DIR_0MM  DIR_0PM  DIR_0MP DIR_000 DIR_PPP DIR_PPM DIR_PMP DIR_PMM DIR_MPP DIR_MPM DIR_MMP DIR_MMM  ////////////////////////
    //int   ex[27]={  1, -1,  0,  0,  0,  0,  1, -1,  1, -1,  1, -1,  1, -1,  0,  0,  0,  0,   0,  1,  1,  1,  1, -1, -1, -1, -1};
    //int   ey[27]={  0,  0,  1, -1,  0,  0,  1, -1, -1,  1,  0,  0,  0,  0,  1, -1,  1, -1,   0,  1,  1, -1, -1,  1,  1, -1, -1};
    int   ez[27]={  0,  0,  0,  0,  1, -1,  0,  0,  0,  0,  1, -1, -1,  1,  1, -1, -1,  1,   0,  1, -1,  1, -1,  1, -1,  1, -1};
@@ -780,7 +780,7 @@ void findQOutflow(Parameter* para)
     VF_LOG_CRITICAL("findQOutflow() is deprecated! - see comment above for more information");
 
    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-   //////////////  E   W   N   S   T   B  NE  SW  SE  NW  TE  BW  BE  TW  TN  BS  BN  TS ZERO TNE BNE TSE BSE TNW BNW TSW BSW  ////////////////////////
+   //////////////  DIR_P00   DIR_M00   DIR_0P0   DIR_0M0   DIR_00P   DIR_00M  DIR_PP0  DIR_MM0  DIR_PM0  DIR_MP0  DIR_P0P  DIR_M0M  DIR_P0M  DIR_M0P  DIR_0PP  DIR_0MM  DIR_0PM  DIR_0MP DIR_000 DIR_PPP DIR_PPM DIR_PMP DIR_PMM DIR_MPP DIR_MPM DIR_MMP DIR_MMM  ////////////////////////
    //int   ex[27]={  1, -1,  0,  0,  0,  0,  1, -1,  1, -1,  1, -1,  1, -1,  0,  0,  0,  0,   0,  1,  1,  1,  1, -1, -1, -1, -1};
    //int   ey[27]={  0,  0,  1, -1,  0,  0,  1, -1, -1,  1,  0,  0,  0,  0,  1, -1,  1, -1,   0,  1,  1, -1, -1,  1,  1, -1, -1};
    //int   ez[27]={  0,  0,  0,  0,  1, -1,  0,  0,  0,  0,  1, -1, -1,  1,  1, -1, -1,  1,   0,  1, -1,  1, -1,  1, -1,  1, -1};
@@ -814,33 +814,33 @@ void findQOutflow(Parameter* para)
    QIN.numberOfBCnodes = 0;
    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    QforBoundaryConditions Q;
-   Q.q27[E   ] = &QQ[E   *sizeQ];
-   Q.q27[W   ] = &QQ[W   *sizeQ];
-   Q.q27[N   ] = &QQ[N   *sizeQ];
-   Q.q27[S   ] = &QQ[S   *sizeQ];
-   Q.q27[T   ] = &QQ[T   *sizeQ];
-   Q.q27[B   ] = &QQ[B   *sizeQ];
-   Q.q27[NE  ] = &QQ[NE  *sizeQ];
-   Q.q27[SW  ] = &QQ[SW  *sizeQ];
-   Q.q27[SE  ] = &QQ[SE  *sizeQ];
-   Q.q27[NW  ] = &QQ[NW  *sizeQ];
-   Q.q27[TE  ] = &QQ[TE  *sizeQ];
-   Q.q27[BW  ] = &QQ[BW  *sizeQ];
-   Q.q27[BE  ] = &QQ[BE  *sizeQ];
-   Q.q27[TW  ] = &QQ[TW  *sizeQ];
-   Q.q27[TN  ] = &QQ[TN  *sizeQ];
-   Q.q27[BS  ] = &QQ[BS  *sizeQ];
-   Q.q27[BN  ] = &QQ[BN  *sizeQ];
-   Q.q27[TS  ] = &QQ[TS  *sizeQ];
-   Q.q27[REST] = &QQ[REST*sizeQ];
-   Q.q27[TNE ] = &QQ[TNE *sizeQ];
-   Q.q27[TSW ] = &QQ[TSW *sizeQ];
-   Q.q27[TSE ] = &QQ[TSE *sizeQ];
-   Q.q27[TNW ] = &QQ[TNW *sizeQ];
-   Q.q27[BNE ] = &QQ[BNE *sizeQ];
-   Q.q27[BSW ] = &QQ[BSW *sizeQ];
-   Q.q27[BSE ] = &QQ[BSE *sizeQ];
-   Q.q27[BNW ] = &QQ[BNW *sizeQ];
+   Q.q27[DIR_P00   ] = &QQ[DIR_P00   *sizeQ];
+   Q.q27[DIR_M00   ] = &QQ[DIR_M00   *sizeQ];
+   Q.q27[DIR_0P0   ] = &QQ[DIR_0P0   *sizeQ];
+   Q.q27[DIR_0M0   ] = &QQ[DIR_0M0   *sizeQ];
+   Q.q27[DIR_00P   ] = &QQ[DIR_00P   *sizeQ];
+   Q.q27[DIR_00M   ] = &QQ[DIR_00M   *sizeQ];
+   Q.q27[DIR_PP0  ] = &QQ[DIR_PP0  *sizeQ];
+   Q.q27[DIR_MM0  ] = &QQ[DIR_MM0  *sizeQ];
+   Q.q27[DIR_PM0  ] = &QQ[DIR_PM0  *sizeQ];
+   Q.q27[DIR_MP0  ] = &QQ[DIR_MP0  *sizeQ];
+   Q.q27[DIR_P0P  ] = &QQ[DIR_P0P  *sizeQ];
+   Q.q27[DIR_M0M  ] = &QQ[DIR_M0M  *sizeQ];
+   Q.q27[DIR_P0M  ] = &QQ[DIR_P0M  *sizeQ];
+   Q.q27[DIR_M0P  ] = &QQ[DIR_M0P  *sizeQ];
+   Q.q27[DIR_0PP  ] = &QQ[DIR_0PP  *sizeQ];
+   Q.q27[DIR_0MM  ] = &QQ[DIR_0MM  *sizeQ];
+   Q.q27[DIR_0PM  ] = &QQ[DIR_0PM  *sizeQ];
+   Q.q27[DIR_0MP  ] = &QQ[DIR_0MP  *sizeQ];
+   Q.q27[DIR_000] = &QQ[DIR_000*sizeQ];
+   Q.q27[DIR_PPP ] = &QQ[DIR_PPP *sizeQ];
+   Q.q27[DIR_MMP ] = &QQ[DIR_MMP *sizeQ];
+   Q.q27[DIR_PMP ] = &QQ[DIR_PMP *sizeQ];
+   Q.q27[DIR_MPP ] = &QQ[DIR_MPP *sizeQ];
+   Q.q27[DIR_PPM ] = &QQ[DIR_PPM *sizeQ];
+   Q.q27[DIR_MMM ] = &QQ[DIR_MMM *sizeQ];
+   Q.q27[DIR_PMM ] = &QQ[DIR_PMM *sizeQ];
+   Q.q27[DIR_MPM ] = &QQ[DIR_MPM *sizeQ];
 
 
    //unsigned int li = ((nnx+STARTOFFX-2)-(STARTOFFX+1)-1);
@@ -862,33 +862,33 @@ void findQOutflow(Parameter* para)
                //vz[QIN.numberOfBCnodes]             =  (real)(16.f*(u0*2.f)*(i-(STARTOFFX+1)-0.5f)*(li-1.5f-(i-(STARTOFFX+1)))*(j-(STARTOFFY+1)-0.5f)*(lj-1.5f-(j-(STARTOFFY+1))))/(li*lj*li*lj);
                //vz[QIN.numberOfBCnodes]             = (real)(16.f*(u0*2.f)*i*j*(nx-i)*(ny-j))/(nx*nx*ny*ny);
                deltaVz[QIN.numberOfBCnodes]        = (real)0.f;
-               Q.q27[E   ][QIN.numberOfBCnodes] = (real)-1.f;
-               Q.q27[W   ][QIN.numberOfBCnodes] = (real)-1.f;
-               Q.q27[N   ][QIN.numberOfBCnodes] = (real)-1.f;
-               Q.q27[S   ][QIN.numberOfBCnodes] = (real)-1.f;
-               Q.q27[T   ][QIN.numberOfBCnodes] = (real)1.f;
-               Q.q27[B   ][QIN.numberOfBCnodes] = (real)-1.f;
-               Q.q27[NE  ][QIN.numberOfBCnodes] = (real)-1.f;
-               Q.q27[SW  ][QIN.numberOfBCnodes] = (real)-1.f;
-               Q.q27[SE  ][QIN.numberOfBCnodes] = (real)-1.f;
-               Q.q27[NW  ][QIN.numberOfBCnodes] = (real)-1.f;
-               Q.q27[TE  ][QIN.numberOfBCnodes] = (real)1.f;
-               Q.q27[BW  ][QIN.numberOfBCnodes] = (real)-1.f;
-               Q.q27[BE  ][QIN.numberOfBCnodes] = (real)-1.f;
-               Q.q27[TW  ][QIN.numberOfBCnodes] = (real)1.f;
-               Q.q27[TN  ][QIN.numberOfBCnodes] = (real)1.f;
-               Q.q27[BS  ][QIN.numberOfBCnodes] = (real)-1.f;
-               Q.q27[BN  ][QIN.numberOfBCnodes] = (real)-1.f;
-               Q.q27[TS  ][QIN.numberOfBCnodes] = (real)1.f;
-               Q.q27[REST][QIN.numberOfBCnodes] = (real)-1.f;
-               Q.q27[TNE ][QIN.numberOfBCnodes] = (real)1.f;
-               Q.q27[TSW ][QIN.numberOfBCnodes] = (real)1.f;
-               Q.q27[TSE ][QIN.numberOfBCnodes] = (real)1.f;
-               Q.q27[TNW ][QIN.numberOfBCnodes] = (real)1.f;
-               Q.q27[BNE ][QIN.numberOfBCnodes] = (real)-1.f;
-               Q.q27[BSW ][QIN.numberOfBCnodes] = (real)-1.f;
-               Q.q27[BSE ][QIN.numberOfBCnodes] = (real)-1.f;
-               Q.q27[BNW ][QIN.numberOfBCnodes] = (real)-1.f;
+               Q.q27[DIR_P00   ][QIN.numberOfBCnodes] = (real)-1.f;
+               Q.q27[DIR_M00   ][QIN.numberOfBCnodes] = (real)-1.f;
+               Q.q27[DIR_0P0   ][QIN.numberOfBCnodes] = (real)-1.f;
+               Q.q27[DIR_0M0   ][QIN.numberOfBCnodes] = (real)-1.f;
+               Q.q27[DIR_00P   ][QIN.numberOfBCnodes] = (real)1.f;
+               Q.q27[DIR_00M   ][QIN.numberOfBCnodes] = (real)-1.f;
+               Q.q27[DIR_PP0  ][QIN.numberOfBCnodes] = (real)-1.f;
+               Q.q27[DIR_MM0  ][QIN.numberOfBCnodes] = (real)-1.f;
+               Q.q27[DIR_PM0  ][QIN.numberOfBCnodes] = (real)-1.f;
+               Q.q27[DIR_MP0  ][QIN.numberOfBCnodes] = (real)-1.f;
+               Q.q27[DIR_P0P  ][QIN.numberOfBCnodes] = (real)1.f;
+               Q.q27[DIR_M0M  ][QIN.numberOfBCnodes] = (real)-1.f;
+               Q.q27[DIR_P0M  ][QIN.numberOfBCnodes] = (real)-1.f;
+               Q.q27[DIR_M0P  ][QIN.numberOfBCnodes] = (real)1.f;
+               Q.q27[DIR_0PP  ][QIN.numberOfBCnodes] = (real)1.f;
+               Q.q27[DIR_0MM  ][QIN.numberOfBCnodes] = (real)-1.f;
+               Q.q27[DIR_0PM  ][QIN.numberOfBCnodes] = (real)-1.f;
+               Q.q27[DIR_0MP  ][QIN.numberOfBCnodes] = (real)1.f;
+               Q.q27[DIR_000][QIN.numberOfBCnodes] = (real)-1.f;
+               Q.q27[DIR_PPP ][QIN.numberOfBCnodes] = (real)1.f;
+               Q.q27[DIR_MMP ][QIN.numberOfBCnodes] = (real)1.f;
+               Q.q27[DIR_PMP ][QIN.numberOfBCnodes] = (real)1.f;
+               Q.q27[DIR_MPP ][QIN.numberOfBCnodes] = (real)1.f;
+               Q.q27[DIR_PPM ][QIN.numberOfBCnodes] = (real)-1.f;
+               Q.q27[DIR_MMM ][QIN.numberOfBCnodes] = (real)-1.f;
+               Q.q27[DIR_PMM ][QIN.numberOfBCnodes] = (real)-1.f;
+               Q.q27[DIR_MPM ][QIN.numberOfBCnodes] = (real)-1.f;
                QIN.numberOfBCnodes++;
             }
        }
@@ -912,7 +912,7 @@ void findKforQOutflow(Parameter* para)
     VF_LOG_CRITICAL("findKforQOutflow() is deprecated! - see comment above for more information");
 
    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-   //////////////  E   W   N   S   T   B  NE  SW  SE  NW  TE  BW  BE  TW  TN  BS  BN  TS ZERO TNE BNE TSE BSE TNW BNW TSW BSW  ////////////////////////
+   //////////////  DIR_P00   DIR_M00   DIR_0P0   DIR_0M0   DIR_00P   DIR_00M  DIR_PP0  DIR_MM0  DIR_PM0  DIR_MP0  DIR_P0P  DIR_M0M  DIR_P0M  DIR_M0P  DIR_0PP  DIR_0MM  DIR_0PM  DIR_0MP DIR_000 DIR_PPP DIR_PPM DIR_PMP DIR_PMM DIR_MPP DIR_MPM DIR_MMP DIR_MMM  ////////////////////////
    //int   ex[27]={  1, -1,  0,  0,  0,  0,  1, -1,  1, -1,  1, -1,  1, -1,  0,  0,  0,  0,   0,  1,  1,  1,  1, -1, -1, -1, -1};
    //int   ey[27]={  0,  0,  1, -1,  0,  0,  1, -1, -1,  1,  0,  0,  0,  0,  1, -1,  1, -1,   0,  1,  1, -1, -1,  1,  1, -1, -1};
    int   ez[27]={  0,  0,  0,  0,  1, -1,  0,  0,  0,  0,  1, -1, -1,  1,  1, -1, -1,  1,   0,  1, -1,  1, -1,  1, -1,  1, -1};
@@ -976,36 +976,36 @@ void findKforQOutflow(Parameter* para)
 //                   unsigned int sizeQW, real* vxW, real* vyW, real* vzW, real*deltaVW, real* QQW, QforBoundaryConditions &QWin)
 //{
 //   QforBoundaryConditions QN;
-//   QN.q27[E   ] = &QQN[E   *sizeQN];
-//   QN.q27[W   ] = &QQN[W   *sizeQN];
-//   QN.q27[N   ] = &QQN[N   *sizeQN];
-//   QN.q27[S   ] = &QQN[S   *sizeQN];
-//   QN.q27[T   ] = &QQN[T   *sizeQN];
-//   QN.q27[B   ] = &QQN[B   *sizeQN];
-//   QN.q27[NE  ] = &QQN[NE  *sizeQN];
-//   QN.q27[SW  ] = &QQN[SW  *sizeQN];
-//   QN.q27[SE  ] = &QQN[SE  *sizeQN];
-//   QN.q27[NW  ] = &QQN[NW  *sizeQN];
-//   QN.q27[TE  ] = &QQN[TE  *sizeQN];
-//   QN.q27[BW  ] = &QQN[BW  *sizeQN];
-//   QN.q27[BE  ] = &QQN[BE  *sizeQN];
-//   QN.q27[TW  ] = &QQN[TW  *sizeQN];
-//   QN.q27[TN  ] = &QQN[TN  *sizeQN];
-//   QN.q27[BS  ] = &QQN[BS  *sizeQN];
-//   QN.q27[BN  ] = &QQN[BN  *sizeQN];
-//   QN.q27[TS  ] = &QQN[TS  *sizeQN];
-//   QN.q27[REST] = &QQN[REST*sizeQN];
-//   QN.q27[TNE ] = &QQN[TNE *sizeQN];
-//   QN.q27[TSW ] = &QQN[TSW *sizeQN];
-//   QN.q27[TSE ] = &QQN[TSE *sizeQN];
-//   QN.q27[TNW ] = &QQN[TNW *sizeQN];
-//   QN.q27[BNE ] = &QQN[BNE *sizeQN];
-//   QN.q27[BSW ] = &QQN[BSW *sizeQN];
-//   QN.q27[BSE ] = &QQN[BSE *sizeQN];
-//   QN.q27[BNW ] = &QQN[BNW *sizeQN];
+//   QN.q27[DIR_P00   ] = &QQN[DIR_P00   *sizeQN];
+//   QN.q27[DIR_M00   ] = &QQN[DIR_M00   *sizeQN];
+//   QN.q27[DIR_0P0   ] = &QQN[DIR_0P0   *sizeQN];
+//   QN.q27[DIR_0M0   ] = &QQN[DIR_0M0   *sizeQN];
+//   QN.q27[DIR_00P   ] = &QQN[DIR_00P   *sizeQN];
+//   QN.q27[DIR_00M   ] = &QQN[DIR_00M   *sizeQN];
+//   QN.q27[DIR_PP0  ] = &QQN[DIR_PP0  *sizeQN];
+//   QN.q27[DIR_MM0  ] = &QQN[DIR_MM0  *sizeQN];
+//   QN.q27[DIR_PM0  ] = &QQN[DIR_PM0  *sizeQN];
+//   QN.q27[DIR_MP0  ] = &QQN[DIR_MP0  *sizeQN];
+//   QN.q27[DIR_P0P  ] = &QQN[DIR_P0P  *sizeQN];
+//   QN.q27[DIR_M0M  ] = &QQN[DIR_M0M  *sizeQN];
+//   QN.q27[DIR_P0M  ] = &QQN[DIR_P0M  *sizeQN];
+//   QN.q27[DIR_M0P  ] = &QQN[DIR_M0P  *sizeQN];
+//   QN.q27[DIR_0PP  ] = &QQN[DIR_0PP  *sizeQN];
+//   QN.q27[DIR_0MM  ] = &QQN[DIR_0MM  *sizeQN];
+//   QN.q27[DIR_0PM  ] = &QQN[DIR_0PM  *sizeQN];
+//   QN.q27[DIR_0MP  ] = &QQN[DIR_0MP  *sizeQN];
+//   QN.q27[DIR_000] = &QQN[DIR_000*sizeQN];
+//   QN.q27[DIR_PPP ] = &QQN[DIR_PPP *sizeQN];
+//   QN.q27[DIR_MMP ] = &QQN[DIR_MMP *sizeQN];
+//   QN.q27[DIR_PMP ] = &QQN[DIR_PMP *sizeQN];
+//   QN.q27[DIR_MPP ] = &QQN[DIR_MPP *sizeQN];
+//   QN.q27[DIR_PPM ] = &QQN[DIR_PPM *sizeQN];
+//   QN.q27[DIR_MMM ] = &QQN[DIR_MMM *sizeQN];
+//   QN.q27[DIR_PMM ] = &QQN[DIR_PMM *sizeQN];
+//   QN.q27[DIR_MPM ] = &QQN[DIR_MPM *sizeQN];
 //
 //   ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//   //////////////  E   W   N   S   T   B  NE  SW  SE  NW  TE  BW  BE  TW  TN  BS  BN  TS ZERO TNE BNE TSE BSE TNW BNW TSW BSW  ////////////////////////
+//   //////////////  DIR_P00   DIR_M00   DIR_0P0   DIR_0M0   DIR_00P   DIR_00M  DIR_PP0  DIR_MM0  DIR_PM0  DIR_MP0  DIR_P0P  DIR_M0M  DIR_P0M  DIR_M0P  DIR_0PP  DIR_0MM  DIR_0PM  DIR_0MP DIR_000 DIR_PPP DIR_PPM DIR_PMP DIR_PMM DIR_MPP DIR_MPM DIR_MMP DIR_MMM  ////////////////////////
 //   int   ex[27]={  1, -1,  0,  0,  0,  0,  1, -1,  1, -1,  1, -1,  1, -1,  0,  0,  0,  0,   0,  1,  1,  1,  1, -1, -1, -1, -1};
 //   int   ey[27]={  0,  0,  1, -1,  0,  0,  1, -1, -1,  1,  0,  0,  0,  0,  1, -1,  1, -1,   0,  1,  1, -1, -1,  1,  1, -1, -1};
 //   int   ez[27]={  0,  0,  0,  0,  1, -1,  0,  0,  0,  0,  1, -1, -1,  1,  1, -1, -1,  1,   0,  1, -1,  1, -1,  1, -1,  1, -1};
@@ -1018,7 +1018,7 @@ void findKforQOutflow(Parameter* para)
 //                      QforBoundaryConditions &QN, QforBoundaryConditions &QS, QforBoundaryConditions &QE, QforBoundaryConditions &QW)
 //{
 //   ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//   //////////////  E   W   N   S   T   B  NE  SW  SE  NW  TE  BW  BE  TW  TN  BS  BN  TS ZERO TNE BNE TSE BSE TNW BNW TSW BSW  ////////////////////////
+//   //////////////  DIR_P00   DIR_M00   DIR_0P0   DIR_0M0   DIR_00P   DIR_00M  DIR_PP0  DIR_MM0  DIR_PM0  DIR_MP0  DIR_P0P  DIR_M0M  DIR_P0M  DIR_M0P  DIR_0PP  DIR_0MM  DIR_0PM  DIR_0MP DIR_000 DIR_PPP DIR_PPM DIR_PMP DIR_PMM DIR_MPP DIR_MPM DIR_MMP DIR_MMM  ////////////////////////
 //   int   ex[27]={  1, -1,  0,  0,  0,  0,  1, -1,  1, -1,  1, -1,  1, -1,  0,  0,  0,  0,   0,  1,  1,  1,  1, -1, -1, -1, -1};
 //   int   ey[27]={  0,  0,  1, -1,  0,  0,  1, -1, -1,  1,  0,  0,  0,  0,  1, -1,  1, -1,   0,  1,  1, -1, -1,  1,  1, -1, -1};
 //   int   ez[27]={  0,  0,  0,  0,  1, -1,  0,  0,  0,  0,  1, -1, -1,  1,  1, -1, -1,  1,   0,  1, -1,  1, -1,  1, -1,  1, -1};
@@ -1069,7 +1069,7 @@ void findQPressX0(Parameter* para, int lev)
     VF_LOG_CRITICAL("findKforQPressX0() is deprecated! - see comment above for more information");
 
 	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-	//////////////  E   W   N   S   T   B  NE  SW  SE  NW  TE  BW  BE  TW  TN  BS  BN  TS ZERO TNE BNE TSE BSE TNW BNW TSW BSW  ////////////////////////
+	//////////////  DIR_P00   DIR_M00   DIR_0P0   DIR_0M0   DIR_00P   DIR_00M  DIR_PP0  DIR_MM0  DIR_PM0  DIR_MP0  DIR_P0P  DIR_M0M  DIR_P0M  DIR_M0P  DIR_0PP  DIR_0MM  DIR_0PM  DIR_0MP DIR_000 DIR_PPP DIR_PPM DIR_PMP DIR_PMM DIR_MPP DIR_MPM DIR_MMP DIR_MMM  ////////////////////////
 	//int   ex[27]={  1, -1,  0,  0,  0,  0,  1, -1,  1, -1,  1, -1,  1, -1,  0,  0,  0,  0,   0,  1,  1,  1,  1, -1, -1, -1, -1};
 	//int   ey[27]={  0,  0,  1, -1,  0,  0,  1, -1, -1,  1,  0,  0,  0,  0,  1, -1,  1, -1,   0,  1,  1, -1, -1,  1,  1, -1, -1};
 	//int   ez[27]={  0,  0,  0,  0,  1, -1,  0,  0,  0,  0,  1, -1, -1,  1,  1, -1, -1,  1,   0,  1, -1,  1, -1,  1, -1,  1, -1};
@@ -1096,33 +1096,33 @@ void findQPressX0(Parameter* para, int lev)
 	QIN.numberOfBCnodes = 0;
 	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	QforBoundaryConditions Q;
-	Q.q27[E   ] = &QQ[E   *sizeQ];
-	Q.q27[W   ] = &QQ[W   *sizeQ];
-	Q.q27[N   ] = &QQ[N   *sizeQ];
-	Q.q27[S   ] = &QQ[S   *sizeQ];
-	Q.q27[T   ] = &QQ[T   *sizeQ];
-	Q.q27[B   ] = &QQ[B   *sizeQ];
-	Q.q27[NE  ] = &QQ[NE  *sizeQ];
-	Q.q27[SW  ] = &QQ[SW  *sizeQ];
-	Q.q27[SE  ] = &QQ[SE  *sizeQ];
-	Q.q27[NW  ] = &QQ[NW  *sizeQ];
-	Q.q27[TE  ] = &QQ[TE  *sizeQ];
-	Q.q27[BW  ] = &QQ[BW  *sizeQ];
-	Q.q27[BE  ] = &QQ[BE  *sizeQ];
-	Q.q27[TW  ] = &QQ[TW  *sizeQ];
-	Q.q27[TN  ] = &QQ[TN  *sizeQ];
-	Q.q27[BS  ] = &QQ[BS  *sizeQ];
-	Q.q27[BN  ] = &QQ[BN  *sizeQ];
-	Q.q27[TS  ] = &QQ[TS  *sizeQ];
-	Q.q27[REST] = &QQ[REST*sizeQ];
-	Q.q27[TNE ] = &QQ[TNE *sizeQ];
-	Q.q27[TSW ] = &QQ[TSW *sizeQ];
-	Q.q27[TSE ] = &QQ[TSE *sizeQ];
-	Q.q27[TNW ] = &QQ[TNW *sizeQ];
-	Q.q27[BNE ] = &QQ[BNE *sizeQ];
-	Q.q27[BSW ] = &QQ[BSW *sizeQ];
-	Q.q27[BSE ] = &QQ[BSE *sizeQ];
-	Q.q27[BNW ] = &QQ[BNW *sizeQ];
+	Q.q27[DIR_P00   ] = &QQ[DIR_P00   *sizeQ];
+	Q.q27[DIR_M00   ] = &QQ[DIR_M00   *sizeQ];
+	Q.q27[DIR_0P0   ] = &QQ[DIR_0P0   *sizeQ];
+	Q.q27[DIR_0M0   ] = &QQ[DIR_0M0   *sizeQ];
+	Q.q27[DIR_00P   ] = &QQ[DIR_00P   *sizeQ];
+	Q.q27[DIR_00M   ] = &QQ[DIR_00M   *sizeQ];
+	Q.q27[DIR_PP0  ] = &QQ[DIR_PP0  *sizeQ];
+	Q.q27[DIR_MM0  ] = &QQ[DIR_MM0  *sizeQ];
+	Q.q27[DIR_PM0  ] = &QQ[DIR_PM0  *sizeQ];
+	Q.q27[DIR_MP0  ] = &QQ[DIR_MP0  *sizeQ];
+	Q.q27[DIR_P0P  ] = &QQ[DIR_P0P  *sizeQ];
+	Q.q27[DIR_M0M  ] = &QQ[DIR_M0M  *sizeQ];
+	Q.q27[DIR_P0M  ] = &QQ[DIR_P0M  *sizeQ];
+	Q.q27[DIR_M0P  ] = &QQ[DIR_M0P  *sizeQ];
+	Q.q27[DIR_0PP  ] = &QQ[DIR_0PP  *sizeQ];
+	Q.q27[DIR_0MM  ] = &QQ[DIR_0MM  *sizeQ];
+	Q.q27[DIR_0PM  ] = &QQ[DIR_0PM  *sizeQ];
+	Q.q27[DIR_0MP  ] = &QQ[DIR_0MP  *sizeQ];
+	Q.q27[DIR_000] = &QQ[DIR_000*sizeQ];
+	Q.q27[DIR_PPP ] = &QQ[DIR_PPP *sizeQ];
+	Q.q27[DIR_MMP ] = &QQ[DIR_MMP *sizeQ];
+	Q.q27[DIR_PMP ] = &QQ[DIR_PMP *sizeQ];
+	Q.q27[DIR_MPP ] = &QQ[DIR_MPP *sizeQ];
+	Q.q27[DIR_PPM ] = &QQ[DIR_PPM *sizeQ];
+	Q.q27[DIR_MMM ] = &QQ[DIR_MMM *sizeQ];
+	Q.q27[DIR_PMM ] = &QQ[DIR_PMM *sizeQ];
+	Q.q27[DIR_MPM ] = &QQ[DIR_MPM *sizeQ];
 
 
 	//unsigned int li = ((nnx+STARTOFFX-2)-(STARTOFFX+1)-1);
@@ -1145,33 +1145,33 @@ void findQPressX0(Parameter* para, int lev)
 				//vz[QIN.numberOfBCnodes]             =  (real)(16.f*(u0*2.f)*(i-(STARTOFFX+1)-0.5f)*(li-1.5f-(i-(STARTOFFX+1)))*(j-(STARTOFFY+1)-0.5f)*(lj-1.5f-(j-(STARTOFFY+1))))/(li*lj*li*lj);
 				//vz[QIN.numberOfBCnodes]             = (real)(16.f*(u0*2.f)*i*j*(nx-i)*(ny-j))/(nx*nx*ny*ny);
 				deltaVz[QIN.numberOfBCnodes]        = (real)0.f;
-				Q.q27[E   ][QIN.numberOfBCnodes] = (real)-1.f;
-				Q.q27[W   ][QIN.numberOfBCnodes] = (real)1.f;
-				Q.q27[N   ][QIN.numberOfBCnodes] = (real)-1.f;
-				Q.q27[S   ][QIN.numberOfBCnodes] = (real)-1.f;
-				Q.q27[T   ][QIN.numberOfBCnodes] = (real)-1.f;
-				Q.q27[B   ][QIN.numberOfBCnodes] = (real)-1.f;
-				Q.q27[NE  ][QIN.numberOfBCnodes] = (real)-1.f;
-				Q.q27[SW  ][QIN.numberOfBCnodes] = (real)1.f;
-				Q.q27[SE  ][QIN.numberOfBCnodes] = (real)-1.f;
-				Q.q27[NW  ][QIN.numberOfBCnodes] = (real)1.f;
-				Q.q27[TE  ][QIN.numberOfBCnodes] = (real)-1.f;
-				Q.q27[BW  ][QIN.numberOfBCnodes] = (real)1.f;
-				Q.q27[BE  ][QIN.numberOfBCnodes] = (real)-1.f;
-				Q.q27[TW  ][QIN.numberOfBCnodes] = (real)1.f;
-				Q.q27[TN  ][QIN.numberOfBCnodes] = (real)-1.f;
-				Q.q27[BS  ][QIN.numberOfBCnodes] = (real)-1.f;
-				Q.q27[BN  ][QIN.numberOfBCnodes] = (real)-1.f;
-				Q.q27[TS  ][QIN.numberOfBCnodes] = (real)-1.f;
-				Q.q27[REST][QIN.numberOfBCnodes] = (real)-1.f;
-				Q.q27[TNE ][QIN.numberOfBCnodes] = (real)-1.f;
-				Q.q27[TSW ][QIN.numberOfBCnodes] = (real)1.f;
-				Q.q27[TSE ][QIN.numberOfBCnodes] = (real)-1.f;
-				Q.q27[TNW ][QIN.numberOfBCnodes] = (real)1.f;
-				Q.q27[BNE ][QIN.numberOfBCnodes] = (real)-1.f;
-				Q.q27[BSW ][QIN.numberOfBCnodes] = (real)1.f;
-				Q.q27[BSE ][QIN.numberOfBCnodes] = (real)-1.f;
-				Q.q27[BNW ][QIN.numberOfBCnodes] = (real)1.f;
+				Q.q27[DIR_P00   ][QIN.numberOfBCnodes] = (real)-1.f;
+				Q.q27[DIR_M00   ][QIN.numberOfBCnodes] = (real)1.f;
+				Q.q27[DIR_0P0   ][QIN.numberOfBCnodes] = (real)-1.f;
+				Q.q27[DIR_0M0   ][QIN.numberOfBCnodes] = (real)-1.f;
+				Q.q27[DIR_00P   ][QIN.numberOfBCnodes] = (real)-1.f;
+				Q.q27[DIR_00M   ][QIN.numberOfBCnodes] = (real)-1.f;
+				Q.q27[DIR_PP0  ][QIN.numberOfBCnodes] = (real)-1.f;
+				Q.q27[DIR_MM0  ][QIN.numberOfBCnodes] = (real)1.f;
+				Q.q27[DIR_PM0  ][QIN.numberOfBCnodes] = (real)-1.f;
+				Q.q27[DIR_MP0  ][QIN.numberOfBCnodes] = (real)1.f;
+				Q.q27[DIR_P0P  ][QIN.numberOfBCnodes] = (real)-1.f;
+				Q.q27[DIR_M0M  ][QIN.numberOfBCnodes] = (real)1.f;
+				Q.q27[DIR_P0M  ][QIN.numberOfBCnodes] = (real)-1.f;
+				Q.q27[DIR_M0P  ][QIN.numberOfBCnodes] = (real)1.f;
+				Q.q27[DIR_0PP  ][QIN.numberOfBCnodes] = (real)-1.f;
+				Q.q27[DIR_0MM  ][QIN.numberOfBCnodes] = (real)-1.f;
+				Q.q27[DIR_0PM  ][QIN.numberOfBCnodes] = (real)-1.f;
+				Q.q27[DIR_0MP  ][QIN.numberOfBCnodes] = (real)-1.f;
+				Q.q27[DIR_000][QIN.numberOfBCnodes] = (real)-1.f;
+				Q.q27[DIR_PPP ][QIN.numberOfBCnodes] = (real)-1.f;
+				Q.q27[DIR_MMP ][QIN.numberOfBCnodes] = (real)1.f;
+				Q.q27[DIR_PMP ][QIN.numberOfBCnodes] = (real)-1.f;
+				Q.q27[DIR_MPP ][QIN.numberOfBCnodes] = (real)1.f;
+				Q.q27[DIR_PPM ][QIN.numberOfBCnodes] = (real)-1.f;
+				Q.q27[DIR_MMM ][QIN.numberOfBCnodes] = (real)1.f;
+				Q.q27[DIR_PMM ][QIN.numberOfBCnodes] = (real)-1.f;
+				Q.q27[DIR_MPM ][QIN.numberOfBCnodes] = (real)1.f;
 				QIN.numberOfBCnodes++;
 			}
 		}
@@ -1187,7 +1187,7 @@ void findKforQPressX0(Parameter* para, int lev)
     VF_LOG_CRITICAL("findKforQPressX0() is deprecated! - see comment above for more information");
 
 	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-	//////////////  E   W   N   S   T   B  NE  SW  SE  NW  TE  BW  BE  TW  TN  BS  BN  TS ZERO TNE BNE TSE BSE TNW BNW TSW BSW  ////////////////////////
+	//////////////  DIR_P00   DIR_M00   DIR_0P0   DIR_0M0   DIR_00P   DIR_00M  DIR_PP0  DIR_MM0  DIR_PM0  DIR_MP0  DIR_P0P  DIR_M0M  DIR_P0M  DIR_M0P  DIR_0PP  DIR_0MM  DIR_0PM  DIR_0MP DIR_000 DIR_PPP DIR_PPM DIR_PMP DIR_PMM DIR_MPP DIR_MPM DIR_MMP DIR_MMM  ////////////////////////
 	//int   ex[27]={  1, -1,  0,  0,  0,  0,  1, -1,  1, -1,  1, -1,  1, -1,  0,  0,  0,  0,   0,  1,  1,  1,  1, -1, -1, -1, -1};
 	//int   ey[27]={  0,  0,  1, -1,  0,  0,  1, -1, -1,  1,  0,  0,  0,  0,  1, -1,  1, -1,   0,  1,  1, -1, -1,  1,  1, -1, -1};
 	int   ez[27]={  0,  0,  0,  0,  1, -1,  0,  0,  0,  0,  1, -1, -1,  1,  1, -1, -1,  1,   0,  1, -1,  1, -1,  1, -1,  1, -1};
@@ -1245,7 +1245,7 @@ void findQPressX1(Parameter* para, int lev)
     VF_LOG_CRITICAL("findQPressX1() is deprecated! - see comment above for more information");
 
 	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-	//////////////  E   W   N   S   T   B  NE  SW  SE  NW  TE  BW  BE  TW  TN  BS  BN  TS ZERO TNE BNE TSE BSE TNW BNW TSW BSW  ////////////////////////
+	//////////////  DIR_P00   DIR_M00   DIR_0P0   DIR_0M0   DIR_00P   DIR_00M  DIR_PP0  DIR_MM0  DIR_PM0  DIR_MP0  DIR_P0P  DIR_M0M  DIR_P0M  DIR_M0P  DIR_0PP  DIR_0MM  DIR_0PM  DIR_0MP DIR_000 DIR_PPP DIR_PPM DIR_PMP DIR_PMM DIR_MPP DIR_MPM DIR_MMP DIR_MMM  ////////////////////////
 	//int   ex[27]={  1, -1,  0,  0,  0,  0,  1, -1,  1, -1,  1, -1,  1, -1,  0,  0,  0,  0,   0,  1,  1,  1,  1, -1, -1, -1, -1};
 	//int   ey[27]={  0,  0,  1, -1,  0,  0,  1, -1, -1,  1,  0,  0,  0,  0,  1, -1,  1, -1,   0,  1,  1, -1, -1,  1,  1, -1, -1};
 	//int   ez[27]={  0,  0,  0,  0,  1, -1,  0,  0,  0,  0,  1, -1, -1,  1,  1, -1, -1,  1,   0,  1, -1,  1, -1,  1, -1,  1, -1};
@@ -1272,33 +1272,33 @@ void findQPressX1(Parameter* para, int lev)
 	QIN.numberOfBCnodes = 0;
 	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	QforBoundaryConditions Q;
-	Q.q27[E   ] = &QQ[E   *sizeQ];
-	Q.q27[W   ] = &QQ[W   *sizeQ];
-	Q.q27[N   ] = &QQ[N   *sizeQ];
-	Q.q27[S   ] = &QQ[S   *sizeQ];
-	Q.q27[T   ] = &QQ[T   *sizeQ];
-	Q.q27[B   ] = &QQ[B   *sizeQ];
-	Q.q27[NE  ] = &QQ[NE  *sizeQ];
-	Q.q27[SW  ] = &QQ[SW  *sizeQ];
-	Q.q27[SE  ] = &QQ[SE  *sizeQ];
-	Q.q27[NW  ] = &QQ[NW  *sizeQ];
-	Q.q27[TE  ] = &QQ[TE  *sizeQ];
-	Q.q27[BW  ] = &QQ[BW  *sizeQ];
-	Q.q27[BE  ] = &QQ[BE  *sizeQ];
-	Q.q27[TW  ] = &QQ[TW  *sizeQ];
-	Q.q27[TN  ] = &QQ[TN  *sizeQ];
-	Q.q27[BS  ] = &QQ[BS  *sizeQ];
-	Q.q27[BN  ] = &QQ[BN  *sizeQ];
-	Q.q27[TS  ] = &QQ[TS  *sizeQ];
-	Q.q27[REST] = &QQ[REST*sizeQ];
-	Q.q27[TNE ] = &QQ[TNE *sizeQ];
-	Q.q27[TSW ] = &QQ[TSW *sizeQ];
-	Q.q27[TSE ] = &QQ[TSE *sizeQ];
-	Q.q27[TNW ] = &QQ[TNW *sizeQ];
-	Q.q27[BNE ] = &QQ[BNE *sizeQ];
-	Q.q27[BSW ] = &QQ[BSW *sizeQ];
-	Q.q27[BSE ] = &QQ[BSE *sizeQ];
-	Q.q27[BNW ] = &QQ[BNW *sizeQ];
+	Q.q27[DIR_P00   ] = &QQ[DIR_P00   *sizeQ];
+	Q.q27[DIR_M00   ] = &QQ[DIR_M00   *sizeQ];
+	Q.q27[DIR_0P0   ] = &QQ[DIR_0P0   *sizeQ];
+	Q.q27[DIR_0M0   ] = &QQ[DIR_0M0   *sizeQ];
+	Q.q27[DIR_00P   ] = &QQ[DIR_00P   *sizeQ];
+	Q.q27[DIR_00M   ] = &QQ[DIR_00M   *sizeQ];
+	Q.q27[DIR_PP0  ] = &QQ[DIR_PP0  *sizeQ];
+	Q.q27[DIR_MM0  ] = &QQ[DIR_MM0  *sizeQ];
+	Q.q27[DIR_PM0  ] = &QQ[DIR_PM0  *sizeQ];
+	Q.q27[DIR_MP0  ] = &QQ[DIR_MP0  *sizeQ];
+	Q.q27[DIR_P0P  ] = &QQ[DIR_P0P  *sizeQ];
+	Q.q27[DIR_M0M  ] = &QQ[DIR_M0M  *sizeQ];
+	Q.q27[DIR_P0M  ] = &QQ[DIR_P0M  *sizeQ];
+	Q.q27[DIR_M0P  ] = &QQ[DIR_M0P  *sizeQ];
+	Q.q27[DIR_0PP  ] = &QQ[DIR_0PP  *sizeQ];
+	Q.q27[DIR_0MM  ] = &QQ[DIR_0MM  *sizeQ];
+	Q.q27[DIR_0PM  ] = &QQ[DIR_0PM  *sizeQ];
+	Q.q27[DIR_0MP  ] = &QQ[DIR_0MP  *sizeQ];
+	Q.q27[DIR_000] = &QQ[DIR_000*sizeQ];
+	Q.q27[DIR_PPP ] = &QQ[DIR_PPP *sizeQ];
+	Q.q27[DIR_MMP ] = &QQ[DIR_MMP *sizeQ];
+	Q.q27[DIR_PMP ] = &QQ[DIR_PMP *sizeQ];
+	Q.q27[DIR_MPP ] = &QQ[DIR_MPP *sizeQ];
+	Q.q27[DIR_PPM ] = &QQ[DIR_PPM *sizeQ];
+	Q.q27[DIR_MMM ] = &QQ[DIR_MMM *sizeQ];
+	Q.q27[DIR_PMM ] = &QQ[DIR_PMM *sizeQ];
+	Q.q27[DIR_MPM ] = &QQ[DIR_MPM *sizeQ];
 
 
 	//unsigned int li = ((nnx+STARTOFFX-2)-(STARTOFFX+1)-1);
@@ -1321,33 +1321,33 @@ void findQPressX1(Parameter* para, int lev)
 				//vz[QIN.numberOfBCnodes]             =  (real)(16.f*(u0*2.f)*(i-(STARTOFFX+1)-0.5f)*(li-1.5f-(i-(STARTOFFX+1)))*(j-(STARTOFFY+1)-0.5f)*(lj-1.5f-(j-(STARTOFFY+1))))/(li*lj*li*lj);
 				//vz[QIN.numberOfBCnodes]             = (real)(16.f*(u0*2.f)*i*j*(nx-i)*(ny-j))/(nx*nx*ny*ny);
 				deltaVz[QIN.numberOfBCnodes]        = (real)0.f;
-				Q.q27[E   ][QIN.numberOfBCnodes] = (real)1.f;
-				Q.q27[W   ][QIN.numberOfBCnodes] = (real)-1.f;
-				Q.q27[N   ][QIN.numberOfBCnodes] = (real)-1.f;
-				Q.q27[S   ][QIN.numberOfBCnodes] = (real)-1.f;
-				Q.q27[T   ][QIN.numberOfBCnodes] = (real)-1.f;
-				Q.q27[B   ][QIN.numberOfBCnodes] = (real)-1.f;
-				Q.q27[NE  ][QIN.numberOfBCnodes] = (real)1.f;
-				Q.q27[SW  ][QIN.numberOfBCnodes] = (real)-1.f;
-				Q.q27[SE  ][QIN.numberOfBCnodes] = (real)1.f;
-				Q.q27[NW  ][QIN.numberOfBCnodes] = (real)-1.f;
-				Q.q27[TE  ][QIN.numberOfBCnodes] = (real)1.f;
-				Q.q27[BW  ][QIN.numberOfBCnodes] = (real)-1.f;
-				Q.q27[BE  ][QIN.numberOfBCnodes] = (real)1.f;
-				Q.q27[TW  ][QIN.numberOfBCnodes] = (real)-1.f;
-				Q.q27[TN  ][QIN.numberOfBCnodes] = (real)-1.f;
-				Q.q27[BS  ][QIN.numberOfBCnodes] = (real)-1.f;
-				Q.q27[BN  ][QIN.numberOfBCnodes] = (real)-1.f;
-				Q.q27[TS  ][QIN.numberOfBCnodes] = (real)-1.f;
-				Q.q27[REST][QIN.numberOfBCnodes] = (real)-1.f;
-				Q.q27[TNE ][QIN.numberOfBCnodes] = (real)1.f;
-				Q.q27[TSW ][QIN.numberOfBCnodes] = (real)-1.f;
-				Q.q27[TSE ][QIN.numberOfBCnodes] = (real)1.f;
-				Q.q27[TNW ][QIN.numberOfBCnodes] = (real)-1.f;
-				Q.q27[BNE ][QIN.numberOfBCnodes] = (real)1.f;
-				Q.q27[BSW ][QIN.numberOfBCnodes] = (real)-1.f;
-				Q.q27[BSE ][QIN.numberOfBCnodes] = (real)1.f;
-				Q.q27[BNW ][QIN.numberOfBCnodes] = (real)-1.f;
+				Q.q27[DIR_P00   ][QIN.numberOfBCnodes] = (real)1.f;
+				Q.q27[DIR_M00   ][QIN.numberOfBCnodes] = (real)-1.f;
+				Q.q27[DIR_0P0   ][QIN.numberOfBCnodes] = (real)-1.f;
+				Q.q27[DIR_0M0   ][QIN.numberOfBCnodes] = (real)-1.f;
+				Q.q27[DIR_00P   ][QIN.numberOfBCnodes] = (real)-1.f;
+				Q.q27[DIR_00M   ][QIN.numberOfBCnodes] = (real)-1.f;
+				Q.q27[DIR_PP0  ][QIN.numberOfBCnodes] = (real)1.f;
+				Q.q27[DIR_MM0  ][QIN.numberOfBCnodes] = (real)-1.f;
+				Q.q27[DIR_PM0  ][QIN.numberOfBCnodes] = (real)1.f;
+				Q.q27[DIR_MP0  ][QIN.numberOfBCnodes] = (real)-1.f;
+				Q.q27[DIR_P0P  ][QIN.numberOfBCnodes] = (real)1.f;
+				Q.q27[DIR_M0M  ][QIN.numberOfBCnodes] = (real)-1.f;
+				Q.q27[DIR_P0M  ][QIN.numberOfBCnodes] = (real)1.f;
+				Q.q27[DIR_M0P  ][QIN.numberOfBCnodes] = (real)-1.f;
+				Q.q27[DIR_0PP  ][QIN.numberOfBCnodes] = (real)-1.f;
+				Q.q27[DIR_0MM  ][QIN.numberOfBCnodes] = (real)-1.f;
+				Q.q27[DIR_0PM  ][QIN.numberOfBCnodes] = (real)-1.f;
+				Q.q27[DIR_0MP  ][QIN.numberOfBCnodes] = (real)-1.f;
+				Q.q27[DIR_000][QIN.numberOfBCnodes] = (real)-1.f;
+				Q.q27[DIR_PPP ][QIN.numberOfBCnodes] = (real)1.f;
+				Q.q27[DIR_MMP ][QIN.numberOfBCnodes] = (real)-1.f;
+				Q.q27[DIR_PMP ][QIN.numberOfBCnodes] = (real)1.f;
+				Q.q27[DIR_MPP ][QIN.numberOfBCnodes] = (real)-1.f;
+				Q.q27[DIR_PPM ][QIN.numberOfBCnodes] = (real)1.f;
+				Q.q27[DIR_MMM ][QIN.numberOfBCnodes] = (real)-1.f;
+				Q.q27[DIR_PMM ][QIN.numberOfBCnodes] = (real)1.f;
+				Q.q27[DIR_MPM ][QIN.numberOfBCnodes] = (real)-1.f;
 				QIN.numberOfBCnodes++;
 			}
 		}
@@ -1363,7 +1363,7 @@ void findKforQPressX1(Parameter* para, int lev)
     VF_LOG_CRITICAL("findKforQPressX1() is deprecated! - see comment above for more information");
 
 	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-	//////////////  E   W   N   S   T   B  NE  SW  SE  NW  TE  BW  BE  TW  TN  BS  BN  TS ZERO TNE BNE TSE BSE TNW BNW TSW BSW  ////////////////////////
+	//////////////  DIR_P00   DIR_M00   DIR_0P0   DIR_0M0   DIR_00P   DIR_00M  DIR_PP0  DIR_MM0  DIR_PM0  DIR_MP0  DIR_P0P  DIR_M0M  DIR_P0M  DIR_M0P  DIR_0PP  DIR_0MM  DIR_0PM  DIR_0MP DIR_000 DIR_PPP DIR_PPM DIR_PMP DIR_PMM DIR_MPP DIR_MPM DIR_MMP DIR_MMM  ////////////////////////
 	//int   ex[27]={  1, -1,  0,  0,  0,  0,  1, -1,  1, -1,  1, -1,  1, -1,  0,  0,  0,  0,   0,  1,  1,  1,  1, -1, -1, -1, -1};
 	//int   ey[27]={  0,  0,  1, -1,  0,  0,  1, -1, -1,  1,  0,  0,  0,  0,  1, -1,  1, -1,   0,  1,  1, -1, -1,  1,  1, -1, -1};
 	int   ez[27]={  0,  0,  0,  0,  1, -1,  0,  0,  0,  0,  1, -1, -1,  1,  1, -1, -1,  1,   0,  1, -1,  1, -1,  1, -1,  1, -1};
diff --git a/src/gpu/VirtualFluids_GPU/FindQ/FindQ.h b/src/gpu/VirtualFluids_GPU/FindQ/FindQ.h
index 25575bb38b5d57c40cc4c9da973b12fa4a30bd2e..551205bd54d9685e9aa6ab8be47ac9e274546f40 100644
--- a/src/gpu/VirtualFluids_GPU/FindQ/FindQ.h
+++ b/src/gpu/VirtualFluids_GPU/FindQ/FindQ.h
@@ -5,42 +5,42 @@
 #include "lbm/constants/D3Q27.h"
 #include "Parameter/Parameter.h"
 
-extern "C" void findQ(Parameter* para, int lev);
+void findQ(Parameter* para, int lev);
 
-extern "C" void findKforQ(Parameter* para, int lev);
+void findKforQ(Parameter* para, int lev);
 
-extern "C" void findQ_MG(int nx, int ny, unsigned int nnx, unsigned int nny, unsigned int nnz, int* geo_mat, unsigned int* kk, unsigned int sizeQ, real* QQ, QforBoundaryConditions &QIN);
+void findQ_MG(int nx, int ny, unsigned int nnx, unsigned int nny, unsigned int nnz, int* geo_mat, unsigned int* kk, unsigned int sizeQ, real* QQ, QforBoundaryConditions &QIN);
 
-extern "C" void findKforQ_MG(int nx, int ny, unsigned int nnx, unsigned int nny, unsigned int nnz, int* geo_mat, QforBoundaryConditions &QIN);
+void findKforQ_MG(int nx, int ny, unsigned int nnx, unsigned int nny, unsigned int nnz, int* geo_mat, QforBoundaryConditions &QIN);
 
-extern "C" void findQInflow(Parameter* para);
+void findQInflow(Parameter* para);
 
-extern "C" void findKforQInflow(Parameter* para);
+void findKforQInflow(Parameter* para);
 
-extern "C" void findQPressInflow(Parameter* para);
+void findQPressInflow(Parameter* para);
 
-extern "C" void findKforQPressInflow(Parameter* para);
+void findKforQPressInflow(Parameter* para);
 
-extern "C" void findQOutflow(Parameter* para);
+void findQOutflow(Parameter* para);
 
-extern "C" void findKforQOutflow(Parameter* para);
+void findKforQOutflow(Parameter* para);
 
 // TODO: https://git.rz.tu-bs.de/irmb/VirtualFluids_dev/-/issues/29
-//extern "C" void findQSchlaff( int nx, int ny, unsigned int nnx, unsigned int nny, unsigned int nnz, int* geo_mat, unsigned int* kk,
+//void findQSchlaff( int nx, int ny, unsigned int nnx, unsigned int nny, unsigned int nnz, int* geo_mat, unsigned int* kk,
 //                              unsigned int sizeQN, real* vxN, real* vyN, real* vzN, real*deltaVN, real* QQN, QforBoundaryConditions &QNin,
 //                              unsigned int sizeQS, real* vxS, real* vyS, real* vzS, real*deltaVS, real* QQS, QforBoundaryConditions &QSin,
 //                              unsigned int sizeQE, real* vxE, real* vyE, real* vzE, real*deltaVE, real* QQE, QforBoundaryConditions &QEin,
 //                              unsigned int sizeQW, real* vxW, real* vyW, real* vzW, real*deltaVW, real* QQW, QforBoundaryConditions &QWin);
 //
-//extern "C" void findKforQSchlaff(int nx, int ny, unsigned int nnx, unsigned int nny, unsigned int nnz, int* geo_mat, QforBoundaryConditions &QN, QforBoundaryConditions &QS, QforBoundaryConditions &QE, QforBoundaryConditions &QW);
+//void findKforQSchlaff(int nx, int ny, unsigned int nnx, unsigned int nny, unsigned int nnz, int* geo_mat, QforBoundaryConditions &QN, QforBoundaryConditions &QS, QforBoundaryConditions &QE, QforBoundaryConditions &QW);
 
 
-extern "C" void findKforQPressX1(Parameter* para, int lev);
+void findKforQPressX1(Parameter* para, int lev);
 
-extern "C" void findQPressX1(Parameter* para, int lev);
+void findQPressX1(Parameter* para, int lev);
 
-extern "C" void findKforQPressX0(Parameter* para, int lev);
+void findKforQPressX0(Parameter* para, int lev);
 
-extern "C" void findQPressX0(Parameter* para, int lev);
+void findQPressX0(Parameter* para, int lev);
 
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/GPU/AdvectionDiffusion27chim.cu b/src/gpu/VirtualFluids_GPU/GPU/AdvectionDiffusion27chim.cu
index 9dfa96e10c750b3494bc0733421cd91b9219dfb5..8f54358e04063c9063c873caf02a86e76bb7f936 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/AdvectionDiffusion27chim.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/AdvectionDiffusion27chim.cu
@@ -66,7 +66,7 @@ inline __device__ void backwardChimera(real &mfa, real &mfb, real &mfc, real vv,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void Factorized_Central_Moments_Advection_Diffusion_Device_Kernel(
+__global__ void Factorized_Central_Moments_Advection_Diffusion_Device_Kernel(
 	real omegaDiffusivity,
 	uint* typeOfGridNode,
 	uint* neighborX,
@@ -109,125 +109,125 @@ extern "C" __global__ void Factorized_Central_Moments_Advection_Diffusion_Device
 		Distributions27 dist;
 		if (isEvenTimestep)
 		{
-			dist.f[E   ] = &distributions[E   *size_Mat];
-			dist.f[W   ] = &distributions[W   *size_Mat];
-			dist.f[N   ] = &distributions[N   *size_Mat];
-			dist.f[S   ] = &distributions[S   *size_Mat];
-			dist.f[T   ] = &distributions[T   *size_Mat];
-			dist.f[B   ] = &distributions[B   *size_Mat];
-			dist.f[NE  ] = &distributions[NE  *size_Mat];
-			dist.f[SW  ] = &distributions[SW  *size_Mat];
-			dist.f[SE  ] = &distributions[SE  *size_Mat];
-			dist.f[NW  ] = &distributions[NW  *size_Mat];
-			dist.f[TE  ] = &distributions[TE  *size_Mat];
-			dist.f[BW  ] = &distributions[BW  *size_Mat];
-			dist.f[BE  ] = &distributions[BE  *size_Mat];
-			dist.f[TW  ] = &distributions[TW  *size_Mat];
-			dist.f[TN  ] = &distributions[TN  *size_Mat];
-			dist.f[BS  ] = &distributions[BS  *size_Mat];
-			dist.f[BN  ] = &distributions[BN  *size_Mat];
-			dist.f[TS  ] = &distributions[TS  *size_Mat];
-			dist.f[REST] = &distributions[REST*size_Mat];
-			dist.f[TNE ] = &distributions[TNE *size_Mat];
-			dist.f[TSW ] = &distributions[TSW *size_Mat];
-			dist.f[TSE ] = &distributions[TSE *size_Mat];
-			dist.f[TNW ] = &distributions[TNW *size_Mat];
-			dist.f[BNE ] = &distributions[BNE *size_Mat];
-			dist.f[BSW ] = &distributions[BSW *size_Mat];
-			dist.f[BSE ] = &distributions[BSE *size_Mat];
-			dist.f[BNW ] = &distributions[BNW *size_Mat];
+			dist.f[DIR_P00   ] = &distributions[DIR_P00   *size_Mat];
+			dist.f[DIR_M00   ] = &distributions[DIR_M00   *size_Mat];
+			dist.f[DIR_0P0   ] = &distributions[DIR_0P0   *size_Mat];
+			dist.f[DIR_0M0   ] = &distributions[DIR_0M0   *size_Mat];
+			dist.f[DIR_00P   ] = &distributions[DIR_00P   *size_Mat];
+			dist.f[DIR_00M   ] = &distributions[DIR_00M   *size_Mat];
+			dist.f[DIR_PP0  ] = &distributions[DIR_PP0  *size_Mat];
+			dist.f[DIR_MM0  ] = &distributions[DIR_MM0  *size_Mat];
+			dist.f[DIR_PM0  ] = &distributions[DIR_PM0  *size_Mat];
+			dist.f[DIR_MP0  ] = &distributions[DIR_MP0  *size_Mat];
+			dist.f[DIR_P0P  ] = &distributions[DIR_P0P  *size_Mat];
+			dist.f[DIR_M0M  ] = &distributions[DIR_M0M  *size_Mat];
+			dist.f[DIR_P0M  ] = &distributions[DIR_P0M  *size_Mat];
+			dist.f[DIR_M0P  ] = &distributions[DIR_M0P  *size_Mat];
+			dist.f[DIR_0PP  ] = &distributions[DIR_0PP  *size_Mat];
+			dist.f[DIR_0MM  ] = &distributions[DIR_0MM  *size_Mat];
+			dist.f[DIR_0PM  ] = &distributions[DIR_0PM  *size_Mat];
+			dist.f[DIR_0MP  ] = &distributions[DIR_0MP  *size_Mat];
+			dist.f[DIR_000] = &distributions[DIR_000*size_Mat];
+			dist.f[DIR_PPP ] = &distributions[DIR_PPP *size_Mat];
+			dist.f[DIR_MMP ] = &distributions[DIR_MMP *size_Mat];
+			dist.f[DIR_PMP ] = &distributions[DIR_PMP *size_Mat];
+			dist.f[DIR_MPP ] = &distributions[DIR_MPP *size_Mat];
+			dist.f[DIR_PPM ] = &distributions[DIR_PPM *size_Mat];
+			dist.f[DIR_MMM ] = &distributions[DIR_MMM *size_Mat];
+			dist.f[DIR_PMM ] = &distributions[DIR_PMM *size_Mat];
+			dist.f[DIR_MPM ] = &distributions[DIR_MPM *size_Mat];
 		}
 		else
 		{
-			dist.f[W   ] = &distributions[E   *size_Mat];
-			dist.f[E   ] = &distributions[W   *size_Mat];
-			dist.f[S   ] = &distributions[N   *size_Mat];
-			dist.f[N   ] = &distributions[S   *size_Mat];
-			dist.f[B   ] = &distributions[T   *size_Mat];
-			dist.f[T   ] = &distributions[B   *size_Mat];
-			dist.f[SW  ] = &distributions[NE  *size_Mat];
-			dist.f[NE  ] = &distributions[SW  *size_Mat];
-			dist.f[NW  ] = &distributions[SE  *size_Mat];
-			dist.f[SE  ] = &distributions[NW  *size_Mat];
-			dist.f[BW  ] = &distributions[TE  *size_Mat];
-			dist.f[TE  ] = &distributions[BW  *size_Mat];
-			dist.f[TW  ] = &distributions[BE  *size_Mat];
-			dist.f[BE  ] = &distributions[TW  *size_Mat];
-			dist.f[BS  ] = &distributions[TN  *size_Mat];
-			dist.f[TN  ] = &distributions[BS  *size_Mat];
-			dist.f[TS  ] = &distributions[BN  *size_Mat];
-			dist.f[BN  ] = &distributions[TS  *size_Mat];
-			dist.f[REST] = &distributions[REST*size_Mat];
-			dist.f[BSW ] = &distributions[TNE *size_Mat];
-			dist.f[BNE ] = &distributions[TSW *size_Mat];
-			dist.f[BNW ] = &distributions[TSE *size_Mat];
-			dist.f[BSE ] = &distributions[TNW *size_Mat];
-			dist.f[TSW ] = &distributions[BNE *size_Mat];
-			dist.f[TNE ] = &distributions[BSW *size_Mat];
-			dist.f[TNW ] = &distributions[BSE *size_Mat];
-			dist.f[TSE ] = &distributions[BNW *size_Mat];
+			dist.f[DIR_M00   ] = &distributions[DIR_P00   *size_Mat];
+			dist.f[DIR_P00   ] = &distributions[DIR_M00   *size_Mat];
+			dist.f[DIR_0M0   ] = &distributions[DIR_0P0   *size_Mat];
+			dist.f[DIR_0P0   ] = &distributions[DIR_0M0   *size_Mat];
+			dist.f[DIR_00M   ] = &distributions[DIR_00P   *size_Mat];
+			dist.f[DIR_00P   ] = &distributions[DIR_00M   *size_Mat];
+			dist.f[DIR_MM0  ] = &distributions[DIR_PP0  *size_Mat];
+			dist.f[DIR_PP0  ] = &distributions[DIR_MM0  *size_Mat];
+			dist.f[DIR_MP0  ] = &distributions[DIR_PM0  *size_Mat];
+			dist.f[DIR_PM0  ] = &distributions[DIR_MP0  *size_Mat];
+			dist.f[DIR_M0M  ] = &distributions[DIR_P0P  *size_Mat];
+			dist.f[DIR_P0P  ] = &distributions[DIR_M0M  *size_Mat];
+			dist.f[DIR_M0P  ] = &distributions[DIR_P0M  *size_Mat];
+			dist.f[DIR_P0M  ] = &distributions[DIR_M0P  *size_Mat];
+			dist.f[DIR_0MM  ] = &distributions[DIR_0PP  *size_Mat];
+			dist.f[DIR_0PP  ] = &distributions[DIR_0MM  *size_Mat];
+			dist.f[DIR_0MP  ] = &distributions[DIR_0PM  *size_Mat];
+			dist.f[DIR_0PM  ] = &distributions[DIR_0MP  *size_Mat];
+			dist.f[DIR_000] = &distributions[DIR_000*size_Mat];
+			dist.f[DIR_MMM ] = &distributions[DIR_PPP *size_Mat];
+			dist.f[DIR_PPM ] = &distributions[DIR_MMP *size_Mat];
+			dist.f[DIR_MPM ] = &distributions[DIR_PMP *size_Mat];
+			dist.f[DIR_PMM ] = &distributions[DIR_MPP *size_Mat];
+			dist.f[DIR_MMP ] = &distributions[DIR_PPM *size_Mat];
+			dist.f[DIR_PPP ] = &distributions[DIR_MMM *size_Mat];
+			dist.f[DIR_MPP ] = &distributions[DIR_PMM *size_Mat];
+			dist.f[DIR_PMP ] = &distributions[DIR_MPM *size_Mat];
 		}
 		////////////////////////////////////////////////////////////////////////////////
 		Distributions27 distAD;
 		if (isEvenTimestep)
 		{
-			distAD.f[E   ] = &distributionsAD[E   *size_Mat];
-			distAD.f[W   ] = &distributionsAD[W   *size_Mat];
-			distAD.f[N   ] = &distributionsAD[N   *size_Mat];
-			distAD.f[S   ] = &distributionsAD[S   *size_Mat];
-			distAD.f[T   ] = &distributionsAD[T   *size_Mat];
-			distAD.f[B   ] = &distributionsAD[B   *size_Mat];
-			distAD.f[NE  ] = &distributionsAD[NE  *size_Mat];
-			distAD.f[SW  ] = &distributionsAD[SW  *size_Mat];
-			distAD.f[SE  ] = &distributionsAD[SE  *size_Mat];
-			distAD.f[NW  ] = &distributionsAD[NW  *size_Mat];
-			distAD.f[TE  ] = &distributionsAD[TE  *size_Mat];
-			distAD.f[BW  ] = &distributionsAD[BW  *size_Mat];
-			distAD.f[BE  ] = &distributionsAD[BE  *size_Mat];
-			distAD.f[TW  ] = &distributionsAD[TW  *size_Mat];
-			distAD.f[TN  ] = &distributionsAD[TN  *size_Mat];
-			distAD.f[BS  ] = &distributionsAD[BS  *size_Mat];
-			distAD.f[BN  ] = &distributionsAD[BN  *size_Mat];
-			distAD.f[TS  ] = &distributionsAD[TS  *size_Mat];
-			distAD.f[REST] = &distributionsAD[REST*size_Mat];
-			distAD.f[TNE ] = &distributionsAD[TNE *size_Mat];
-			distAD.f[TSW ] = &distributionsAD[TSW *size_Mat];
-			distAD.f[TSE ] = &distributionsAD[TSE *size_Mat];
-			distAD.f[TNW ] = &distributionsAD[TNW *size_Mat];
-			distAD.f[BNE ] = &distributionsAD[BNE *size_Mat];
-			distAD.f[BSW ] = &distributionsAD[BSW *size_Mat];
-			distAD.f[BSE ] = &distributionsAD[BSE *size_Mat];
-			distAD.f[BNW ] = &distributionsAD[BNW *size_Mat];
+			distAD.f[DIR_P00   ] = &distributionsAD[DIR_P00   *size_Mat];
+			distAD.f[DIR_M00   ] = &distributionsAD[DIR_M00   *size_Mat];
+			distAD.f[DIR_0P0   ] = &distributionsAD[DIR_0P0   *size_Mat];
+			distAD.f[DIR_0M0   ] = &distributionsAD[DIR_0M0   *size_Mat];
+			distAD.f[DIR_00P   ] = &distributionsAD[DIR_00P   *size_Mat];
+			distAD.f[DIR_00M   ] = &distributionsAD[DIR_00M   *size_Mat];
+			distAD.f[DIR_PP0  ] = &distributionsAD[DIR_PP0  *size_Mat];
+			distAD.f[DIR_MM0  ] = &distributionsAD[DIR_MM0  *size_Mat];
+			distAD.f[DIR_PM0  ] = &distributionsAD[DIR_PM0  *size_Mat];
+			distAD.f[DIR_MP0  ] = &distributionsAD[DIR_MP0  *size_Mat];
+			distAD.f[DIR_P0P  ] = &distributionsAD[DIR_P0P  *size_Mat];
+			distAD.f[DIR_M0M  ] = &distributionsAD[DIR_M0M  *size_Mat];
+			distAD.f[DIR_P0M  ] = &distributionsAD[DIR_P0M  *size_Mat];
+			distAD.f[DIR_M0P  ] = &distributionsAD[DIR_M0P  *size_Mat];
+			distAD.f[DIR_0PP  ] = &distributionsAD[DIR_0PP  *size_Mat];
+			distAD.f[DIR_0MM  ] = &distributionsAD[DIR_0MM  *size_Mat];
+			distAD.f[DIR_0PM  ] = &distributionsAD[DIR_0PM  *size_Mat];
+			distAD.f[DIR_0MP  ] = &distributionsAD[DIR_0MP  *size_Mat];
+			distAD.f[DIR_000] = &distributionsAD[DIR_000*size_Mat];
+			distAD.f[DIR_PPP ] = &distributionsAD[DIR_PPP *size_Mat];
+			distAD.f[DIR_MMP ] = &distributionsAD[DIR_MMP *size_Mat];
+			distAD.f[DIR_PMP ] = &distributionsAD[DIR_PMP *size_Mat];
+			distAD.f[DIR_MPP ] = &distributionsAD[DIR_MPP *size_Mat];
+			distAD.f[DIR_PPM ] = &distributionsAD[DIR_PPM *size_Mat];
+			distAD.f[DIR_MMM ] = &distributionsAD[DIR_MMM *size_Mat];
+			distAD.f[DIR_PMM ] = &distributionsAD[DIR_PMM *size_Mat];
+			distAD.f[DIR_MPM ] = &distributionsAD[DIR_MPM *size_Mat];
 		}
 		else
 		{
-			distAD.f[W   ] = &distributionsAD[E   *size_Mat];
-			distAD.f[E   ] = &distributionsAD[W   *size_Mat];
-			distAD.f[S   ] = &distributionsAD[N   *size_Mat];
-			distAD.f[N   ] = &distributionsAD[S   *size_Mat];
-			distAD.f[B   ] = &distributionsAD[T   *size_Mat];
-			distAD.f[T   ] = &distributionsAD[B   *size_Mat];
-			distAD.f[SW  ] = &distributionsAD[NE  *size_Mat];
-			distAD.f[NE  ] = &distributionsAD[SW  *size_Mat];
-			distAD.f[NW  ] = &distributionsAD[SE  *size_Mat];
-			distAD.f[SE  ] = &distributionsAD[NW  *size_Mat];
-			distAD.f[BW  ] = &distributionsAD[TE  *size_Mat];
-			distAD.f[TE  ] = &distributionsAD[BW  *size_Mat];
-			distAD.f[TW  ] = &distributionsAD[BE  *size_Mat];
-			distAD.f[BE  ] = &distributionsAD[TW  *size_Mat];
-			distAD.f[BS  ] = &distributionsAD[TN  *size_Mat];
-			distAD.f[TN  ] = &distributionsAD[BS  *size_Mat];
-			distAD.f[TS  ] = &distributionsAD[BN  *size_Mat];
-			distAD.f[BN  ] = &distributionsAD[TS  *size_Mat];
-			distAD.f[REST] = &distributionsAD[REST*size_Mat];
-			distAD.f[BSW ] = &distributionsAD[TNE *size_Mat];
-			distAD.f[BNE ] = &distributionsAD[TSW *size_Mat];
-			distAD.f[BNW ] = &distributionsAD[TSE *size_Mat];
-			distAD.f[BSE ] = &distributionsAD[TNW *size_Mat];
-			distAD.f[TSW ] = &distributionsAD[BNE *size_Mat];
-			distAD.f[TNE ] = &distributionsAD[BSW *size_Mat];
-			distAD.f[TNW ] = &distributionsAD[BSE *size_Mat];
-			distAD.f[TSE ] = &distributionsAD[BNW *size_Mat];
+			distAD.f[DIR_M00   ] = &distributionsAD[DIR_P00   *size_Mat];
+			distAD.f[DIR_P00   ] = &distributionsAD[DIR_M00   *size_Mat];
+			distAD.f[DIR_0M0   ] = &distributionsAD[DIR_0P0   *size_Mat];
+			distAD.f[DIR_0P0   ] = &distributionsAD[DIR_0M0   *size_Mat];
+			distAD.f[DIR_00M   ] = &distributionsAD[DIR_00P   *size_Mat];
+			distAD.f[DIR_00P   ] = &distributionsAD[DIR_00M   *size_Mat];
+			distAD.f[DIR_MM0  ] = &distributionsAD[DIR_PP0  *size_Mat];
+			distAD.f[DIR_PP0  ] = &distributionsAD[DIR_MM0  *size_Mat];
+			distAD.f[DIR_MP0  ] = &distributionsAD[DIR_PM0  *size_Mat];
+			distAD.f[DIR_PM0  ] = &distributionsAD[DIR_MP0  *size_Mat];
+			distAD.f[DIR_M0M  ] = &distributionsAD[DIR_P0P  *size_Mat];
+			distAD.f[DIR_P0P  ] = &distributionsAD[DIR_M0M  *size_Mat];
+			distAD.f[DIR_M0P  ] = &distributionsAD[DIR_P0M  *size_Mat];
+			distAD.f[DIR_P0M  ] = &distributionsAD[DIR_M0P  *size_Mat];
+			distAD.f[DIR_0MM  ] = &distributionsAD[DIR_0PP  *size_Mat];
+			distAD.f[DIR_0PP  ] = &distributionsAD[DIR_0MM  *size_Mat];
+			distAD.f[DIR_0MP  ] = &distributionsAD[DIR_0PM  *size_Mat];
+			distAD.f[DIR_0PM  ] = &distributionsAD[DIR_0MP  *size_Mat];
+			distAD.f[DIR_000] = &distributionsAD[DIR_000*size_Mat];
+			distAD.f[DIR_MMM ] = &distributionsAD[DIR_PPP *size_Mat];
+			distAD.f[DIR_PPM ] = &distributionsAD[DIR_MMP *size_Mat];
+			distAD.f[DIR_MPM ] = &distributionsAD[DIR_PMP *size_Mat];
+			distAD.f[DIR_PMM ] = &distributionsAD[DIR_MPP *size_Mat];
+			distAD.f[DIR_MMP ] = &distributionsAD[DIR_PPM *size_Mat];
+			distAD.f[DIR_PPP ] = &distributionsAD[DIR_MMM *size_Mat];
+			distAD.f[DIR_MPP ] = &distributionsAD[DIR_PMM *size_Mat];
+			distAD.f[DIR_PMP ] = &distributionsAD[DIR_MPM *size_Mat];
 		}
 		////////////////////////////////////////////////////////////////////////////////
 		//! - Set neighbor indices (necessary for indirect addressing)
@@ -241,63 +241,63 @@ extern "C" __global__ void Factorized_Central_Moments_Advection_Diffusion_Device
 		////////////////////////////////////////////////////////////////////////////////////
 		//! - Set local distributions Fluid
 		//!
-		real fcbb = (dist.f[E   ])[k];
-		real fabb = (dist.f[W   ])[kw];
-		real fbcb = (dist.f[N   ])[k];
-		real fbab = (dist.f[S   ])[ks];
-		real fbbc = (dist.f[T   ])[k];
-		real fbba = (dist.f[B   ])[kb];
-		real fccb = (dist.f[NE  ])[k];
-		real faab = (dist.f[SW  ])[ksw];
-		real fcab = (dist.f[SE  ])[ks];
-		real facb = (dist.f[NW  ])[kw];
-		real fcbc = (dist.f[TE  ])[k];
-		real faba = (dist.f[BW  ])[kbw];
-		real fcba = (dist.f[BE  ])[kb];
-		real fabc = (dist.f[TW  ])[kw];
-		real fbcc = (dist.f[TN  ])[k];
-		real fbaa = (dist.f[BS  ])[kbs];
-		real fbca = (dist.f[BN  ])[kb];
-		real fbac = (dist.f[TS  ])[ks];
-		real fbbb = (dist.f[REST])[k];
-		real fccc = (dist.f[TNE ])[k];
-		real faac = (dist.f[TSW ])[ksw];
-		real fcac = (dist.f[TSE ])[ks];
-		real facc = (dist.f[TNW ])[kw];
-		real fcca = (dist.f[BNE ])[kb];
-		real faaa = (dist.f[BSW ])[kbsw];
-		real fcaa = (dist.f[BSE ])[kbs];
-		real faca = (dist.f[BNW ])[kbw];
+		real fcbb = (dist.f[DIR_P00   ])[k];
+		real fabb = (dist.f[DIR_M00   ])[kw];
+		real fbcb = (dist.f[DIR_0P0   ])[k];
+		real fbab = (dist.f[DIR_0M0   ])[ks];
+		real fbbc = (dist.f[DIR_00P   ])[k];
+		real fbba = (dist.f[DIR_00M   ])[kb];
+		real fccb = (dist.f[DIR_PP0  ])[k];
+		real faab = (dist.f[DIR_MM0  ])[ksw];
+		real fcab = (dist.f[DIR_PM0  ])[ks];
+		real facb = (dist.f[DIR_MP0  ])[kw];
+		real fcbc = (dist.f[DIR_P0P  ])[k];
+		real faba = (dist.f[DIR_M0M  ])[kbw];
+		real fcba = (dist.f[DIR_P0M  ])[kb];
+		real fabc = (dist.f[DIR_M0P  ])[kw];
+		real fbcc = (dist.f[DIR_0PP  ])[k];
+		real fbaa = (dist.f[DIR_0MM  ])[kbs];
+		real fbca = (dist.f[DIR_0PM  ])[kb];
+		real fbac = (dist.f[DIR_0MP  ])[ks];
+		real fbbb = (dist.f[DIR_000])[k];
+		real fccc = (dist.f[DIR_PPP ])[k];
+		real faac = (dist.f[DIR_MMP ])[ksw];
+		real fcac = (dist.f[DIR_PMP ])[ks];
+		real facc = (dist.f[DIR_MPP ])[kw];
+		real fcca = (dist.f[DIR_PPM ])[kb];
+		real faaa = (dist.f[DIR_MMM ])[kbsw];
+		real fcaa = (dist.f[DIR_PMM ])[kbs];
+		real faca = (dist.f[DIR_MPM ])[kbw];
 		////////////////////////////////////////////////////////////////////////////////////
 		//! - Set local distributions Advection Diffusion
 		//!
-		real mfcbb = (distAD.f[E   ])[k];
-		real mfabb = (distAD.f[W   ])[kw];
-		real mfbcb = (distAD.f[N   ])[k];
-		real mfbab = (distAD.f[S   ])[ks];
-		real mfbbc = (distAD.f[T   ])[k];
-		real mfbba = (distAD.f[B   ])[kb];
-		real mfccb = (distAD.f[NE  ])[k];
-		real mfaab = (distAD.f[SW  ])[ksw];
-		real mfcab = (distAD.f[SE  ])[ks];
-		real mfacb = (distAD.f[NW  ])[kw];
-		real mfcbc = (distAD.f[TE  ])[k];
-		real mfaba = (distAD.f[BW  ])[kbw];
-		real mfcba = (distAD.f[BE  ])[kb];
-		real mfabc = (distAD.f[TW  ])[kw];
-		real mfbcc = (distAD.f[TN  ])[k];
-		real mfbaa = (distAD.f[BS  ])[kbs];
-		real mfbca = (distAD.f[BN  ])[kb];
-		real mfbac = (distAD.f[TS  ])[ks];
-		real mfbbb = (distAD.f[REST])[k];
-		real mfccc = (distAD.f[TNE ])[k];
-		real mfaac = (distAD.f[TSW ])[ksw];
-		real mfcac = (distAD.f[TSE ])[ks];
-		real mfacc = (distAD.f[TNW ])[kw];
-		real mfcca = (distAD.f[BNE ])[kb];
-		real mfaaa = (distAD.f[BSW ])[kbsw];
-		real mfcaa = (distAD.f[BSE ])[kbs];
-		real mfaca = (distAD.f[BNW ])[kbw];
+		real mfcbb = (distAD.f[DIR_P00   ])[k];
+		real mfabb = (distAD.f[DIR_M00   ])[kw];
+		real mfbcb = (distAD.f[DIR_0P0   ])[k];
+		real mfbab = (distAD.f[DIR_0M0   ])[ks];
+		real mfbbc = (distAD.f[DIR_00P   ])[k];
+		real mfbba = (distAD.f[DIR_00M   ])[kb];
+		real mfccb = (distAD.f[DIR_PP0  ])[k];
+		real mfaab = (distAD.f[DIR_MM0  ])[ksw];
+		real mfcab = (distAD.f[DIR_PM0  ])[ks];
+		real mfacb = (distAD.f[DIR_MP0  ])[kw];
+		real mfcbc = (distAD.f[DIR_P0P  ])[k];
+		real mfaba = (distAD.f[DIR_M0M  ])[kbw];
+		real mfcba = (distAD.f[DIR_P0M  ])[kb];
+		real mfabc = (distAD.f[DIR_M0P  ])[kw];
+		real mfbcc = (distAD.f[DIR_0PP  ])[k];
+		real mfbaa = (distAD.f[DIR_0MM  ])[kbs];
+		real mfbca = (distAD.f[DIR_0PM  ])[kb];
+		real mfbac = (distAD.f[DIR_0MP  ])[ks];
+		real mfbbb = (distAD.f[DIR_000])[k];
+		real mfccc = (distAD.f[DIR_PPP ])[k];
+		real mfaac = (distAD.f[DIR_MMP ])[ksw];
+		real mfcac = (distAD.f[DIR_PMP ])[ks];
+		real mfacc = (distAD.f[DIR_MPP ])[kw];
+		real mfcca = (distAD.f[DIR_PPM ])[kb];
+		real mfaaa = (distAD.f[DIR_MMM ])[kbsw];
+		real mfcaa = (distAD.f[DIR_PMM ])[kbs];
+		real mfaca = (distAD.f[DIR_MPM ])[kbw];
 		////////////////////////////////////////////////////////////////////////////////////
 		//! - Calculate density and velocity using pyramid summation for low round-off errors as in Eq. (J1)-(J3) \ref
 		//! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
@@ -503,33 +503,33 @@ extern "C" __global__ void Factorized_Central_Moments_Advection_Diffusion_Device
 		//! stored arrays dependent on timestep is based on the esoteric twist algorithm
 		//! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), DOI:10.3390/computation5020019 ]</b></a>
 		//!
-		(distAD.f[E   ])[k   ] = mfabb;
-		(distAD.f[W   ])[kw  ] = mfcbb;
-		(distAD.f[N   ])[k   ] = mfbab;
-		(distAD.f[S   ])[ks  ] = mfbcb;
-		(distAD.f[T   ])[k   ] = mfbba;
-		(distAD.f[B   ])[kb  ] = mfbbc;
-		(distAD.f[NE  ])[k   ] = mfaab;
-		(distAD.f[SW  ])[ksw ] = mfccb;
-		(distAD.f[SE  ])[ks  ] = mfacb;
-		(distAD.f[NW  ])[kw  ] = mfcab;
-		(distAD.f[TE  ])[k   ] = mfaba;
-		(distAD.f[BW  ])[kbw ] = mfcbc;
-		(distAD.f[BE  ])[kb  ] = mfabc;
-		(distAD.f[TW  ])[kw  ] = mfcba;
-		(distAD.f[TN  ])[k   ] = mfbaa;
-		(distAD.f[BS  ])[kbs ] = mfbcc;
-		(distAD.f[BN  ])[kb  ] = mfbac;
-		(distAD.f[TS  ])[ks  ] = mfbca;
-		(distAD.f[REST])[k   ] = mfbbb;
-		(distAD.f[TNE ])[k   ] = mfaaa;
-		(distAD.f[TSE ])[ks  ] = mfaca;
-		(distAD.f[BNE ])[kb  ] = mfaac;
-		(distAD.f[BSE ])[kbs ] = mfacc;
-		(distAD.f[TNW ])[kw  ] = mfcaa;
-		(distAD.f[TSW ])[ksw ] = mfcca;
-		(distAD.f[BNW ])[kbw ] = mfcac;
-		(distAD.f[BSW ])[kbsw] = mfccc;
+		(distAD.f[DIR_P00   ])[k   ] = mfabb;
+		(distAD.f[DIR_M00   ])[kw  ] = mfcbb;
+		(distAD.f[DIR_0P0   ])[k   ] = mfbab;
+		(distAD.f[DIR_0M0   ])[ks  ] = mfbcb;
+		(distAD.f[DIR_00P   ])[k   ] = mfbba;
+		(distAD.f[DIR_00M   ])[kb  ] = mfbbc;
+		(distAD.f[DIR_PP0  ])[k   ] = mfaab;
+		(distAD.f[DIR_MM0  ])[ksw ] = mfccb;
+		(distAD.f[DIR_PM0  ])[ks  ] = mfacb;
+		(distAD.f[DIR_MP0  ])[kw  ] = mfcab;
+		(distAD.f[DIR_P0P  ])[k   ] = mfaba;
+		(distAD.f[DIR_M0M  ])[kbw ] = mfcbc;
+		(distAD.f[DIR_P0M  ])[kb  ] = mfabc;
+		(distAD.f[DIR_M0P  ])[kw  ] = mfcba;
+		(distAD.f[DIR_0PP  ])[k   ] = mfbaa;
+		(distAD.f[DIR_0MM  ])[kbs ] = mfbcc;
+		(distAD.f[DIR_0PM  ])[kb  ] = mfbac;
+		(distAD.f[DIR_0MP  ])[ks  ] = mfbca;
+		(distAD.f[DIR_000])[k   ] = mfbbb;
+		(distAD.f[DIR_PPP ])[k   ] = mfaaa;
+		(distAD.f[DIR_PMP ])[ks  ] = mfaca;
+		(distAD.f[DIR_PPM ])[kb  ] = mfaac;
+		(distAD.f[DIR_PMM ])[kbs ] = mfacc;
+		(distAD.f[DIR_MPP ])[kw  ] = mfcaa;
+		(distAD.f[DIR_MMP ])[ksw ] = mfcca;
+		(distAD.f[DIR_MPM ])[kbw ] = mfcac;
+		(distAD.f[DIR_MMM ])[kbsw] = mfccc;
 	}
 }
 ////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/GPU/AdvectionDiffusionBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/AdvectionDiffusionBCs27.cu
index cc2b3c7c9311def9f3d94f264e68e9ff159d513f..ecf98a7494a0a5e1c81c1040917e941f066605e6 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/AdvectionDiffusionBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/AdvectionDiffusionBCs27.cu
@@ -8,7 +8,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QADPress7(  real* DD, 
+__global__ void QADPress7(  real* DD, 
                                        real* DD7, 
                                        real* temp,
                                        real* velo,
@@ -26,63 +26,63 @@ extern "C" __global__ void QADPress7(  real* DD,
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[E   ] = &DD[E   *size_Mat];
-      D.f[W   ] = &DD[W   *size_Mat];
-      D.f[N   ] = &DD[N   *size_Mat];
-      D.f[S   ] = &DD[S   *size_Mat];
-      D.f[T   ] = &DD[T   *size_Mat];
-      D.f[B   ] = &DD[B   *size_Mat];
-      D.f[NE  ] = &DD[NE  *size_Mat];
-      D.f[SW  ] = &DD[SW  *size_Mat];
-      D.f[SE  ] = &DD[SE  *size_Mat];
-      D.f[NW  ] = &DD[NW  *size_Mat];
-      D.f[TE  ] = &DD[TE  *size_Mat];
-      D.f[BW  ] = &DD[BW  *size_Mat];
-      D.f[BE  ] = &DD[BE  *size_Mat];
-      D.f[TW  ] = &DD[TW  *size_Mat];
-      D.f[TN  ] = &DD[TN  *size_Mat];
-      D.f[BS  ] = &DD[BS  *size_Mat];
-      D.f[BN  ] = &DD[BN  *size_Mat];
-      D.f[TS  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[TNE *size_Mat];
-      D.f[TSW ] = &DD[TSW *size_Mat];
-      D.f[TSE ] = &DD[TSE *size_Mat];
-      D.f[TNW ] = &DD[TNW *size_Mat];
-      D.f[BNE ] = &DD[BNE *size_Mat];
-      D.f[BSW ] = &DD[BSW *size_Mat];
-      D.f[BSE ] = &DD[BSE *size_Mat];
-      D.f[BNW ] = &DD[BNW *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
    } 
    else
    {
-      D.f[W   ] = &DD[E   *size_Mat];
-      D.f[E   ] = &DD[W   *size_Mat];
-      D.f[S   ] = &DD[N   *size_Mat];
-      D.f[N   ] = &DD[S   *size_Mat];
-      D.f[B   ] = &DD[T   *size_Mat];
-      D.f[T   ] = &DD[B   *size_Mat];
-      D.f[SW  ] = &DD[NE  *size_Mat];
-      D.f[NE  ] = &DD[SW  *size_Mat];
-      D.f[NW  ] = &DD[SE  *size_Mat];
-      D.f[SE  ] = &DD[NW  *size_Mat];
-      D.f[BW  ] = &DD[TE  *size_Mat];
-      D.f[TE  ] = &DD[BW  *size_Mat];
-      D.f[TW  ] = &DD[BE  *size_Mat];
-      D.f[BE  ] = &DD[TW  *size_Mat];
-      D.f[BS  ] = &DD[TN  *size_Mat];
-      D.f[TN  ] = &DD[BS  *size_Mat];
-      D.f[TS  ] = &DD[BN  *size_Mat];
-      D.f[BN  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[BSW *size_Mat];
-      D.f[TSW ] = &DD[BNE *size_Mat];
-      D.f[TSE ] = &DD[BNW *size_Mat];
-      D.f[TNW ] = &DD[BSE *size_Mat];
-      D.f[BNE ] = &DD[TSW *size_Mat];
-      D.f[BSW ] = &DD[TNE *size_Mat];
-      D.f[BSE ] = &DD[TNW *size_Mat];
-      D.f[BNW ] = &DD[TSE *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
    }
 
    Distributions7 D7;
@@ -128,32 +128,32 @@ extern "C" __global__ void QADPress7(  real* DD,
       //         *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
       //         *q_dirBSE, *q_dirBNW;
 
-      q_dirE   = &QQ[E   * numberOfBCnodes];
-      q_dirW   = &QQ[W   * numberOfBCnodes];
-      q_dirN   = &QQ[N   * numberOfBCnodes];
-      q_dirS   = &QQ[S   * numberOfBCnodes];
-      q_dirT   = &QQ[T   * numberOfBCnodes];
-      q_dirB   = &QQ[B   * numberOfBCnodes];
-      //q_dirNE  = &QQ[NE  * numberOfBCnodes];
-      //q_dirSW  = &QQ[SW  * numberOfBCnodes];
-      //q_dirSE  = &QQ[SE  * numberOfBCnodes];
-      //q_dirNW  = &QQ[NW  * numberOfBCnodes];
-      //q_dirTE  = &QQ[TE  * numberOfBCnodes];
-      //q_dirBW  = &QQ[BW  * numberOfBCnodes];
-      //q_dirBE  = &QQ[BE  * numberOfBCnodes];
-      //q_dirTW  = &QQ[TW  * numberOfBCnodes];
-      //q_dirTN  = &QQ[TN  * numberOfBCnodes];
-      //q_dirBS  = &QQ[BS  * numberOfBCnodes];
-      //q_dirBN  = &QQ[BN  * numberOfBCnodes];
-      //q_dirTS  = &QQ[TS  * numberOfBCnodes];
-      //q_dirTNE = &QQ[TNE * numberOfBCnodes];
-      //q_dirTSW = &QQ[TSW * numberOfBCnodes];
-      //q_dirTSE = &QQ[TSE * numberOfBCnodes];
-      //q_dirTNW = &QQ[TNW * numberOfBCnodes];
-      //q_dirBNE = &QQ[BNE * numberOfBCnodes];
-      //q_dirBSW = &QQ[BSW * numberOfBCnodes];
-      //q_dirBSE = &QQ[BSE * numberOfBCnodes];
-      //q_dirBNW = &QQ[BNW * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
+      //q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
+      //q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
+      //q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
+      //q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
+      //q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
+      //q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
+      //q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
+      //q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
+      //q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
+      //q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
+      //q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
+      //q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      //q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
+      //q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
+      //q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
+      //q_dirTNW = &QQ[DIR_MPP * numberOfBCnodes];
+      //q_dirBNE = &QQ[DIR_PPM * numberOfBCnodes];
+      //q_dirBSW = &QQ[DIR_MMM * numberOfBCnodes];
+      //q_dirBSE = &QQ[DIR_PMM * numberOfBCnodes];
+      //q_dirBNW = &QQ[DIR_MPM * numberOfBCnodes];
       //////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -188,32 +188,32 @@ extern "C" __global__ void QADPress7(  real* DD,
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
          f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_W    = (D.f[E   ])[ke   ];
-      f_E    = (D.f[W   ])[kw   ];
-      f_S    = (D.f[N   ])[kn   ];
-      f_N    = (D.f[S   ])[ks   ];
-      f_B    = (D.f[T   ])[kt   ];
-      f_T    = (D.f[B   ])[kb   ];
-      f_SW   = (D.f[NE  ])[kne  ];
-      f_NE   = (D.f[SW  ])[ksw  ];
-      f_NW   = (D.f[SE  ])[kse  ];
-      f_SE   = (D.f[NW  ])[knw  ];
-      f_BW   = (D.f[TE  ])[kte  ];
-      f_TE   = (D.f[BW  ])[kbw  ];
-      f_TW   = (D.f[BE  ])[kbe  ];
-      f_BE   = (D.f[TW  ])[ktw  ];
-      f_BS   = (D.f[TN  ])[ktn  ];
-      f_TN   = (D.f[BS  ])[kbs  ];
-      f_TS   = (D.f[BN  ])[kbn  ];
-      f_BN   = (D.f[TS  ])[kts  ];
-      f_BSW  = (D.f[TNE ])[ktne ];
-      f_BNE  = (D.f[TSW ])[ktsw ];
-      f_BNW  = (D.f[TSE ])[ktse ];
-      f_BSE  = (D.f[TNW ])[ktnw ];
-      f_TSW  = (D.f[BNE ])[kbne ];
-      f_TNE  = (D.f[BSW ])[kbsw ];
-      f_TNW  = (D.f[BSE ])[kbse ];
-      f_TSE  = (D.f[BNW ])[kbnw ];
+      f_W    = (D.f[DIR_P00   ])[ke   ];
+      f_E    = (D.f[DIR_M00   ])[kw   ];
+      f_S    = (D.f[DIR_0P0   ])[kn   ];
+      f_N    = (D.f[DIR_0M0   ])[ks   ];
+      f_B    = (D.f[DIR_00P   ])[kt   ];
+      f_T    = (D.f[DIR_00M   ])[kb   ];
+      f_SW   = (D.f[DIR_PP0  ])[kne  ];
+      f_NE   = (D.f[DIR_MM0  ])[ksw  ];
+      f_NW   = (D.f[DIR_PM0  ])[kse  ];
+      f_SE   = (D.f[DIR_MP0  ])[knw  ];
+      f_BW   = (D.f[DIR_P0P  ])[kte  ];
+      f_TE   = (D.f[DIR_M0M  ])[kbw  ];
+      f_TW   = (D.f[DIR_P0M  ])[kbe  ];
+      f_BE   = (D.f[DIR_M0P  ])[ktw  ];
+      f_BS   = (D.f[DIR_0PP  ])[ktn  ];
+      f_TN   = (D.f[DIR_0MM  ])[kbs  ];
+      f_TS   = (D.f[DIR_0PM  ])[kbn  ];
+      f_BN   = (D.f[DIR_0MP  ])[kts  ];
+      f_BSW  = (D.f[DIR_PPP ])[ktne ];
+      f_BNE  = (D.f[DIR_MMP ])[ktsw ];
+      f_BNW  = (D.f[DIR_PMP ])[ktse ];
+      f_BSE  = (D.f[DIR_MPP ])[ktnw ];
+      f_TSW  = (D.f[DIR_PPM ])[kbne ];
+      f_TNE  = (D.f[DIR_MMM ])[kbsw ];
+      f_TNW  = (D.f[DIR_PMM ])[kbse ];
+      f_TSE  = (D.f[DIR_MPM ])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       /*real drho*/;
       //real vx1_Inflow   = zero;
@@ -226,7 +226,7 @@ extern "C" __global__ void QADPress7(  real* DD,
 
       //drho   =    f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
       //            f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + 
-      //            f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[REST])[kzero]); 
+      //            f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[DIR_000])[kzero]); 
 
       //real vx1 =  ((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
       //               ((f_BE - f_TW)   + (f_TE - f_BW))   + ((f_SE - f_NW)   + (f_NE - f_SW)) +
@@ -240,7 +240,7 @@ extern "C" __global__ void QADPress7(  real* DD,
       //               (-(f_BN - f_TS)  + (f_TN - f_BS))   + ((f_TE - f_BW)   - (f_BE - f_TW)) +
       //               (f_T - f_B); 
 
-      real rho0   =  (f_TNE+f_BSW)+(f_TSW+f_BNE)+(f_TSE+f_BNW)+(f_TNW+f_BSE)+(f_NE+f_SW)+(f_NW+f_SE)+(f_TE+f_BW)+(f_BE+f_TW)+(f_TN+f_BS)+(f_BN+f_TS)+(f_E+f_W)+(f_N+f_S)+(f_T+f_B)+ ((D.f[REST])[kzero]);
+      real rho0   =  (f_TNE+f_BSW)+(f_TSW+f_BNE)+(f_TSE+f_BNW)+(f_TNW+f_BSE)+(f_NE+f_SW)+(f_NW+f_SE)+(f_TE+f_BW)+(f_BE+f_TW)+(f_TN+f_BS)+(f_BN+f_TS)+(f_E+f_W)+(f_N+f_S)+(f_T+f_B)+ ((D.f[DIR_000])[kzero]);
       real rho    =  rho0 + c1o1;
       real OORho  =  c1o1/rho;
       real vx1    =  OORho*((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_TSE-f_BNW)+(f_BSE-f_TNW) +(f_NE-f_SW)+(f_SE-f_NW)+(f_TE-f_BW)+(f_BE-f_TW)+(f_E-f_W));
@@ -314,14 +314,14 @@ extern "C" __global__ void QADPress7(  real* DD,
 
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
-      //(D.f[REST])[k]=0.1f;
+      //(D.f[DIR_000])[k]=0.1f;
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      //(D7.f[1])[ke   ] = f7_E - feq7_E + feqW7_W; //E
-      //(D7.f[2])[kw   ] = f7_W - feq7_W + feqW7_E; //W
-      //(D7.f[3])[kn   ] = f7_N - feq7_N + feqW7_S; //N
-      //(D7.f[4])[ks   ] = f7_S - feq7_S + feqW7_N; //S
-      //(D7.f[5])[kt   ] = f7_T - feq7_T + feqW7_B; //T
-      //(D7.f[6])[kb   ] = f7_B - feq7_B + feqW7_T; //B
+      //(D7.f[1])[ke   ] = f7_E - feq7_E + feqW7_W; //DIR_P00
+      //(D7.f[2])[kw   ] = f7_W - feq7_W + feqW7_E; //DIR_M00
+      //(D7.f[3])[kn   ] = f7_N - feq7_N + feqW7_S; //DIR_0P0
+      //(D7.f[4])[ks   ] = f7_S - feq7_S + feqW7_N; //DIR_0M0
+      //(D7.f[5])[kt   ] = f7_T - feq7_T + feqW7_B; //DIR_00P
+      //(D7.f[6])[kb   ] = f7_B - feq7_B + feqW7_T; //DIR_00M
 
       //////////////////////////////////////////////////////////////////////////
       //mit Q's
@@ -449,7 +449,7 @@ extern "C" __global__ void QADPress7(  real* DD,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QADPress27( real* DD, 
+__global__ void QADPress27( real* DD, 
                                        real* DD27, 
                                        real* temp,
                                        real* velo,
@@ -467,125 +467,125 @@ extern "C" __global__ void QADPress27( real* DD,
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[E   ] = &DD[E   *size_Mat];
-      D.f[W   ] = &DD[W   *size_Mat];
-      D.f[N   ] = &DD[N   *size_Mat];
-      D.f[S   ] = &DD[S   *size_Mat];
-      D.f[T   ] = &DD[T   *size_Mat];
-      D.f[B   ] = &DD[B   *size_Mat];
-      D.f[NE  ] = &DD[NE  *size_Mat];
-      D.f[SW  ] = &DD[SW  *size_Mat];
-      D.f[SE  ] = &DD[SE  *size_Mat];
-      D.f[NW  ] = &DD[NW  *size_Mat];
-      D.f[TE  ] = &DD[TE  *size_Mat];
-      D.f[BW  ] = &DD[BW  *size_Mat];
-      D.f[BE  ] = &DD[BE  *size_Mat];
-      D.f[TW  ] = &DD[TW  *size_Mat];
-      D.f[TN  ] = &DD[TN  *size_Mat];
-      D.f[BS  ] = &DD[BS  *size_Mat];
-      D.f[BN  ] = &DD[BN  *size_Mat];
-      D.f[TS  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[TNE *size_Mat];
-      D.f[TSW ] = &DD[TSW *size_Mat];
-      D.f[TSE ] = &DD[TSE *size_Mat];
-      D.f[TNW ] = &DD[TNW *size_Mat];
-      D.f[BNE ] = &DD[BNE *size_Mat];
-      D.f[BSW ] = &DD[BSW *size_Mat];
-      D.f[BSE ] = &DD[BSE *size_Mat];
-      D.f[BNW ] = &DD[BNW *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
    } 
    else
    {
-      D.f[W   ] = &DD[E   *size_Mat];
-      D.f[E   ] = &DD[W   *size_Mat];
-      D.f[S   ] = &DD[N   *size_Mat];
-      D.f[N   ] = &DD[S   *size_Mat];
-      D.f[B   ] = &DD[T   *size_Mat];
-      D.f[T   ] = &DD[B   *size_Mat];
-      D.f[SW  ] = &DD[NE  *size_Mat];
-      D.f[NE  ] = &DD[SW  *size_Mat];
-      D.f[NW  ] = &DD[SE  *size_Mat];
-      D.f[SE  ] = &DD[NW  *size_Mat];
-      D.f[BW  ] = &DD[TE  *size_Mat];
-      D.f[TE  ] = &DD[BW  *size_Mat];
-      D.f[TW  ] = &DD[BE  *size_Mat];
-      D.f[BE  ] = &DD[TW  *size_Mat];
-      D.f[BS  ] = &DD[TN  *size_Mat];
-      D.f[TN  ] = &DD[BS  *size_Mat];
-      D.f[TS  ] = &DD[BN  *size_Mat];
-      D.f[BN  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[BSW *size_Mat];
-      D.f[TSW ] = &DD[BNE *size_Mat];
-      D.f[TSE ] = &DD[BNW *size_Mat];
-      D.f[TNW ] = &DD[BSE *size_Mat];
-      D.f[BNE ] = &DD[TSW *size_Mat];
-      D.f[BSW ] = &DD[TNE *size_Mat];
-      D.f[BSE ] = &DD[TNW *size_Mat];
-      D.f[BNW ] = &DD[TSE *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
    }
 
    Distributions27 D27;
    if (isEvenTimestep==true)
    {
-      D27.f[E   ] = &DD27[E   *size_Mat];
-      D27.f[W   ] = &DD27[W   *size_Mat];
-      D27.f[N   ] = &DD27[N   *size_Mat];
-      D27.f[S   ] = &DD27[S   *size_Mat];
-      D27.f[T   ] = &DD27[T   *size_Mat];
-      D27.f[B   ] = &DD27[B   *size_Mat];
-      D27.f[NE  ] = &DD27[NE  *size_Mat];
-      D27.f[SW  ] = &DD27[SW  *size_Mat];
-      D27.f[SE  ] = &DD27[SE  *size_Mat];
-      D27.f[NW  ] = &DD27[NW  *size_Mat];
-      D27.f[TE  ] = &DD27[TE  *size_Mat];
-      D27.f[BW  ] = &DD27[BW  *size_Mat];
-      D27.f[BE  ] = &DD27[BE  *size_Mat];
-      D27.f[TW  ] = &DD27[TW  *size_Mat];
-      D27.f[TN  ] = &DD27[TN  *size_Mat];
-      D27.f[BS  ] = &DD27[BS  *size_Mat];
-      D27.f[BN  ] = &DD27[BN  *size_Mat];
-      D27.f[TS  ] = &DD27[TS  *size_Mat];
-      D27.f[REST] = &DD27[REST*size_Mat];
-      D27.f[TNE ] = &DD27[TNE *size_Mat];
-      D27.f[TSW ] = &DD27[TSW *size_Mat];
-      D27.f[TSE ] = &DD27[TSE *size_Mat];
-      D27.f[TNW ] = &DD27[TNW *size_Mat];
-      D27.f[BNE ] = &DD27[BNE *size_Mat];
-      D27.f[BSW ] = &DD27[BSW *size_Mat];
-      D27.f[BSE ] = &DD27[BSE *size_Mat];
-      D27.f[BNW ] = &DD27[BNW *size_Mat];
+      D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
+      D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
+      D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
+      D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
+      D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
+      D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
+      D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
+      D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
+      D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
+      D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
+      D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
+      D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
+      D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
+      D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
+      D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
+      D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
+      D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
+      D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
+      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
+      D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
+      D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
+      D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
+      D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
+      D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
+      D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
+      D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
+      D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
    } 
    else
    {
-      D27.f[W   ] = &DD27[E   *size_Mat];
-      D27.f[E   ] = &DD27[W   *size_Mat];
-      D27.f[S   ] = &DD27[N   *size_Mat];
-      D27.f[N   ] = &DD27[S   *size_Mat];
-      D27.f[B   ] = &DD27[T   *size_Mat];
-      D27.f[T   ] = &DD27[B   *size_Mat];
-      D27.f[SW  ] = &DD27[NE  *size_Mat];
-      D27.f[NE  ] = &DD27[SW  *size_Mat];
-      D27.f[NW  ] = &DD27[SE  *size_Mat];
-      D27.f[SE  ] = &DD27[NW  *size_Mat];
-      D27.f[BW  ] = &DD27[TE  *size_Mat];
-      D27.f[TE  ] = &DD27[BW  *size_Mat];
-      D27.f[TW  ] = &DD27[BE  *size_Mat];
-      D27.f[BE  ] = &DD27[TW  *size_Mat];
-      D27.f[BS  ] = &DD27[TN  *size_Mat];
-      D27.f[TN  ] = &DD27[BS  *size_Mat];
-      D27.f[TS  ] = &DD27[BN  *size_Mat];
-      D27.f[BN  ] = &DD27[TS  *size_Mat];
-      D27.f[REST] = &DD27[REST*size_Mat];
-      D27.f[TNE ] = &DD27[BSW *size_Mat];
-      D27.f[TSW ] = &DD27[BNE *size_Mat];
-      D27.f[TSE ] = &DD27[BNW *size_Mat];
-      D27.f[TNW ] = &DD27[BSE *size_Mat];
-      D27.f[BNE ] = &DD27[TSW *size_Mat];
-      D27.f[BSW ] = &DD27[TNE *size_Mat];
-      D27.f[BSE ] = &DD27[TNW *size_Mat];
-      D27.f[BNW ] = &DD27[TSE *size_Mat];
+      D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
+      D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
+      D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
+      D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
+      D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
+      D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
+      D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
+      D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
+      D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
+      D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
+      D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
+      D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
+      D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
+      D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
+      D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
+      D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
+      D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
+      D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
+      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
+      D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
+      D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
+      D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
+      D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
+      D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
+      D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
+      D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
+      D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -606,32 +606,32 @@ extern "C" __global__ void QADPress27( real* DD,
          *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
          *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
          *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[E   * numberOfBCnodes];
-      q_dirW   = &QQ[W   * numberOfBCnodes];
-      q_dirN   = &QQ[N   * numberOfBCnodes];
-      q_dirS   = &QQ[S   * numberOfBCnodes];
-      q_dirT   = &QQ[T   * numberOfBCnodes];
-      q_dirB   = &QQ[B   * numberOfBCnodes];
-      q_dirNE  = &QQ[NE  * numberOfBCnodes];
-      q_dirSW  = &QQ[SW  * numberOfBCnodes];
-      q_dirSE  = &QQ[SE  * numberOfBCnodes];
-      q_dirNW  = &QQ[NW  * numberOfBCnodes];
-      q_dirTE  = &QQ[TE  * numberOfBCnodes];
-      q_dirBW  = &QQ[BW  * numberOfBCnodes];
-      q_dirBE  = &QQ[BE  * numberOfBCnodes];
-      q_dirTW  = &QQ[TW  * numberOfBCnodes];
-      q_dirTN  = &QQ[TN  * numberOfBCnodes];
-      q_dirBS  = &QQ[BS  * numberOfBCnodes];
-      q_dirBN  = &QQ[BN  * numberOfBCnodes];
-      q_dirTS  = &QQ[TS  * numberOfBCnodes];
-      q_dirTNE = &QQ[TNE * numberOfBCnodes];
-      q_dirTSW = &QQ[TSW * numberOfBCnodes];
-      q_dirTSE = &QQ[TSE * numberOfBCnodes];
-      q_dirTNW = &QQ[TNW * numberOfBCnodes];
-      q_dirBNE = &QQ[BNE * numberOfBCnodes];
-      q_dirBSW = &QQ[BSW * numberOfBCnodes];
-      q_dirBSE = &QQ[BSE * numberOfBCnodes];
-      q_dirBNW = &QQ[BNW * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
+      q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
+      q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
+      q_dirTNW = &QQ[DIR_MPP * numberOfBCnodes];
+      q_dirBNE = &QQ[DIR_PPM * numberOfBCnodes];
+      q_dirBSW = &QQ[DIR_MMM * numberOfBCnodes];
+      q_dirBSE = &QQ[DIR_PMM * numberOfBCnodes];
+      q_dirBNW = &QQ[DIR_MPM * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -663,33 +663,33 @@ extern "C" __global__ void QADPress27( real* DD,
       unsigned int ktne = KQK;
       unsigned int kbsw = neighborZ[ksw];
       ////////////////////////////////////////////////////////////////////////////////
-      real f_W    = (D.f[E   ])[ke   ];
-      real f_E    = (D.f[W   ])[kw   ];
-      real f_S    = (D.f[N   ])[kn   ];
-      real f_N    = (D.f[S   ])[ks   ];
-      real f_B    = (D.f[T   ])[kt   ];
-      real f_T    = (D.f[B   ])[kb   ];
-      real f_SW   = (D.f[NE  ])[kne  ];
-      real f_NE   = (D.f[SW  ])[ksw  ];
-      real f_NW   = (D.f[SE  ])[kse  ];
-      real f_SE   = (D.f[NW  ])[knw  ];
-      real f_BW   = (D.f[TE  ])[kte  ];
-      real f_TE   = (D.f[BW  ])[kbw  ];
-      real f_TW   = (D.f[BE  ])[kbe  ];
-      real f_BE   = (D.f[TW  ])[ktw  ];
-      real f_BS   = (D.f[TN  ])[ktn  ];
-      real f_TN   = (D.f[BS  ])[kbs  ];
-      real f_TS   = (D.f[BN  ])[kbn  ];
-      real f_BN   = (D.f[TS  ])[kts  ];
-      real f_ZERO = (D.f[REST])[kzero];
-      real f_BSW  = (D.f[TNE ])[ktne ];
-      real f_BNE  = (D.f[TSW ])[ktsw ];
-      real f_BNW  = (D.f[TSE ])[ktse ];
-      real f_BSE  = (D.f[TNW ])[ktnw ];
-      real f_TSW  = (D.f[BNE ])[kbne ];
-      real f_TNE  = (D.f[BSW ])[kbsw ];
-      real f_TNW  = (D.f[BSE ])[kbse ];
-      real f_TSE  = (D.f[BNW ])[kbnw ];
+      real f_W    = (D.f[DIR_P00   ])[ke   ];
+      real f_E    = (D.f[DIR_M00   ])[kw   ];
+      real f_S    = (D.f[DIR_0P0   ])[kn   ];
+      real f_N    = (D.f[DIR_0M0   ])[ks   ];
+      real f_B    = (D.f[DIR_00P   ])[kt   ];
+      real f_T    = (D.f[DIR_00M   ])[kb   ];
+      real f_SW   = (D.f[DIR_PP0  ])[kne  ];
+      real f_NE   = (D.f[DIR_MM0  ])[ksw  ];
+      real f_NW   = (D.f[DIR_PM0  ])[kse  ];
+      real f_SE   = (D.f[DIR_MP0  ])[knw  ];
+      real f_BW   = (D.f[DIR_P0P  ])[kte  ];
+      real f_TE   = (D.f[DIR_M0M  ])[kbw  ];
+      real f_TW   = (D.f[DIR_P0M  ])[kbe  ];
+      real f_BE   = (D.f[DIR_M0P  ])[ktw  ];
+      real f_BS   = (D.f[DIR_0PP  ])[ktn  ];
+      real f_TN   = (D.f[DIR_0MM  ])[kbs  ];
+      real f_TS   = (D.f[DIR_0PM  ])[kbn  ];
+      real f_BN   = (D.f[DIR_0MP  ])[kts  ];
+      real f_ZERO = (D.f[DIR_000])[kzero];
+      real f_BSW  = (D.f[DIR_PPP ])[ktne ];
+      real f_BNE  = (D.f[DIR_MMP ])[ktsw ];
+      real f_BNW  = (D.f[DIR_PMP ])[ktse ];
+      real f_BSE  = (D.f[DIR_MPP ])[ktnw ];
+      real f_TSW  = (D.f[DIR_PPM ])[kbne ];
+      real f_TNE  = (D.f[DIR_MMM ])[kbsw ];
+      real f_TNW  = (D.f[DIR_PMM ])[kbse ];
+      real f_TSE  = (D.f[DIR_MPM ])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, /*drho, feq,*/ q;
       //drho   = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
@@ -715,33 +715,33 @@ extern "C" __global__ void QADPress27( real* DD,
       vx2            =  OORho*((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_BNW-f_TSE)+(f_TNW-f_BSE) +(f_NE-f_SW)+(f_NW-f_SE)+(f_TN-f_BS)+(f_BN-f_TS)+(f_N-f_S));
       vx3            =  OORho*((f_TNE-f_BSW)+(f_TSW-f_BNE)+(f_TSE-f_BNW)+(f_TNW-f_BSE) +(f_TE-f_BW)+(f_TW-f_BE)+(f_TN-f_BS)+(f_TS-f_BN)+(f_T-f_B));
       ////////////////////////////////////////////////////////////////////////////////
-      real f27_W    = (D27.f[E   ])[ke   ];
-      real f27_E    = (D27.f[W   ])[kw   ];
-      real f27_S    = (D27.f[N   ])[kn   ];
-      real f27_N    = (D27.f[S   ])[ks   ];
-      real f27_B    = (D27.f[T   ])[kt   ];
-      real f27_T    = (D27.f[B   ])[kb   ];
-      real f27_SW   = (D27.f[NE  ])[kne  ];
-      real f27_NE   = (D27.f[SW  ])[ksw  ];
-      real f27_NW   = (D27.f[SE  ])[kse  ];
-      real f27_SE   = (D27.f[NW  ])[knw  ];
-      real f27_BW   = (D27.f[TE  ])[kte  ];
-      real f27_TE   = (D27.f[BW  ])[kbw  ];
-      real f27_TW   = (D27.f[BE  ])[kbe  ];
-      real f27_BE   = (D27.f[TW  ])[ktw  ];
-      real f27_BS   = (D27.f[TN  ])[ktn  ];
-      real f27_TN   = (D27.f[BS  ])[kbs  ];
-      real f27_TS   = (D27.f[BN  ])[kbn  ];
-      real f27_BN   = (D27.f[TS  ])[kts  ];
-      real f27_ZERO = (D27.f[REST])[kzero];
-      real f27_BSW  = (D27.f[TNE ])[ktne ];
-      real f27_BNE  = (D27.f[TSW ])[ktsw ];
-      real f27_BNW  = (D27.f[TSE ])[ktse ];
-      real f27_BSE  = (D27.f[TNW ])[ktnw ];
-      real f27_TSW  = (D27.f[BNE ])[kbne ];
-      real f27_TNE  = (D27.f[BSW ])[kbsw ];
-      real f27_TNW  = (D27.f[BSE ])[kbse ];
-      real f27_TSE  = (D27.f[BNW ])[kbnw ];
+      real f27_W    = (D27.f[DIR_P00   ])[ke   ];
+      real f27_E    = (D27.f[DIR_M00   ])[kw   ];
+      real f27_S    = (D27.f[DIR_0P0   ])[kn   ];
+      real f27_N    = (D27.f[DIR_0M0   ])[ks   ];
+      real f27_B    = (D27.f[DIR_00P   ])[kt   ];
+      real f27_T    = (D27.f[DIR_00M   ])[kb   ];
+      real f27_SW   = (D27.f[DIR_PP0  ])[kne  ];
+      real f27_NE   = (D27.f[DIR_MM0  ])[ksw  ];
+      real f27_NW   = (D27.f[DIR_PM0  ])[kse  ];
+      real f27_SE   = (D27.f[DIR_MP0  ])[knw  ];
+      real f27_BW   = (D27.f[DIR_P0P  ])[kte  ];
+      real f27_TE   = (D27.f[DIR_M0M  ])[kbw  ];
+      real f27_TW   = (D27.f[DIR_P0M  ])[kbe  ];
+      real f27_BE   = (D27.f[DIR_M0P  ])[ktw  ];
+      real f27_BS   = (D27.f[DIR_0PP  ])[ktn  ];
+      real f27_TN   = (D27.f[DIR_0MM  ])[kbs  ];
+      real f27_TS   = (D27.f[DIR_0PM  ])[kbn  ];
+      real f27_BN   = (D27.f[DIR_0MP  ])[kts  ];
+      real f27_ZERO = (D27.f[DIR_000])[kzero];
+      real f27_BSW  = (D27.f[DIR_PPP ])[ktne ];
+      real f27_BNE  = (D27.f[DIR_MMP ])[ktsw ];
+      real f27_BNW  = (D27.f[DIR_PMP ])[ktse ];
+      real f27_BSE  = (D27.f[DIR_MPP ])[ktnw ];
+      real f27_TSW  = (D27.f[DIR_PPM ])[kbne ];
+      real f27_TNE  = (D27.f[DIR_MMM ])[kbsw ];
+      real f27_TNW  = (D27.f[DIR_PMM ])[kbse ];
+      real f27_TSE  = (D27.f[DIR_MPM ])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
       ////////////////////////////////////////////////////////////////////////////////
@@ -849,94 +849,94 @@ extern "C" __global__ void QADPress27( real* DD,
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D27.f[E   ] = &DD27[E   *size_Mat];
-         D27.f[W   ] = &DD27[W   *size_Mat];
-         D27.f[N   ] = &DD27[N   *size_Mat];
-         D27.f[S   ] = &DD27[S   *size_Mat];
-         D27.f[T   ] = &DD27[T   *size_Mat];
-         D27.f[B   ] = &DD27[B   *size_Mat];
-         D27.f[NE  ] = &DD27[NE  *size_Mat];
-         D27.f[SW  ] = &DD27[SW  *size_Mat];
-         D27.f[SE  ] = &DD27[SE  *size_Mat];
-         D27.f[NW  ] = &DD27[NW  *size_Mat];
-         D27.f[TE  ] = &DD27[TE  *size_Mat];
-         D27.f[BW  ] = &DD27[BW  *size_Mat];
-         D27.f[BE  ] = &DD27[BE  *size_Mat];
-         D27.f[TW  ] = &DD27[TW  *size_Mat];
-         D27.f[TN  ] = &DD27[TN  *size_Mat];
-         D27.f[BS  ] = &DD27[BS  *size_Mat];
-         D27.f[BN  ] = &DD27[BN  *size_Mat];
-         D27.f[TS  ] = &DD27[TS  *size_Mat];
-         D27.f[REST] = &DD27[REST*size_Mat];
-         D27.f[TNE ] = &DD27[TNE *size_Mat];
-         D27.f[TSW ] = &DD27[TSW *size_Mat];
-         D27.f[TSE ] = &DD27[TSE *size_Mat];
-         D27.f[TNW ] = &DD27[TNW *size_Mat];
-         D27.f[BNE ] = &DD27[BNE *size_Mat];
-         D27.f[BSW ] = &DD27[BSW *size_Mat];
-         D27.f[BSE ] = &DD27[BSE *size_Mat];
-         D27.f[BNW ] = &DD27[BNW *size_Mat];
+         D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
+         D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
+         D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
+         D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
+         D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
+         D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
+         D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
+         D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
+         D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
+         D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
+         D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
+         D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
+         D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
+         D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
+         D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
+         D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
+         D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
+         D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
+         D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
+         D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
+         D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
+         D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
+         D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
+         D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
+         D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
+         D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
+         D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
       } 
       else
       {
-         D27.f[W   ] = &DD27[E   *size_Mat];
-         D27.f[E   ] = &DD27[W   *size_Mat];
-         D27.f[S   ] = &DD27[N   *size_Mat];
-         D27.f[N   ] = &DD27[S   *size_Mat];
-         D27.f[B   ] = &DD27[T   *size_Mat];
-         D27.f[T   ] = &DD27[B   *size_Mat];
-         D27.f[SW  ] = &DD27[NE  *size_Mat];
-         D27.f[NE  ] = &DD27[SW  *size_Mat];
-         D27.f[NW  ] = &DD27[SE  *size_Mat];
-         D27.f[SE  ] = &DD27[NW  *size_Mat];
-         D27.f[BW  ] = &DD27[TE  *size_Mat];
-         D27.f[TE  ] = &DD27[BW  *size_Mat];
-         D27.f[TW  ] = &DD27[BE  *size_Mat];
-         D27.f[BE  ] = &DD27[TW  *size_Mat];
-         D27.f[BS  ] = &DD27[TN  *size_Mat];
-         D27.f[TN  ] = &DD27[BS  *size_Mat];
-         D27.f[TS  ] = &DD27[BN  *size_Mat];
-         D27.f[BN  ] = &DD27[TS  *size_Mat];
-         D27.f[REST] = &DD27[REST*size_Mat];
-         D27.f[TNE ] = &DD27[BSW *size_Mat];
-         D27.f[TSW ] = &DD27[BNE *size_Mat];
-         D27.f[TSE ] = &DD27[BNW *size_Mat];
-         D27.f[TNW ] = &DD27[BSE *size_Mat];
-         D27.f[BNE ] = &DD27[TSW *size_Mat];
-         D27.f[BSW ] = &DD27[TNE *size_Mat];
-         D27.f[BSE ] = &DD27[TNW *size_Mat];
-         D27.f[BNW ] = &DD27[TSE *size_Mat];
+         D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
+         D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
+         D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
+         D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
+         D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
+         D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
+         D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
+         D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
+         D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
+         D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
+         D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
+         D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
+         D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
+         D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
+         D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
+         D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
+         D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
+         D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
+         D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
+         D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
+         D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
+         D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
+         D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
+         D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
+         D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
+         D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
+         D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
-      //(D.f[REST])[k]=c1o10;
+      //(D.f[DIR_000])[k]=c1o10;
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      q = q_dirE[k];   if (q>=c0o1 && q<=c1o1) (D27.f[W  ])[kw  ]=(c2o1*feqW27_W  -(f27_E  *(q*omegaD-c1o1)-omegaD*feq27_E  *(q-c1o1))/(omegaD-c1o1)+f27_W  *q)/(q+c1o1);
-      q = q_dirW[k];   if (q>=c0o1 && q<=c1o1) (D27.f[E  ])[ke  ]=(c2o1*feqW27_E  -(f27_W  *(q*omegaD-c1o1)-omegaD*feq27_W  *(q-c1o1))/(omegaD-c1o1)+f27_E  *q)/(q+c1o1);
-      q = q_dirN[k];   if (q>=c0o1 && q<=c1o1) (D27.f[S  ])[ks  ]=(c2o1*feqW27_S  -(f27_N  *(q*omegaD-c1o1)-omegaD*feq27_N  *(q-c1o1))/(omegaD-c1o1)+f27_S  *q)/(q+c1o1);
-      q = q_dirS[k];   if (q>=c0o1 && q<=c1o1) (D27.f[N  ])[kn  ]=(c2o1*feqW27_N  -(f27_S  *(q*omegaD-c1o1)-omegaD*feq27_S  *(q-c1o1))/(omegaD-c1o1)+f27_N  *q)/(q+c1o1);
-      q = q_dirT[k];   if (q>=c0o1 && q<=c1o1) (D27.f[B  ])[kb  ]=(c2o1*feqW27_B  -(f27_T  *(q*omegaD-c1o1)-omegaD*feq27_T  *(q-c1o1))/(omegaD-c1o1)+f27_B  *q)/(q+c1o1);
-      q = q_dirB[k];   if (q>=c0o1 && q<=c1o1) (D27.f[T  ])[kt  ]=(c2o1*feqW27_T  -(f27_B  *(q*omegaD-c1o1)-omegaD*feq27_B  *(q-c1o1))/(omegaD-c1o1)+f27_T  *q)/(q+c1o1);
-      q = q_dirNE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[SW ])[ksw ]=(c2o1*feqW27_SW -(f27_NE *(q*omegaD-c1o1)-omegaD*feq27_NE *(q-c1o1))/(omegaD-c1o1)+f27_SW *q)/(q+c1o1);
-      q = q_dirSW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[NE ])[kne ]=(c2o1*feqW27_NE -(f27_SW *(q*omegaD-c1o1)-omegaD*feq27_SW *(q-c1o1))/(omegaD-c1o1)+f27_NE *q)/(q+c1o1);
-      q = q_dirSE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[NW ])[knw ]=(c2o1*feqW27_NW -(f27_SE *(q*omegaD-c1o1)-omegaD*feq27_SE *(q-c1o1))/(omegaD-c1o1)+f27_NW *q)/(q+c1o1);
-      q = q_dirNW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[SE ])[kse ]=(c2o1*feqW27_SE -(f27_NW *(q*omegaD-c1o1)-omegaD*feq27_NW *(q-c1o1))/(omegaD-c1o1)+f27_SE *q)/(q+c1o1);
-      q = q_dirTE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[BW ])[kbw ]=(c2o1*feqW27_BW -(f27_TE *(q*omegaD-c1o1)-omegaD*feq27_TE *(q-c1o1))/(omegaD-c1o1)+f27_BW *q)/(q+c1o1);
-      q = q_dirBW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[TE ])[kte ]=(c2o1*feqW27_TE -(f27_BW *(q*omegaD-c1o1)-omegaD*feq27_BW *(q-c1o1))/(omegaD-c1o1)+f27_TE *q)/(q+c1o1);
-      q = q_dirBE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[TW ])[ktw ]=(c2o1*feqW27_TW -(f27_BE *(q*omegaD-c1o1)-omegaD*feq27_BE *(q-c1o1))/(omegaD-c1o1)+f27_TW *q)/(q+c1o1);
-      q = q_dirTW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[BE ])[kbe ]=(c2o1*feqW27_BE -(f27_TW *(q*omegaD-c1o1)-omegaD*feq27_TW *(q-c1o1))/(omegaD-c1o1)+f27_BE *q)/(q+c1o1);
-      q = q_dirTN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[BS ])[kbs ]=(c2o1*feqW27_BS -(f27_TN *(q*omegaD-c1o1)-omegaD*feq27_TN *(q-c1o1))/(omegaD-c1o1)+f27_BS *q)/(q+c1o1);
-      q = q_dirBS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[TN ])[ktn ]=(c2o1*feqW27_TN -(f27_BS *(q*omegaD-c1o1)-omegaD*feq27_BS *(q-c1o1))/(omegaD-c1o1)+f27_TN *q)/(q+c1o1);
-      q = q_dirBN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[TS ])[kts ]=(c2o1*feqW27_TS -(f27_BN *(q*omegaD-c1o1)-omegaD*feq27_BN *(q-c1o1))/(omegaD-c1o1)+f27_TS *q)/(q+c1o1);
-      q = q_dirTS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[BN ])[kbn ]=(c2o1*feqW27_BN -(f27_TS *(q*omegaD-c1o1)-omegaD*feq27_TS *(q-c1o1))/(omegaD-c1o1)+f27_BN *q)/(q+c1o1);
-      q = q_dirTNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[BSW])[kbsw]=(c2o1*feqW27_BSW-(f27_TNE*(q*omegaD-c1o1)-omegaD*feq27_TNE*(q-c1o1))/(omegaD-c1o1)+f27_BSW*q)/(q+c1o1);
-      q = q_dirBSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[TNE])[ktne]=(c2o1*feqW27_TNE-(f27_BSW*(q*omegaD-c1o1)-omegaD*feq27_BSW*(q-c1o1))/(omegaD-c1o1)+f27_TNE*q)/(q+c1o1);
-      q = q_dirBNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[TSW])[ktsw]=(c2o1*feqW27_TSW-(f27_BNE*(q*omegaD-c1o1)-omegaD*feq27_BNE*(q-c1o1))/(omegaD-c1o1)+f27_TSW*q)/(q+c1o1);
-      q = q_dirTSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[BNE])[kbne]=(c2o1*feqW27_BNE-(f27_TSW*(q*omegaD-c1o1)-omegaD*feq27_TSW*(q-c1o1))/(omegaD-c1o1)+f27_BNE*q)/(q+c1o1);
-      q = q_dirTSE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[BNW])[kbnw]=(c2o1*feqW27_BNW-(f27_TSE*(q*omegaD-c1o1)-omegaD*feq27_TSE*(q-c1o1))/(omegaD-c1o1)+f27_BNW*q)/(q+c1o1);
-      q = q_dirBNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[TSE])[ktse]=(c2o1*feqW27_TSE-(f27_BNW*(q*omegaD-c1o1)-omegaD*feq27_BNW*(q-c1o1))/(omegaD-c1o1)+f27_TSE*q)/(q+c1o1);
-      q = q_dirBSE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[TNW])[ktnw]=(c2o1*feqW27_TNW-(f27_BSE*(q*omegaD-c1o1)-omegaD*feq27_BSE*(q-c1o1))/(omegaD-c1o1)+f27_TNW*q)/(q+c1o1);
-      q = q_dirTNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[BSE])[kbse]=(c2o1*feqW27_BSE-(f27_TNW*(q*omegaD-c1o1)-omegaD*feq27_TNW*(q-c1o1))/(omegaD-c1o1)+f27_BSE*q)/(q+c1o1);
+      q = q_dirE[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00  ])[kw  ]=(c2o1*feqW27_W  -(f27_E  *(q*omegaD-c1o1)-omegaD*feq27_E  *(q-c1o1))/(omegaD-c1o1)+f27_W  *q)/(q+c1o1);
+      q = q_dirW[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00  ])[ke  ]=(c2o1*feqW27_E  -(f27_W  *(q*omegaD-c1o1)-omegaD*feq27_W  *(q-c1o1))/(omegaD-c1o1)+f27_E  *q)/(q+c1o1);
+      q = q_dirN[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0  ])[ks  ]=(c2o1*feqW27_S  -(f27_N  *(q*omegaD-c1o1)-omegaD*feq27_N  *(q-c1o1))/(omegaD-c1o1)+f27_S  *q)/(q+c1o1);
+      q = q_dirS[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0  ])[kn  ]=(c2o1*feqW27_N  -(f27_S  *(q*omegaD-c1o1)-omegaD*feq27_S  *(q-c1o1))/(omegaD-c1o1)+f27_N  *q)/(q+c1o1);
+      q = q_dirT[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M  ])[kb  ]=(c2o1*feqW27_B  -(f27_T  *(q*omegaD-c1o1)-omegaD*feq27_T  *(q-c1o1))/(omegaD-c1o1)+f27_B  *q)/(q+c1o1);
+      q = q_dirB[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P  ])[kt  ]=(c2o1*feqW27_T  -(f27_B  *(q*omegaD-c1o1)-omegaD*feq27_B  *(q-c1o1))/(omegaD-c1o1)+f27_T  *q)/(q+c1o1);
+      q = q_dirNE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0 ])[ksw ]=(c2o1*feqW27_SW -(f27_NE *(q*omegaD-c1o1)-omegaD*feq27_NE *(q-c1o1))/(omegaD-c1o1)+f27_SW *q)/(q+c1o1);
+      q = q_dirSW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0 ])[kne ]=(c2o1*feqW27_NE -(f27_SW *(q*omegaD-c1o1)-omegaD*feq27_SW *(q-c1o1))/(omegaD-c1o1)+f27_NE *q)/(q+c1o1);
+      q = q_dirSE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0 ])[knw ]=(c2o1*feqW27_NW -(f27_SE *(q*omegaD-c1o1)-omegaD*feq27_SE *(q-c1o1))/(omegaD-c1o1)+f27_NW *q)/(q+c1o1);
+      q = q_dirNW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0 ])[kse ]=(c2o1*feqW27_SE -(f27_NW *(q*omegaD-c1o1)-omegaD*feq27_NW *(q-c1o1))/(omegaD-c1o1)+f27_SE *q)/(q+c1o1);
+      q = q_dirTE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M ])[kbw ]=(c2o1*feqW27_BW -(f27_TE *(q*omegaD-c1o1)-omegaD*feq27_TE *(q-c1o1))/(omegaD-c1o1)+f27_BW *q)/(q+c1o1);
+      q = q_dirBW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P ])[kte ]=(c2o1*feqW27_TE -(f27_BW *(q*omegaD-c1o1)-omegaD*feq27_BW *(q-c1o1))/(omegaD-c1o1)+f27_TE *q)/(q+c1o1);
+      q = q_dirBE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P ])[ktw ]=(c2o1*feqW27_TW -(f27_BE *(q*omegaD-c1o1)-omegaD*feq27_BE *(q-c1o1))/(omegaD-c1o1)+f27_TW *q)/(q+c1o1);
+      q = q_dirTW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M ])[kbe ]=(c2o1*feqW27_BE -(f27_TW *(q*omegaD-c1o1)-omegaD*feq27_TW *(q-c1o1))/(omegaD-c1o1)+f27_BE *q)/(q+c1o1);
+      q = q_dirTN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM ])[kbs ]=(c2o1*feqW27_BS -(f27_TN *(q*omegaD-c1o1)-omegaD*feq27_TN *(q-c1o1))/(omegaD-c1o1)+f27_BS *q)/(q+c1o1);
+      q = q_dirBS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP ])[ktn ]=(c2o1*feqW27_TN -(f27_BS *(q*omegaD-c1o1)-omegaD*feq27_BS *(q-c1o1))/(omegaD-c1o1)+f27_TN *q)/(q+c1o1);
+      q = q_dirBN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP ])[kts ]=(c2o1*feqW27_TS -(f27_BN *(q*omegaD-c1o1)-omegaD*feq27_BN *(q-c1o1))/(omegaD-c1o1)+f27_TS *q)/(q+c1o1);
+      q = q_dirTS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM ])[kbn ]=(c2o1*feqW27_BN -(f27_TS *(q*omegaD-c1o1)-omegaD*feq27_TS *(q-c1o1))/(omegaD-c1o1)+f27_BN *q)/(q+c1o1);
+      q = q_dirTNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMM])[kbsw]=(c2o1*feqW27_BSW-(f27_TNE*(q*omegaD-c1o1)-omegaD*feq27_TNE*(q-c1o1))/(omegaD-c1o1)+f27_BSW*q)/(q+c1o1);
+      q = q_dirBSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PPP])[ktne]=(c2o1*feqW27_TNE-(f27_BSW*(q*omegaD-c1o1)-omegaD*feq27_BSW*(q-c1o1))/(omegaD-c1o1)+f27_TNE*q)/(q+c1o1);
+      q = q_dirBNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMP])[ktsw]=(c2o1*feqW27_TSW-(f27_BNE*(q*omegaD-c1o1)-omegaD*feq27_BNE*(q-c1o1))/(omegaD-c1o1)+f27_TSW*q)/(q+c1o1);
+      q = q_dirTSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PPM])[kbne]=(c2o1*feqW27_BNE-(f27_TSW*(q*omegaD-c1o1)-omegaD*feq27_TSW*(q-c1o1))/(omegaD-c1o1)+f27_BNE*q)/(q+c1o1);
+      q = q_dirTSE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MPM])[kbnw]=(c2o1*feqW27_BNW-(f27_TSE*(q*omegaD-c1o1)-omegaD*feq27_TSE*(q-c1o1))/(omegaD-c1o1)+f27_BNW*q)/(q+c1o1);
+      q = q_dirBNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PMP])[ktse]=(c2o1*feqW27_TSE-(f27_BNW*(q*omegaD-c1o1)-omegaD*feq27_BNW*(q-c1o1))/(omegaD-c1o1)+f27_TSE*q)/(q+c1o1);
+      q = q_dirBSE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MPP])[ktnw]=(c2o1*feqW27_TNW-(f27_BSE*(q*omegaD-c1o1)-omegaD*feq27_BSE*(q-c1o1))/(omegaD-c1o1)+f27_TNW*q)/(q+c1o1);
+      q = q_dirTNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PMM])[kbse]=(c2o1*feqW27_BSE-(f27_TNW*(q*omegaD-c1o1)-omegaD*feq27_TNW*(q-c1o1))/(omegaD-c1o1)+f27_BSE*q)/(q+c1o1);
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -980,7 +980,7 @@ extern "C" __global__ void QADPress27( real* DD,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QADPressNEQNeighbor27(
+__global__ void QADPressNEQNeighbor27(
 													real* DD,
 													real* DD27,
 													int* k_Q,
@@ -996,125 +996,125 @@ extern "C" __global__ void QADPressNEQNeighbor27(
 	Distributions27 D;
 	if (isEvenTimestep == true)
 	{
-		D.f[E] = &DD[E   *size_Mat];
-		D.f[W] = &DD[W   *size_Mat];
-		D.f[N] = &DD[N   *size_Mat];
-		D.f[S] = &DD[S   *size_Mat];
-		D.f[T] = &DD[T   *size_Mat];
-		D.f[B] = &DD[B   *size_Mat];
-		D.f[NE] = &DD[NE  *size_Mat];
-		D.f[SW] = &DD[SW  *size_Mat];
-		D.f[SE] = &DD[SE  *size_Mat];
-		D.f[NW] = &DD[NW  *size_Mat];
-		D.f[TE] = &DD[TE  *size_Mat];
-		D.f[BW] = &DD[BW  *size_Mat];
-		D.f[BE] = &DD[BE  *size_Mat];
-		D.f[TW] = &DD[TW  *size_Mat];
-		D.f[TN] = &DD[TN  *size_Mat];
-		D.f[BS] = &DD[BS  *size_Mat];
-		D.f[BN] = &DD[BN  *size_Mat];
-		D.f[TS] = &DD[TS  *size_Mat];
-		D.f[REST] = &DD[REST*size_Mat];
-		D.f[TNE] = &DD[TNE *size_Mat];
-		D.f[TSW] = &DD[TSW *size_Mat];
-		D.f[TSE] = &DD[TSE *size_Mat];
-		D.f[TNW] = &DD[TNW *size_Mat];
-		D.f[BNE] = &DD[BNE *size_Mat];
-		D.f[BSW] = &DD[BSW *size_Mat];
-		D.f[BSE] = &DD[BSE *size_Mat];
-		D.f[BNW] = &DD[BNW *size_Mat];
+		D.f[DIR_P00] = &DD[DIR_P00   *size_Mat];
+		D.f[DIR_M00] = &DD[DIR_M00   *size_Mat];
+		D.f[DIR_0P0] = &DD[DIR_0P0   *size_Mat];
+		D.f[DIR_0M0] = &DD[DIR_0M0   *size_Mat];
+		D.f[DIR_00P] = &DD[DIR_00P   *size_Mat];
+		D.f[DIR_00M] = &DD[DIR_00M   *size_Mat];
+		D.f[DIR_PP0] = &DD[DIR_PP0  *size_Mat];
+		D.f[DIR_MM0] = &DD[DIR_MM0  *size_Mat];
+		D.f[DIR_PM0] = &DD[DIR_PM0  *size_Mat];
+		D.f[DIR_MP0] = &DD[DIR_MP0  *size_Mat];
+		D.f[DIR_P0P] = &DD[DIR_P0P  *size_Mat];
+		D.f[DIR_M0M] = &DD[DIR_M0M  *size_Mat];
+		D.f[DIR_P0M] = &DD[DIR_P0M  *size_Mat];
+		D.f[DIR_M0P] = &DD[DIR_M0P  *size_Mat];
+		D.f[DIR_0PP] = &DD[DIR_0PP  *size_Mat];
+		D.f[DIR_0MM] = &DD[DIR_0MM  *size_Mat];
+		D.f[DIR_0PM] = &DD[DIR_0PM  *size_Mat];
+		D.f[DIR_0MP] = &DD[DIR_0MP  *size_Mat];
+		D.f[DIR_000] = &DD[DIR_000*size_Mat];
+		D.f[DIR_PPP] = &DD[DIR_PPP *size_Mat];
+		D.f[DIR_MMP] = &DD[DIR_MMP *size_Mat];
+		D.f[DIR_PMP] = &DD[DIR_PMP *size_Mat];
+		D.f[DIR_MPP] = &DD[DIR_MPP *size_Mat];
+		D.f[DIR_PPM] = &DD[DIR_PPM *size_Mat];
+		D.f[DIR_MMM] = &DD[DIR_MMM *size_Mat];
+		D.f[DIR_PMM] = &DD[DIR_PMM *size_Mat];
+		D.f[DIR_MPM] = &DD[DIR_MPM *size_Mat];
 	}
 	else
 	{
-		D.f[W] = &DD[E   *size_Mat];
-		D.f[E] = &DD[W   *size_Mat];
-		D.f[S] = &DD[N   *size_Mat];
-		D.f[N] = &DD[S   *size_Mat];
-		D.f[B] = &DD[T   *size_Mat];
-		D.f[T] = &DD[B   *size_Mat];
-		D.f[SW] = &DD[NE  *size_Mat];
-		D.f[NE] = &DD[SW  *size_Mat];
-		D.f[NW] = &DD[SE  *size_Mat];
-		D.f[SE] = &DD[NW  *size_Mat];
-		D.f[BW] = &DD[TE  *size_Mat];
-		D.f[TE] = &DD[BW  *size_Mat];
-		D.f[TW] = &DD[BE  *size_Mat];
-		D.f[BE] = &DD[TW  *size_Mat];
-		D.f[BS] = &DD[TN  *size_Mat];
-		D.f[TN] = &DD[BS  *size_Mat];
-		D.f[TS] = &DD[BN  *size_Mat];
-		D.f[BN] = &DD[TS  *size_Mat];
-		D.f[REST] = &DD[REST*size_Mat];
-		D.f[TNE] = &DD[BSW *size_Mat];
-		D.f[TSW] = &DD[BNE *size_Mat];
-		D.f[TSE] = &DD[BNW *size_Mat];
-		D.f[TNW] = &DD[BSE *size_Mat];
-		D.f[BNE] = &DD[TSW *size_Mat];
-		D.f[BSW] = &DD[TNE *size_Mat];
-		D.f[BSE] = &DD[TNW *size_Mat];
-		D.f[BNW] = &DD[TSE *size_Mat];
+		D.f[DIR_M00] = &DD[DIR_P00   *size_Mat];
+		D.f[DIR_P00] = &DD[DIR_M00   *size_Mat];
+		D.f[DIR_0M0] = &DD[DIR_0P0   *size_Mat];
+		D.f[DIR_0P0] = &DD[DIR_0M0   *size_Mat];
+		D.f[DIR_00M] = &DD[DIR_00P   *size_Mat];
+		D.f[DIR_00P] = &DD[DIR_00M   *size_Mat];
+		D.f[DIR_MM0] = &DD[DIR_PP0  *size_Mat];
+		D.f[DIR_PP0] = &DD[DIR_MM0  *size_Mat];
+		D.f[DIR_MP0] = &DD[DIR_PM0  *size_Mat];
+		D.f[DIR_PM0] = &DD[DIR_MP0  *size_Mat];
+		D.f[DIR_M0M] = &DD[DIR_P0P  *size_Mat];
+		D.f[DIR_P0P] = &DD[DIR_M0M  *size_Mat];
+		D.f[DIR_M0P] = &DD[DIR_P0M  *size_Mat];
+		D.f[DIR_P0M] = &DD[DIR_M0P  *size_Mat];
+		D.f[DIR_0MM] = &DD[DIR_0PP  *size_Mat];
+		D.f[DIR_0PP] = &DD[DIR_0MM  *size_Mat];
+		D.f[DIR_0MP] = &DD[DIR_0PM  *size_Mat];
+		D.f[DIR_0PM] = &DD[DIR_0MP  *size_Mat];
+		D.f[DIR_000] = &DD[DIR_000*size_Mat];
+		D.f[DIR_PPP] = &DD[DIR_MMM *size_Mat];
+		D.f[DIR_MMP] = &DD[DIR_PPM *size_Mat];
+		D.f[DIR_PMP] = &DD[DIR_MPM *size_Mat];
+		D.f[DIR_MPP] = &DD[DIR_PMM *size_Mat];
+		D.f[DIR_PPM] = &DD[DIR_MMP *size_Mat];
+		D.f[DIR_MMM] = &DD[DIR_PPP *size_Mat];
+		D.f[DIR_PMM] = &DD[DIR_MPP *size_Mat];
+		D.f[DIR_MPM] = &DD[DIR_PMP *size_Mat];
 	}
 
 	Distributions27 D27;
 	if (isEvenTimestep == true)
 	{
-		D27.f[E] = &DD27[E   *size_Mat];
-		D27.f[W] = &DD27[W   *size_Mat];
-		D27.f[N] = &DD27[N   *size_Mat];
-		D27.f[S] = &DD27[S   *size_Mat];
-		D27.f[T] = &DD27[T   *size_Mat];
-		D27.f[B] = &DD27[B   *size_Mat];
-		D27.f[NE] = &DD27[NE  *size_Mat];
-		D27.f[SW] = &DD27[SW  *size_Mat];
-		D27.f[SE] = &DD27[SE  *size_Mat];
-		D27.f[NW] = &DD27[NW  *size_Mat];
-		D27.f[TE] = &DD27[TE  *size_Mat];
-		D27.f[BW] = &DD27[BW  *size_Mat];
-		D27.f[BE] = &DD27[BE  *size_Mat];
-		D27.f[TW] = &DD27[TW  *size_Mat];
-		D27.f[TN] = &DD27[TN  *size_Mat];
-		D27.f[BS] = &DD27[BS  *size_Mat];
-		D27.f[BN] = &DD27[BN  *size_Mat];
-		D27.f[TS] = &DD27[TS  *size_Mat];
-		D27.f[REST] = &DD27[REST*size_Mat];
-		D27.f[TNE] = &DD27[TNE *size_Mat];
-		D27.f[TSW] = &DD27[TSW *size_Mat];
-		D27.f[TSE] = &DD27[TSE *size_Mat];
-		D27.f[TNW] = &DD27[TNW *size_Mat];
-		D27.f[BNE] = &DD27[BNE *size_Mat];
-		D27.f[BSW] = &DD27[BSW *size_Mat];
-		D27.f[BSE] = &DD27[BSE *size_Mat];
-		D27.f[BNW] = &DD27[BNW *size_Mat];
+		D27.f[DIR_P00] = &DD27[DIR_P00   *size_Mat];
+		D27.f[DIR_M00] = &DD27[DIR_M00   *size_Mat];
+		D27.f[DIR_0P0] = &DD27[DIR_0P0   *size_Mat];
+		D27.f[DIR_0M0] = &DD27[DIR_0M0   *size_Mat];
+		D27.f[DIR_00P] = &DD27[DIR_00P   *size_Mat];
+		D27.f[DIR_00M] = &DD27[DIR_00M   *size_Mat];
+		D27.f[DIR_PP0] = &DD27[DIR_PP0  *size_Mat];
+		D27.f[DIR_MM0] = &DD27[DIR_MM0  *size_Mat];
+		D27.f[DIR_PM0] = &DD27[DIR_PM0  *size_Mat];
+		D27.f[DIR_MP0] = &DD27[DIR_MP0  *size_Mat];
+		D27.f[DIR_P0P] = &DD27[DIR_P0P  *size_Mat];
+		D27.f[DIR_M0M] = &DD27[DIR_M0M  *size_Mat];
+		D27.f[DIR_P0M] = &DD27[DIR_P0M  *size_Mat];
+		D27.f[DIR_M0P] = &DD27[DIR_M0P  *size_Mat];
+		D27.f[DIR_0PP] = &DD27[DIR_0PP  *size_Mat];
+		D27.f[DIR_0MM] = &DD27[DIR_0MM  *size_Mat];
+		D27.f[DIR_0PM] = &DD27[DIR_0PM  *size_Mat];
+		D27.f[DIR_0MP] = &DD27[DIR_0MP  *size_Mat];
+		D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
+		D27.f[DIR_PPP] = &DD27[DIR_PPP *size_Mat];
+		D27.f[DIR_MMP] = &DD27[DIR_MMP *size_Mat];
+		D27.f[DIR_PMP] = &DD27[DIR_PMP *size_Mat];
+		D27.f[DIR_MPP] = &DD27[DIR_MPP *size_Mat];
+		D27.f[DIR_PPM] = &DD27[DIR_PPM *size_Mat];
+		D27.f[DIR_MMM] = &DD27[DIR_MMM *size_Mat];
+		D27.f[DIR_PMM] = &DD27[DIR_PMM *size_Mat];
+		D27.f[DIR_MPM] = &DD27[DIR_MPM *size_Mat];
 	}
 	else
 	{
-		D27.f[W] = &DD27[E   *size_Mat];
-		D27.f[E] = &DD27[W   *size_Mat];
-		D27.f[S] = &DD27[N   *size_Mat];
-		D27.f[N] = &DD27[S   *size_Mat];
-		D27.f[B] = &DD27[T   *size_Mat];
-		D27.f[T] = &DD27[B   *size_Mat];
-		D27.f[SW] = &DD27[NE  *size_Mat];
-		D27.f[NE] = &DD27[SW  *size_Mat];
-		D27.f[NW] = &DD27[SE  *size_Mat];
-		D27.f[SE] = &DD27[NW  *size_Mat];
-		D27.f[BW] = &DD27[TE  *size_Mat];
-		D27.f[TE] = &DD27[BW  *size_Mat];
-		D27.f[TW] = &DD27[BE  *size_Mat];
-		D27.f[BE] = &DD27[TW  *size_Mat];
-		D27.f[BS] = &DD27[TN  *size_Mat];
-		D27.f[TN] = &DD27[BS  *size_Mat];
-		D27.f[TS] = &DD27[BN  *size_Mat];
-		D27.f[BN] = &DD27[TS  *size_Mat];
-		D27.f[REST] = &DD27[REST*size_Mat];
-		D27.f[TNE] = &DD27[BSW *size_Mat];
-		D27.f[TSW] = &DD27[BNE *size_Mat];
-		D27.f[TSE] = &DD27[BNW *size_Mat];
-		D27.f[TNW] = &DD27[BSE *size_Mat];
-		D27.f[BNE] = &DD27[TSW *size_Mat];
-		D27.f[BSW] = &DD27[TNE *size_Mat];
-		D27.f[BSE] = &DD27[TNW *size_Mat];
-		D27.f[BNW] = &DD27[TSE *size_Mat];
+		D27.f[DIR_M00] = &DD27[DIR_P00   *size_Mat];
+		D27.f[DIR_P00] = &DD27[DIR_M00   *size_Mat];
+		D27.f[DIR_0M0] = &DD27[DIR_0P0   *size_Mat];
+		D27.f[DIR_0P0] = &DD27[DIR_0M0   *size_Mat];
+		D27.f[DIR_00M] = &DD27[DIR_00P   *size_Mat];
+		D27.f[DIR_00P] = &DD27[DIR_00M   *size_Mat];
+		D27.f[DIR_MM0] = &DD27[DIR_PP0  *size_Mat];
+		D27.f[DIR_PP0] = &DD27[DIR_MM0  *size_Mat];
+		D27.f[DIR_MP0] = &DD27[DIR_PM0  *size_Mat];
+		D27.f[DIR_PM0] = &DD27[DIR_MP0  *size_Mat];
+		D27.f[DIR_M0M] = &DD27[DIR_P0P  *size_Mat];
+		D27.f[DIR_P0P] = &DD27[DIR_M0M  *size_Mat];
+		D27.f[DIR_M0P] = &DD27[DIR_P0M  *size_Mat];
+		D27.f[DIR_P0M] = &DD27[DIR_M0P  *size_Mat];
+		D27.f[DIR_0MM] = &DD27[DIR_0PP  *size_Mat];
+		D27.f[DIR_0PP] = &DD27[DIR_0MM  *size_Mat];
+		D27.f[DIR_0MP] = &DD27[DIR_0PM  *size_Mat];
+		D27.f[DIR_0PM] = &DD27[DIR_0MP  *size_Mat];
+		D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
+		D27.f[DIR_PPP] = &DD27[DIR_MMM *size_Mat];
+		D27.f[DIR_MMP] = &DD27[DIR_PPM *size_Mat];
+		D27.f[DIR_PMP] = &DD27[DIR_MPM *size_Mat];
+		D27.f[DIR_MPP] = &DD27[DIR_PMM *size_Mat];
+		D27.f[DIR_PPM] = &DD27[DIR_MMP *size_Mat];
+		D27.f[DIR_MMM] = &DD27[DIR_PPP *size_Mat];
+		D27.f[DIR_PMM] = &DD27[DIR_MPP *size_Mat];
+		D27.f[DIR_MPM] = &DD27[DIR_PMP *size_Mat];
 	}
 	////////////////////////////////////////////////////////////////////////////////
 	const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -1163,33 +1163,33 @@ extern "C" __global__ void QADPressNEQNeighbor27(
 		unsigned int kbsw = neighborZ[ksw];
 		////////////////////////////////////////////////////////////////////////////////
 		//distributions
-		real f_W =    (D.f[E])[ke];
-		real f_E =    (D.f[W])[kw];
-		real f_S =    (D.f[N])[kn];
-		real f_N =    (D.f[S])[ks];
-		real f_B =    (D.f[T])[kt];
-		real f_T =    (D.f[B])[kb];
-		real f_SW =   (D.f[NE])[kne];
-		real f_NE =   (D.f[SW])[ksw];
-		real f_NW =   (D.f[SE])[kse];
-		real f_SE =   (D.f[NW])[knw];
-		real f_BW =   (D.f[TE])[kte];
-		real f_TE =   (D.f[BW])[kbw];
-		real f_TW =   (D.f[BE])[kbe];
-		real f_BE =   (D.f[TW])[ktw];
-		real f_BS =   (D.f[TN])[ktn];
-		real f_TN =   (D.f[BS])[kbs];
-		real f_TS =   (D.f[BN])[kbn];
-		real f_BN =   (D.f[TS])[kts];
-		real f_ZERO = (D.f[REST])[kzero];
-		real f_BSW =  (D.f[TNE])[ktne];
-		real f_BNE =  (D.f[TSW])[ktsw];
-		real f_BNW =  (D.f[TSE])[ktse];
-		real f_BSE =  (D.f[TNW])[ktnw];
-		real f_TSW =  (D.f[BNE])[kbne];
-		real f_TNE =  (D.f[BSW])[kbsw];
-		real f_TNW =  (D.f[BSE])[kbse];
-		real f_TSE =  (D.f[BNW])[kbnw];
+		real f_W =    (D.f[DIR_P00])[ke];
+		real f_E =    (D.f[DIR_M00])[kw];
+		real f_S =    (D.f[DIR_0P0])[kn];
+		real f_N =    (D.f[DIR_0M0])[ks];
+		real f_B =    (D.f[DIR_00P])[kt];
+		real f_T =    (D.f[DIR_00M])[kb];
+		real f_SW =   (D.f[DIR_PP0])[kne];
+		real f_NE =   (D.f[DIR_MM0])[ksw];
+		real f_NW =   (D.f[DIR_PM0])[kse];
+		real f_SE =   (D.f[DIR_MP0])[knw];
+		real f_BW =   (D.f[DIR_P0P])[kte];
+		real f_TE =   (D.f[DIR_M0M])[kbw];
+		real f_TW =   (D.f[DIR_P0M])[kbe];
+		real f_BE =   (D.f[DIR_M0P])[ktw];
+		real f_BS =   (D.f[DIR_0PP])[ktn];
+		real f_TN =   (D.f[DIR_0MM])[kbs];
+		real f_TS =   (D.f[DIR_0PM])[kbn];
+		real f_BN =   (D.f[DIR_0MP])[kts];
+		real f_ZERO = (D.f[DIR_000])[kzero];
+		real f_BSW =  (D.f[DIR_PPP])[ktne];
+		real f_BNE =  (D.f[DIR_MMP])[ktsw];
+		real f_BNW =  (D.f[DIR_PMP])[ktse];
+		real f_BSE =  (D.f[DIR_MPP])[ktnw];
+		real f_TSW =  (D.f[DIR_PPM])[kbne];
+		real f_TNE =  (D.f[DIR_MMM])[kbsw];
+		real f_TNW =  (D.f[DIR_PMM])[kbse];
+		real f_TSE =  (D.f[DIR_MPM])[kbnw];
 		////////////////////////////////////////////////////////////////////////////////
 		//macroscopic values
 		real rho0 = 
@@ -1213,33 +1213,33 @@ extern "C" __global__ void QADPressNEQNeighbor27(
 		//AD - BC Nodes
 		////////////////////////////////////////////////////////////////////////////////
 		//distributions
-		real f27_W =    (D27.f[E])[ke];
-		real f27_E =    (D27.f[W])[kw];
-		real f27_S =    (D27.f[N])[kn];
-		real f27_N =    (D27.f[S])[ks];
-		real f27_B =    (D27.f[T])[kt];
-		real f27_T =    (D27.f[B])[kb];
-		real f27_SW =   (D27.f[NE])[kne];
-		real f27_NE =   (D27.f[SW])[ksw];
-		real f27_NW =   (D27.f[SE])[kse];
-		real f27_SE =   (D27.f[NW])[knw];
-		real f27_BW =   (D27.f[TE])[kte];
-		real f27_TE =   (D27.f[BW])[kbw];
-		real f27_TW =   (D27.f[BE])[kbe];
-		real f27_BE =   (D27.f[TW])[ktw];
-		real f27_BS =   (D27.f[TN])[ktn];
-		real f27_TN =   (D27.f[BS])[kbs];
-		real f27_TS =   (D27.f[BN])[kbn];
-		real f27_BN =   (D27.f[TS])[kts];
-		real f27_ZERO = (D27.f[REST])[kzero];
-		real f27_BSW =  (D27.f[TNE])[ktne];
-		real f27_BNE =  (D27.f[TSW])[ktsw];
-		real f27_BNW =  (D27.f[TSE])[ktse];
-		real f27_BSE =  (D27.f[TNW])[ktnw];
-		real f27_TSW =  (D27.f[BNE])[kbne];
-		real f27_TNE =  (D27.f[BSW])[kbsw];
-		real f27_TNW =  (D27.f[BSE])[kbse];
-		real f27_TSE =  (D27.f[BNW])[kbnw];
+		real f27_W =    (D27.f[DIR_P00])[ke];
+		real f27_E =    (D27.f[DIR_M00])[kw];
+		real f27_S =    (D27.f[DIR_0P0])[kn];
+		real f27_N =    (D27.f[DIR_0M0])[ks];
+		real f27_B =    (D27.f[DIR_00P])[kt];
+		real f27_T =    (D27.f[DIR_00M])[kb];
+		real f27_SW =   (D27.f[DIR_PP0])[kne];
+		real f27_NE =   (D27.f[DIR_MM0])[ksw];
+		real f27_NW =   (D27.f[DIR_PM0])[kse];
+		real f27_SE =   (D27.f[DIR_MP0])[knw];
+		real f27_BW =   (D27.f[DIR_P0P])[kte];
+		real f27_TE =   (D27.f[DIR_M0M])[kbw];
+		real f27_TW =   (D27.f[DIR_P0M])[kbe];
+		real f27_BE =   (D27.f[DIR_M0P])[ktw];
+		real f27_BS =   (D27.f[DIR_0PP])[ktn];
+		real f27_TN =   (D27.f[DIR_0MM])[kbs];
+		real f27_TS =   (D27.f[DIR_0PM])[kbn];
+		real f27_BN =   (D27.f[DIR_0MP])[kts];
+		real f27_ZERO = (D27.f[DIR_000])[kzero];
+		real f27_BSW =  (D27.f[DIR_PPP])[ktne];
+		real f27_BNE =  (D27.f[DIR_MMP])[ktsw];
+		real f27_BNW =  (D27.f[DIR_PMP])[ktse];
+		real f27_BSE =  (D27.f[DIR_MPP])[ktnw];
+		real f27_TSW =  (D27.f[DIR_PPM])[kbne];
+		real f27_TNE =  (D27.f[DIR_MMM])[kbsw];
+		real f27_TNW =  (D27.f[DIR_PMM])[kbse];
+		real f27_TSE =  (D27.f[DIR_MPM])[kbnw];
 		////////////////////////////////////////////////////////////////////////////////
 		real cusq = c3o2*(vx1*vx1 + vx2*vx2 + vx3*vx3);
 		////////////////////////////////////////////////////////////////////////////////
@@ -1345,33 +1345,33 @@ extern "C" __global__ void QADPressNEQNeighbor27(
 		unsigned int kNbsw = neighborZ[kNsw];
 		////////////////////////////////////////////////////////////////////////////////
 		//update distributions at neighbor nodes
-        (D27.f[E   ])[kNe   ] = f27_W   ;  
-        (D27.f[W   ])[kNw   ] = f27_E   ;	
-        (D27.f[N   ])[kNn   ] = f27_S   ;	
-        (D27.f[S   ])[kNs   ] = f27_N   ;	
-        (D27.f[T   ])[kNt   ] = f27_B   ;	
-        (D27.f[B   ])[kNb   ] = f27_T   ;	
-        (D27.f[NE  ])[kNne  ] = f27_SW  ;	
-        (D27.f[SW  ])[kNsw  ] = f27_NE  ;	
-        (D27.f[SE  ])[kNse  ] = f27_NW  ;	
-        (D27.f[NW  ])[kNnw  ] = f27_SE  ;	
-        (D27.f[TE  ])[kNte  ] = f27_BW  ;	
-        (D27.f[BW  ])[kNbw  ] = f27_TE  ;	
-        (D27.f[BE  ])[kNbe  ] = f27_TW  ;	
-        (D27.f[TW  ])[kNtw  ] = f27_BE  ;	
-        (D27.f[TN  ])[kNtn  ] = f27_BS  ;	
-        (D27.f[BS  ])[kNbs  ] = f27_TN  ;	
-        (D27.f[BN  ])[kNbn  ] = f27_TS  ;	
-        (D27.f[TS  ])[kNts  ] = f27_BN  ;	
-        (D27.f[REST])[kNzero] = f27_ZERO;	
-        (D27.f[TNE ])[kNtne ] = f27_BSW ;	
-        (D27.f[TSW ])[kNtsw ] = f27_BNE ;	
-        (D27.f[TSE ])[kNtse ] = f27_BNW ;	
-        (D27.f[TNW ])[kNtnw ] = f27_BSE ;	
-        (D27.f[BNE ])[kNbne ] = f27_TSW ;	
-        (D27.f[BSW ])[kNbsw ] = f27_TNE ;	
-        (D27.f[BSE ])[kNbse ] = f27_TNW ;	
-        (D27.f[BNW ])[kNbnw ] = f27_TSE ;       
+        (D27.f[DIR_P00   ])[kNe   ] = f27_W   ;  
+        (D27.f[DIR_M00   ])[kNw   ] = f27_E   ;	
+        (D27.f[DIR_0P0   ])[kNn   ] = f27_S   ;	
+        (D27.f[DIR_0M0   ])[kNs   ] = f27_N   ;	
+        (D27.f[DIR_00P   ])[kNt   ] = f27_B   ;	
+        (D27.f[DIR_00M   ])[kNb   ] = f27_T   ;	
+        (D27.f[DIR_PP0  ])[kNne  ] = f27_SW  ;	
+        (D27.f[DIR_MM0  ])[kNsw  ] = f27_NE  ;	
+        (D27.f[DIR_PM0  ])[kNse  ] = f27_NW  ;	
+        (D27.f[DIR_MP0  ])[kNnw  ] = f27_SE  ;	
+        (D27.f[DIR_P0P  ])[kNte  ] = f27_BW  ;	
+        (D27.f[DIR_M0M  ])[kNbw  ] = f27_TE  ;	
+        (D27.f[DIR_P0M  ])[kNbe  ] = f27_TW  ;	
+        (D27.f[DIR_M0P  ])[kNtw  ] = f27_BE  ;	
+        (D27.f[DIR_0PP  ])[kNtn  ] = f27_BS  ;	
+        (D27.f[DIR_0MM  ])[kNbs  ] = f27_TN  ;	
+        (D27.f[DIR_0PM  ])[kNbn  ] = f27_TS  ;	
+        (D27.f[DIR_0MP  ])[kNts  ] = f27_BN  ;	
+        (D27.f[DIR_000])[kNzero] = f27_ZERO;	
+        (D27.f[DIR_PPP ])[kNtne ] = f27_BSW ;	
+        (D27.f[DIR_MMP ])[kNtsw ] = f27_BNE ;	
+        (D27.f[DIR_PMP ])[kNtse ] = f27_BNW ;	
+        (D27.f[DIR_MPP ])[kNtnw ] = f27_BSE ;	
+        (D27.f[DIR_PPM ])[kNbne ] = f27_TSW ;	
+        (D27.f[DIR_MMM ])[kNbsw ] = f27_TNE ;	
+        (D27.f[DIR_PMM ])[kNbse ] = f27_TNW ;	
+        (D27.f[DIR_MPM ])[kNbnw ] = f27_TSE ;       
 	}
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -1415,7 +1415,7 @@ extern "C" __global__ void QADPressNEQNeighbor27(
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QADVel7( real* DD, 
+__global__ void QADVel7( real* DD, 
                                     real* DD7, 
                                     real* temp,
                                     real* velo,
@@ -1433,63 +1433,63 @@ extern "C" __global__ void QADVel7( real* DD,
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[E   ] = &DD[E   *size_Mat];
-      D.f[W   ] = &DD[W   *size_Mat];
-      D.f[N   ] = &DD[N   *size_Mat];
-      D.f[S   ] = &DD[S   *size_Mat];
-      D.f[T   ] = &DD[T   *size_Mat];
-      D.f[B   ] = &DD[B   *size_Mat];
-      D.f[NE  ] = &DD[NE  *size_Mat];
-      D.f[SW  ] = &DD[SW  *size_Mat];
-      D.f[SE  ] = &DD[SE  *size_Mat];
-      D.f[NW  ] = &DD[NW  *size_Mat];
-      D.f[TE  ] = &DD[TE  *size_Mat];
-      D.f[BW  ] = &DD[BW  *size_Mat];
-      D.f[BE  ] = &DD[BE  *size_Mat];
-      D.f[TW  ] = &DD[TW  *size_Mat];
-      D.f[TN  ] = &DD[TN  *size_Mat];
-      D.f[BS  ] = &DD[BS  *size_Mat];
-      D.f[BN  ] = &DD[BN  *size_Mat];
-      D.f[TS  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[TNE *size_Mat];
-      D.f[TSW ] = &DD[TSW *size_Mat];
-      D.f[TSE ] = &DD[TSE *size_Mat];
-      D.f[TNW ] = &DD[TNW *size_Mat];
-      D.f[BNE ] = &DD[BNE *size_Mat];
-      D.f[BSW ] = &DD[BSW *size_Mat];
-      D.f[BSE ] = &DD[BSE *size_Mat];
-      D.f[BNW ] = &DD[BNW *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
    } 
    else
    {
-      D.f[W   ] = &DD[E   *size_Mat];
-      D.f[E   ] = &DD[W   *size_Mat];
-      D.f[S   ] = &DD[N   *size_Mat];
-      D.f[N   ] = &DD[S   *size_Mat];
-      D.f[B   ] = &DD[T   *size_Mat];
-      D.f[T   ] = &DD[B   *size_Mat];
-      D.f[SW  ] = &DD[NE  *size_Mat];
-      D.f[NE  ] = &DD[SW  *size_Mat];
-      D.f[NW  ] = &DD[SE  *size_Mat];
-      D.f[SE  ] = &DD[NW  *size_Mat];
-      D.f[BW  ] = &DD[TE  *size_Mat];
-      D.f[TE  ] = &DD[BW  *size_Mat];
-      D.f[TW  ] = &DD[BE  *size_Mat];
-      D.f[BE  ] = &DD[TW  *size_Mat];
-      D.f[BS  ] = &DD[TN  *size_Mat];
-      D.f[TN  ] = &DD[BS  *size_Mat];
-      D.f[TS  ] = &DD[BN  *size_Mat];
-      D.f[BN  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[BSW *size_Mat];
-      D.f[TSW ] = &DD[BNE *size_Mat];
-      D.f[TSE ] = &DD[BNW *size_Mat];
-      D.f[TNW ] = &DD[BSE *size_Mat];
-      D.f[BNE ] = &DD[TSW *size_Mat];
-      D.f[BSW ] = &DD[TNE *size_Mat];
-      D.f[BSE ] = &DD[TNW *size_Mat];
-      D.f[BNW ] = &DD[TSE *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
    }
 
    Distributions7 D7;
@@ -1531,12 +1531,12 @@ extern "C" __global__ void QADVel7( real* DD,
       //////////////////////////////////////////////////////////////////////////////////
       real  *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB;//, 
 
-      q_dirE   = &QQ[E   * numberOfBCnodes];
-      q_dirW   = &QQ[W   * numberOfBCnodes];
-      q_dirN   = &QQ[N   * numberOfBCnodes];
-      q_dirS   = &QQ[S   * numberOfBCnodes];
-      q_dirT   = &QQ[T   * numberOfBCnodes];
-      q_dirB   = &QQ[B   * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
       //////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -1571,32 +1571,32 @@ extern "C" __global__ void QADVel7( real* DD,
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
          f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_W    = (D.f[E   ])[ke   ];
-      f_E    = (D.f[W   ])[kw   ];
-      f_S    = (D.f[N   ])[kn   ];
-      f_N    = (D.f[S   ])[ks   ];
-      f_B    = (D.f[T   ])[kt   ];
-      f_T    = (D.f[B   ])[kb   ];
-      f_SW   = (D.f[NE  ])[kne  ];
-      f_NE   = (D.f[SW  ])[ksw  ];
-      f_NW   = (D.f[SE  ])[kse  ];
-      f_SE   = (D.f[NW  ])[knw  ];
-      f_BW   = (D.f[TE  ])[kte  ];
-      f_TE   = (D.f[BW  ])[kbw  ];
-      f_TW   = (D.f[BE  ])[kbe  ];
-      f_BE   = (D.f[TW  ])[ktw  ];
-      f_BS   = (D.f[TN  ])[ktn  ];
-      f_TN   = (D.f[BS  ])[kbs  ];
-      f_TS   = (D.f[BN  ])[kbn  ];
-      f_BN   = (D.f[TS  ])[kts  ];
-      f_BSW  = (D.f[TNE ])[ktne ];
-      f_BNE  = (D.f[TSW ])[ktsw ];
-      f_BNW  = (D.f[TSE ])[ktse ];
-      f_BSE  = (D.f[TNW ])[ktnw ];
-      f_TSW  = (D.f[BNE ])[kbne ];
-      f_TNE  = (D.f[BSW ])[kbsw ];
-      f_TNW  = (D.f[BSE ])[kbse ];
-      f_TSE  = (D.f[BNW ])[kbnw ];
+      f_W    = (D.f[DIR_P00   ])[ke   ];
+      f_E    = (D.f[DIR_M00   ])[kw   ];
+      f_S    = (D.f[DIR_0P0   ])[kn   ];
+      f_N    = (D.f[DIR_0M0   ])[ks   ];
+      f_B    = (D.f[DIR_00P   ])[kt   ];
+      f_T    = (D.f[DIR_00M   ])[kb   ];
+      f_SW   = (D.f[DIR_PP0  ])[kne  ];
+      f_NE   = (D.f[DIR_MM0  ])[ksw  ];
+      f_NW   = (D.f[DIR_PM0  ])[kse  ];
+      f_SE   = (D.f[DIR_MP0  ])[knw  ];
+      f_BW   = (D.f[DIR_P0P  ])[kte  ];
+      f_TE   = (D.f[DIR_M0M  ])[kbw  ];
+      f_TW   = (D.f[DIR_P0M  ])[kbe  ];
+      f_BE   = (D.f[DIR_M0P  ])[ktw  ];
+      f_BS   = (D.f[DIR_0PP  ])[ktn  ];
+      f_TN   = (D.f[DIR_0MM  ])[kbs  ];
+      f_TS   = (D.f[DIR_0PM  ])[kbn  ];
+      f_BN   = (D.f[DIR_0MP  ])[kts  ];
+      f_BSW  = (D.f[DIR_PPP ])[ktne ];
+      f_BNE  = (D.f[DIR_MMP ])[ktsw ];
+      f_BNW  = (D.f[DIR_PMP ])[ktse ];
+      f_BSE  = (D.f[DIR_MPP ])[ktnw ];
+      f_TSW  = (D.f[DIR_PPM ])[kbne ];
+      f_TNE  = (D.f[DIR_MMM ])[kbsw ];
+      f_TNW  = (D.f[DIR_PMM ])[kbse ];
+      f_TSE  = (D.f[DIR_MPM ])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       /*real drho*/;
       real vx1_Inflow   = c0o1;
@@ -1609,7 +1609,7 @@ extern "C" __global__ void QADVel7( real* DD,
 
       ////drho   =    f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
       ////            f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + 
-      ////            f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[REST])[kzero]); 
+      ////            f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[DIR_000])[kzero]); 
 
       //real vx1 =  ((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
       //               ((f_BE - f_TW)   + (f_TE - f_BW))   + ((f_SE - f_NW)   + (f_NE - f_SW)) +
@@ -1623,7 +1623,7 @@ extern "C" __global__ void QADVel7( real* DD,
       //               (-(f_BN - f_TS)  + (f_TN - f_BS))   + ((f_TE - f_BW)   - (f_BE - f_TW)) +
       //               (f_T - f_B); 
 
-      real rho0   =  (f_TNE+f_BSW)+(f_TSW+f_BNE)+(f_TSE+f_BNW)+(f_TNW+f_BSE)+(f_NE+f_SW)+(f_NW+f_SE)+(f_TE+f_BW)+(f_BE+f_TW)+(f_TN+f_BS)+(f_BN+f_TS)+(f_E+f_W)+(f_N+f_S)+(f_T+f_B)+ ((D.f[REST])[kzero]);
+      real rho0   =  (f_TNE+f_BSW)+(f_TSW+f_BNE)+(f_TSE+f_BNW)+(f_TNW+f_BSE)+(f_NE+f_SW)+(f_NW+f_SE)+(f_TE+f_BW)+(f_BE+f_TW)+(f_TN+f_BS)+(f_BN+f_TS)+(f_E+f_W)+(f_N+f_S)+(f_T+f_B)+ ((D.f[DIR_000])[kzero]);
       real rho    =  rho0 + c1o1;
       real OORho  =  c1o1/rho;
       real vx1    =  OORho*((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_TSE-f_BNW)+(f_BSE-f_TNW) +(f_NE-f_SW)+(f_SE-f_NW)+(f_TE-f_BW)+(f_BE-f_TW)+(f_E-f_W));
@@ -1697,14 +1697,14 @@ extern "C" __global__ void QADVel7( real* DD,
 
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
-      //(D.f[REST])[k]=c1o10;
+      //(D.f[DIR_000])[k]=c1o10;
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      //(D7.f[1])[ke   ] = f7_E - feq7_E + feqW7_W; //E
-      //(D7.f[2])[kw   ] = f7_W - feq7_W + feqW7_E; //W
-      //(D7.f[3])[kn   ] = f7_N - feq7_N + feqW7_S; //N
-      //(D7.f[4])[ks   ] = f7_S - feq7_S + feqW7_N; //S
-      //(D7.f[5])[kt   ] = f7_T - feq7_T + feqW7_B; //T
-      //(D7.f[6])[kb   ] = f7_B - feq7_B + feqW7_T; //B
+      //(D7.f[1])[ke   ] = f7_E - feq7_E + feqW7_W; //DIR_P00
+      //(D7.f[2])[kw   ] = f7_W - feq7_W + feqW7_E; //DIR_M00
+      //(D7.f[3])[kn   ] = f7_N - feq7_N + feqW7_S; //DIR_0P0
+      //(D7.f[4])[ks   ] = f7_S - feq7_S + feqW7_N; //DIR_0M0
+      //(D7.f[5])[kt   ] = f7_T - feq7_T + feqW7_B; //DIR_00P
+      //(D7.f[6])[kb   ] = f7_B - feq7_B + feqW7_T; //DIR_00M
 
       //////////////////////////////////////////////////////////////////////////
       //mit Q's
@@ -1832,14 +1832,14 @@ extern "C" __global__ void QADVel7( real* DD,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QADVel27(real* DD, 
+__global__ void QADVel27(real* DD, 
                                     real* DD27, 
                                     real* temp,
                                     real* velo,
                                     real diffusivity,
                                     int* k_Q, 
                                     real* QQ,
-                                    int numberOfBCnodes, 
+                                    unsigned int numberOfBCnodes, 
                                     real om1, 
                                     unsigned int* neighborX,
                                     unsigned int* neighborY,
@@ -1850,125 +1850,125 @@ extern "C" __global__ void QADVel27(real* DD,
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[E   ] = &DD[E   *size_Mat];
-      D.f[W   ] = &DD[W   *size_Mat];
-      D.f[N   ] = &DD[N   *size_Mat];
-      D.f[S   ] = &DD[S   *size_Mat];
-      D.f[T   ] = &DD[T   *size_Mat];
-      D.f[B   ] = &DD[B   *size_Mat];
-      D.f[NE  ] = &DD[NE  *size_Mat];
-      D.f[SW  ] = &DD[SW  *size_Mat];
-      D.f[SE  ] = &DD[SE  *size_Mat];
-      D.f[NW  ] = &DD[NW  *size_Mat];
-      D.f[TE  ] = &DD[TE  *size_Mat];
-      D.f[BW  ] = &DD[BW  *size_Mat];
-      D.f[BE  ] = &DD[BE  *size_Mat];
-      D.f[TW  ] = &DD[TW  *size_Mat];
-      D.f[TN  ] = &DD[TN  *size_Mat];
-      D.f[BS  ] = &DD[BS  *size_Mat];
-      D.f[BN  ] = &DD[BN  *size_Mat];
-      D.f[TS  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[TNE *size_Mat];
-      D.f[TSW ] = &DD[TSW *size_Mat];
-      D.f[TSE ] = &DD[TSE *size_Mat];
-      D.f[TNW ] = &DD[TNW *size_Mat];
-      D.f[BNE ] = &DD[BNE *size_Mat];
-      D.f[BSW ] = &DD[BSW *size_Mat];
-      D.f[BSE ] = &DD[BSE *size_Mat];
-      D.f[BNW ] = &DD[BNW *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
    } 
    else
    {
-      D.f[W   ] = &DD[E   *size_Mat];
-      D.f[E   ] = &DD[W   *size_Mat];
-      D.f[S   ] = &DD[N   *size_Mat];
-      D.f[N   ] = &DD[S   *size_Mat];
-      D.f[B   ] = &DD[T   *size_Mat];
-      D.f[T   ] = &DD[B   *size_Mat];
-      D.f[SW  ] = &DD[NE  *size_Mat];
-      D.f[NE  ] = &DD[SW  *size_Mat];
-      D.f[NW  ] = &DD[SE  *size_Mat];
-      D.f[SE  ] = &DD[NW  *size_Mat];
-      D.f[BW  ] = &DD[TE  *size_Mat];
-      D.f[TE  ] = &DD[BW  *size_Mat];
-      D.f[TW  ] = &DD[BE  *size_Mat];
-      D.f[BE  ] = &DD[TW  *size_Mat];
-      D.f[BS  ] = &DD[TN  *size_Mat];
-      D.f[TN  ] = &DD[BS  *size_Mat];
-      D.f[TS  ] = &DD[BN  *size_Mat];
-      D.f[BN  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[BSW *size_Mat];
-      D.f[TSW ] = &DD[BNE *size_Mat];
-      D.f[TSE ] = &DD[BNW *size_Mat];
-      D.f[TNW ] = &DD[BSE *size_Mat];
-      D.f[BNE ] = &DD[TSW *size_Mat];
-      D.f[BSW ] = &DD[TNE *size_Mat];
-      D.f[BSE ] = &DD[TNW *size_Mat];
-      D.f[BNW ] = &DD[TSE *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
    }
 
    Distributions27 D27;
    if (isEvenTimestep==true)
    {
-      D27.f[E   ] = &DD27[E   *size_Mat];
-      D27.f[W   ] = &DD27[W   *size_Mat];
-      D27.f[N   ] = &DD27[N   *size_Mat];
-      D27.f[S   ] = &DD27[S   *size_Mat];
-      D27.f[T   ] = &DD27[T   *size_Mat];
-      D27.f[B   ] = &DD27[B   *size_Mat];
-      D27.f[NE  ] = &DD27[NE  *size_Mat];
-      D27.f[SW  ] = &DD27[SW  *size_Mat];
-      D27.f[SE  ] = &DD27[SE  *size_Mat];
-      D27.f[NW  ] = &DD27[NW  *size_Mat];
-      D27.f[TE  ] = &DD27[TE  *size_Mat];
-      D27.f[BW  ] = &DD27[BW  *size_Mat];
-      D27.f[BE  ] = &DD27[BE  *size_Mat];
-      D27.f[TW  ] = &DD27[TW  *size_Mat];
-      D27.f[TN  ] = &DD27[TN  *size_Mat];
-      D27.f[BS  ] = &DD27[BS  *size_Mat];
-      D27.f[BN  ] = &DD27[BN  *size_Mat];
-      D27.f[TS  ] = &DD27[TS  *size_Mat];
-      D27.f[REST] = &DD27[REST*size_Mat];
-      D27.f[TNE ] = &DD27[TNE *size_Mat];
-      D27.f[TSW ] = &DD27[TSW *size_Mat];
-      D27.f[TSE ] = &DD27[TSE *size_Mat];
-      D27.f[TNW ] = &DD27[TNW *size_Mat];
-      D27.f[BNE ] = &DD27[BNE *size_Mat];
-      D27.f[BSW ] = &DD27[BSW *size_Mat];
-      D27.f[BSE ] = &DD27[BSE *size_Mat];
-      D27.f[BNW ] = &DD27[BNW *size_Mat];
+      D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
+      D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
+      D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
+      D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
+      D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
+      D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
+      D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
+      D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
+      D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
+      D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
+      D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
+      D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
+      D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
+      D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
+      D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
+      D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
+      D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
+      D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
+      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
+      D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
+      D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
+      D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
+      D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
+      D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
+      D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
+      D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
+      D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
    } 
    else
    {
-      D27.f[W   ] = &DD27[E   *size_Mat];
-      D27.f[E   ] = &DD27[W   *size_Mat];
-      D27.f[S   ] = &DD27[N   *size_Mat];
-      D27.f[N   ] = &DD27[S   *size_Mat];
-      D27.f[B   ] = &DD27[T   *size_Mat];
-      D27.f[T   ] = &DD27[B   *size_Mat];
-      D27.f[SW  ] = &DD27[NE  *size_Mat];
-      D27.f[NE  ] = &DD27[SW  *size_Mat];
-      D27.f[NW  ] = &DD27[SE  *size_Mat];
-      D27.f[SE  ] = &DD27[NW  *size_Mat];
-      D27.f[BW  ] = &DD27[TE  *size_Mat];
-      D27.f[TE  ] = &DD27[BW  *size_Mat];
-      D27.f[TW  ] = &DD27[BE  *size_Mat];
-      D27.f[BE  ] = &DD27[TW  *size_Mat];
-      D27.f[BS  ] = &DD27[TN  *size_Mat];
-      D27.f[TN  ] = &DD27[BS  *size_Mat];
-      D27.f[TS  ] = &DD27[BN  *size_Mat];
-      D27.f[BN  ] = &DD27[TS  *size_Mat];
-      D27.f[REST] = &DD27[REST*size_Mat];
-      D27.f[TNE ] = &DD27[BSW *size_Mat];
-      D27.f[TSW ] = &DD27[BNE *size_Mat];
-      D27.f[TSE ] = &DD27[BNW *size_Mat];
-      D27.f[TNW ] = &DD27[BSE *size_Mat];
-      D27.f[BNE ] = &DD27[TSW *size_Mat];
-      D27.f[BSW ] = &DD27[TNE *size_Mat];
-      D27.f[BSE ] = &DD27[TNW *size_Mat];
-      D27.f[BNW ] = &DD27[TSE *size_Mat];
+      D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
+      D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
+      D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
+      D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
+      D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
+      D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
+      D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
+      D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
+      D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
+      D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
+      D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
+      D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
+      D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
+      D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
+      D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
+      D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
+      D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
+      D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
+      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
+      D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
+      D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
+      D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
+      D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
+      D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
+      D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
+      D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
+      D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -1989,32 +1989,32 @@ extern "C" __global__ void QADVel27(real* DD,
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[E   * numberOfBCnodes];
-      q_dirW   = &QQ[W   * numberOfBCnodes];
-      q_dirN   = &QQ[N   * numberOfBCnodes];
-      q_dirS   = &QQ[S   * numberOfBCnodes];
-      q_dirT   = &QQ[T   * numberOfBCnodes];
-      q_dirB   = &QQ[B   * numberOfBCnodes];
-      q_dirNE  = &QQ[NE  * numberOfBCnodes];
-      q_dirSW  = &QQ[SW  * numberOfBCnodes];
-      q_dirSE  = &QQ[SE  * numberOfBCnodes];
-      q_dirNW  = &QQ[NW  * numberOfBCnodes];
-      q_dirTE  = &QQ[TE  * numberOfBCnodes];
-      q_dirBW  = &QQ[BW  * numberOfBCnodes];
-      q_dirBE  = &QQ[BE  * numberOfBCnodes];
-      q_dirTW  = &QQ[TW  * numberOfBCnodes];
-      q_dirTN  = &QQ[TN  * numberOfBCnodes];
-      q_dirBS  = &QQ[BS  * numberOfBCnodes];
-      q_dirBN  = &QQ[BN  * numberOfBCnodes];
-      q_dirTS  = &QQ[TS  * numberOfBCnodes];
-      q_dirTNE = &QQ[TNE * numberOfBCnodes];
-      q_dirTSW = &QQ[TSW * numberOfBCnodes];
-      q_dirTSE = &QQ[TSE * numberOfBCnodes];
-      q_dirTNW = &QQ[TNW * numberOfBCnodes];
-      q_dirBNE = &QQ[BNE * numberOfBCnodes];
-      q_dirBSW = &QQ[BSW * numberOfBCnodes];
-      q_dirBSE = &QQ[BSE * numberOfBCnodes];
-      q_dirBNW = &QQ[BNW * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
+      q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
+      q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
+      q_dirTNW = &QQ[DIR_MPP * numberOfBCnodes];
+      q_dirBNE = &QQ[DIR_PPM * numberOfBCnodes];
+      q_dirBSW = &QQ[DIR_MMM * numberOfBCnodes];
+      q_dirBSE = &QQ[DIR_PMM * numberOfBCnodes];
+      q_dirBNW = &QQ[DIR_MPM * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -2046,33 +2046,33 @@ extern "C" __global__ void QADVel27(real* DD,
       unsigned int ktne = KQK;
       unsigned int kbsw = neighborZ[ksw];
       ////////////////////////////////////////////////////////////////////////////////
-      real f_W    = (D.f[E   ])[ke   ];
-      real f_E    = (D.f[W   ])[kw   ];
-      real f_S    = (D.f[N   ])[kn   ];
-      real f_N    = (D.f[S   ])[ks   ];
-      real f_B    = (D.f[T   ])[kt   ];
-      real f_T    = (D.f[B   ])[kb   ];
-      real f_SW   = (D.f[NE  ])[kne  ];
-      real f_NE   = (D.f[SW  ])[ksw  ];
-      real f_NW   = (D.f[SE  ])[kse  ];
-      real f_SE   = (D.f[NW  ])[knw  ];
-      real f_BW   = (D.f[TE  ])[kte  ];
-      real f_TE   = (D.f[BW  ])[kbw  ];
-      real f_TW   = (D.f[BE  ])[kbe  ];
-      real f_BE   = (D.f[TW  ])[ktw  ];
-      real f_BS   = (D.f[TN  ])[ktn  ];
-      real f_TN   = (D.f[BS  ])[kbs  ];
-      real f_TS   = (D.f[BN  ])[kbn  ];
-      real f_BN   = (D.f[TS  ])[kts  ];
-      real f_ZERO = (D.f[REST])[kzero];
-      real f_BSW  = (D.f[TNE ])[ktne ];
-      real f_BNE  = (D.f[TSW ])[ktsw ];
-      real f_BNW  = (D.f[TSE ])[ktse ];
-      real f_BSE  = (D.f[TNW ])[ktnw ];
-      real f_TSW  = (D.f[BNE ])[kbne ];
-      real f_TNE  = (D.f[BSW ])[kbsw ];
-      real f_TNW  = (D.f[BSE ])[kbse ];
-      real f_TSE  = (D.f[BNW ])[kbnw ];
+      real f_W    = (D.f[DIR_P00   ])[ke   ];
+      real f_E    = (D.f[DIR_M00   ])[kw   ];
+      real f_S    = (D.f[DIR_0P0   ])[kn   ];
+      real f_N    = (D.f[DIR_0M0   ])[ks   ];
+      real f_B    = (D.f[DIR_00P   ])[kt   ];
+      real f_T    = (D.f[DIR_00M   ])[kb   ];
+      real f_SW   = (D.f[DIR_PP0  ])[kne  ];
+      real f_NE   = (D.f[DIR_MM0  ])[ksw  ];
+      real f_NW   = (D.f[DIR_PM0  ])[kse  ];
+      real f_SE   = (D.f[DIR_MP0  ])[knw  ];
+      real f_BW   = (D.f[DIR_P0P  ])[kte  ];
+      real f_TE   = (D.f[DIR_M0M  ])[kbw  ];
+      real f_TW   = (D.f[DIR_P0M  ])[kbe  ];
+      real f_BE   = (D.f[DIR_M0P  ])[ktw  ];
+      real f_BS   = (D.f[DIR_0PP  ])[ktn  ];
+      real f_TN   = (D.f[DIR_0MM  ])[kbs  ];
+      real f_TS   = (D.f[DIR_0PM  ])[kbn  ];
+      real f_BN   = (D.f[DIR_0MP  ])[kts  ];
+      real f_ZERO = (D.f[DIR_000])[kzero];
+      real f_BSW  = (D.f[DIR_PPP ])[ktne ];
+      real f_BNE  = (D.f[DIR_MMP ])[ktsw ];
+      real f_BNW  = (D.f[DIR_PMP ])[ktse ];
+      real f_BSE  = (D.f[DIR_MPP ])[ktnw ];
+      real f_TSW  = (D.f[DIR_PPM ])[kbne ];
+      real f_TNE  = (D.f[DIR_MMM ])[kbsw ];
+      real f_TNW  = (D.f[DIR_PMM ])[kbse ];
+      real f_TSE  = (D.f[DIR_MPM ])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, /*drho, feq,*/ q;
       ////drho   = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
@@ -2098,33 +2098,33 @@ extern "C" __global__ void QADVel27(real* DD,
       vx2     =  OORho*((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_BNW-f_TSE)+(f_TNW-f_BSE) +(f_NE-f_SW)+(f_NW-f_SE)+(f_TN-f_BS)+(f_BN-f_TS)+(f_N-f_S));
       vx3     =  OORho*((f_TNE-f_BSW)+(f_TSW-f_BNE)+(f_TSE-f_BNW)+(f_TNW-f_BSE) +(f_TE-f_BW)+(f_TW-f_BE)+(f_TN-f_BS)+(f_TS-f_BN)+(f_T-f_B));
       ////////////////////////////////////////////////////////////////////////////////
-      //real f27_W    = (D27.f[E   ])[ke   ];
-      //real f27_E    = (D27.f[W   ])[kw   ];
-      //real f27_S    = (D27.f[N   ])[kn   ];
-      //real f27_N    = (D27.f[S   ])[ks   ];
-      //real f27_B    = (D27.f[T   ])[kt   ];
-      //real f27_T    = (D27.f[B   ])[kb   ];
-      //real f27_SW   = (D27.f[NE  ])[kne  ];
-      //real f27_NE   = (D27.f[SW  ])[ksw  ];
-      //real f27_NW   = (D27.f[SE  ])[kse  ];
-      //real f27_SE   = (D27.f[NW  ])[knw  ];
-      //real f27_BW   = (D27.f[TE  ])[kte  ];
-      //real f27_TE   = (D27.f[BW  ])[kbw  ];
-      //real f27_TW   = (D27.f[BE  ])[kbe  ];
-      //real f27_BE   = (D27.f[TW  ])[ktw  ];
-      //real f27_BS   = (D27.f[TN  ])[ktn  ];
-      //real f27_TN   = (D27.f[BS  ])[kbs  ];
-      //real f27_TS   = (D27.f[BN  ])[kbn  ];
-      //real f27_BN   = (D27.f[TS  ])[kts  ];
-      //real f27_ZERO = (D27.f[REST])[kzero];
-      //real f27_BSW  = (D27.f[TNE ])[ktne ];
-      //real f27_BNE  = (D27.f[TSW ])[ktsw ];
-      //real f27_BNW  = (D27.f[TSE ])[ktse ];
-      //real f27_BSE  = (D27.f[TNW ])[ktnw ];
-      //real f27_TSW  = (D27.f[BNE ])[kbne ];
-      //real f27_TNE  = (D27.f[BSW ])[kbsw ];
-      //real f27_TNW  = (D27.f[BSE ])[kbse ];
-      //real f27_TSE  = (D27.f[BNW ])[kbnw ];
+      //real f27_W    = (D27.f[DIR_P00   ])[ke   ];
+      //real f27_E    = (D27.f[DIR_M00   ])[kw   ];
+      //real f27_S    = (D27.f[DIR_0P0   ])[kn   ];
+      //real f27_N    = (D27.f[DIR_0M0   ])[ks   ];
+      //real f27_B    = (D27.f[DIR_00P   ])[kt   ];
+      //real f27_T    = (D27.f[DIR_00M   ])[kb   ];
+      //real f27_SW   = (D27.f[DIR_PP0  ])[kne  ];
+      //real f27_NE   = (D27.f[DIR_MM0  ])[ksw  ];
+      //real f27_NW   = (D27.f[DIR_PM0  ])[kse  ];
+      //real f27_SE   = (D27.f[DIR_MP0  ])[knw  ];
+      //real f27_BW   = (D27.f[DIR_P0P  ])[kte  ];
+      //real f27_TE   = (D27.f[DIR_M0M  ])[kbw  ];
+      //real f27_TW   = (D27.f[DIR_P0M  ])[kbe  ];
+      //real f27_BE   = (D27.f[DIR_M0P  ])[ktw  ];
+      //real f27_BS   = (D27.f[DIR_0PP  ])[ktn  ];
+      //real f27_TN   = (D27.f[DIR_0MM  ])[kbs  ];
+      //real f27_TS   = (D27.f[DIR_0PM  ])[kbn  ];
+      //real f27_BN   = (D27.f[DIR_0MP  ])[kts  ];
+      //real f27_ZERO = (D27.f[DIR_000])[kzero];
+      //real f27_BSW  = (D27.f[DIR_PPP ])[ktne ];
+      //real f27_BNE  = (D27.f[DIR_MMP ])[ktsw ];
+      //real f27_BNW  = (D27.f[DIR_PMP ])[ktse ];
+      //real f27_BSE  = (D27.f[DIR_MPP ])[ktnw ];
+      //real f27_TSW  = (D27.f[DIR_PPM ])[kbne ];
+      //real f27_TNE  = (D27.f[DIR_MMM ])[kbsw ];
+      //real f27_TNW  = (D27.f[DIR_PMM ])[kbse ];
+      //real f27_TSE  = (D27.f[DIR_MPM ])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
       ////////////////////////////////////////////////////////////////////////////////
@@ -2233,150 +2233,150 @@ extern "C" __global__ void QADVel27(real* DD,
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D27.f[E   ] = &DD27[E   *size_Mat];
-         D27.f[W   ] = &DD27[W   *size_Mat];
-         D27.f[N   ] = &DD27[N   *size_Mat];
-         D27.f[S   ] = &DD27[S   *size_Mat];
-         D27.f[T   ] = &DD27[T   *size_Mat];
-         D27.f[B   ] = &DD27[B   *size_Mat];
-         D27.f[NE  ] = &DD27[NE  *size_Mat];
-         D27.f[SW  ] = &DD27[SW  *size_Mat];
-         D27.f[SE  ] = &DD27[SE  *size_Mat];
-         D27.f[NW  ] = &DD27[NW  *size_Mat];
-         D27.f[TE  ] = &DD27[TE  *size_Mat];
-         D27.f[BW  ] = &DD27[BW  *size_Mat];
-         D27.f[BE  ] = &DD27[BE  *size_Mat];
-         D27.f[TW  ] = &DD27[TW  *size_Mat];
-         D27.f[TN  ] = &DD27[TN  *size_Mat];
-         D27.f[BS  ] = &DD27[BS  *size_Mat];
-         D27.f[BN  ] = &DD27[BN  *size_Mat];
-         D27.f[TS  ] = &DD27[TS  *size_Mat];
-         D27.f[REST] = &DD27[REST*size_Mat];
-         D27.f[TNE ] = &DD27[TNE *size_Mat];
-         D27.f[TSW ] = &DD27[TSW *size_Mat];
-         D27.f[TSE ] = &DD27[TSE *size_Mat];
-         D27.f[TNW ] = &DD27[TNW *size_Mat];
-         D27.f[BNE ] = &DD27[BNE *size_Mat];
-         D27.f[BSW ] = &DD27[BSW *size_Mat];
-         D27.f[BSE ] = &DD27[BSE *size_Mat];
-         D27.f[BNW ] = &DD27[BNW *size_Mat];
+         D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
+         D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
+         D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
+         D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
+         D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
+         D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
+         D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
+         D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
+         D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
+         D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
+         D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
+         D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
+         D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
+         D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
+         D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
+         D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
+         D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
+         D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
+         D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
+         D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
+         D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
+         D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
+         D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
+         D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
+         D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
+         D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
+         D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
       } 
       else
       {
-         D27.f[W   ] = &DD27[E   *size_Mat];
-         D27.f[E   ] = &DD27[W   *size_Mat];
-         D27.f[S   ] = &DD27[N   *size_Mat];
-         D27.f[N   ] = &DD27[S   *size_Mat];
-         D27.f[B   ] = &DD27[T   *size_Mat];
-         D27.f[T   ] = &DD27[B   *size_Mat];
-         D27.f[SW  ] = &DD27[NE  *size_Mat];
-         D27.f[NE  ] = &DD27[SW  *size_Mat];
-         D27.f[NW  ] = &DD27[SE  *size_Mat];
-         D27.f[SE  ] = &DD27[NW  *size_Mat];
-         D27.f[BW  ] = &DD27[TE  *size_Mat];
-         D27.f[TE  ] = &DD27[BW  *size_Mat];
-         D27.f[TW  ] = &DD27[BE  *size_Mat];
-         D27.f[BE  ] = &DD27[TW  *size_Mat];
-         D27.f[BS  ] = &DD27[TN  *size_Mat];
-         D27.f[TN  ] = &DD27[BS  *size_Mat];
-         D27.f[TS  ] = &DD27[BN  *size_Mat];
-         D27.f[BN  ] = &DD27[TS  *size_Mat];
-         D27.f[REST] = &DD27[REST*size_Mat];
-         D27.f[TNE ] = &DD27[BSW *size_Mat];
-         D27.f[TSW ] = &DD27[BNE *size_Mat];
-         D27.f[TSE ] = &DD27[BNW *size_Mat];
-         D27.f[TNW ] = &DD27[BSE *size_Mat];
-         D27.f[BNE ] = &DD27[TSW *size_Mat];
-         D27.f[BSW ] = &DD27[TNE *size_Mat];
-         D27.f[BSE ] = &DD27[TNW *size_Mat];
-         D27.f[BNW ] = &DD27[TSE *size_Mat];
+         D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
+         D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
+         D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
+         D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
+         D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
+         D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
+         D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
+         D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
+         D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
+         D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
+         D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
+         D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
+         D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
+         D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
+         D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
+         D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
+         D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
+         D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
+         D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
+         D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
+         D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
+         D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
+         D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
+         D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
+         D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
+         D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
+         D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
-      //(D.f[REST])[k]=c1o10;
+      //(D.f[DIR_000])[k]=c1o10;
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
-      //(D.f[REST])[k]=c1o10;
+      //(D.f[DIR_000])[k]=c1o10;
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      //(D27.f[W  ])[kw  ]= four;
-      //(D27.f[E  ])[ke  ]= four;
-      //(D27.f[S  ])[ks  ]= four;
-      //(D27.f[N  ])[kn  ]= four;
-      //(D27.f[B  ])[kb  ]= four;
-      //(D27.f[T  ])[kt  ]= four;
-      //(D27.f[SW ])[ksw ]= four;
-      //(D27.f[NE ])[kne ]= four;
-      //(D27.f[NW ])[knw ]= four;
-      //(D27.f[SE ])[kse ]= four;
-      //(D27.f[BW ])[kbw ]= four;
-      //(D27.f[TE ])[kte ]= four;
-      //(D27.f[TW ])[ktw ]= four;
-      //(D27.f[BE ])[kbe ]= four;
-      //(D27.f[BS ])[kbs ]= four;
-      //(D27.f[TN ])[ktn ]= four;
-      //(D27.f[TS ])[kts ]= four;
-      //(D27.f[BN ])[kbn ]= four;
-      //(D27.f[BSW])[kbsw]= four;
-      //(D27.f[TNE])[ktne]= four;
-      //(D27.f[TSW])[ktsw]= four;
-      //(D27.f[BNE])[kbne]= four;
-      //(D27.f[BNW])[kbnw]= four;
-      //(D27.f[TSE])[ktse]= four;
-      //(D27.f[TNW])[ktnw]= four;
-      //(D27.f[BSE])[kbse]= four;
-      q = q_dirE[k];   if (q>=c0o1 && q<=c1o1) (D27.f[W  ])[kw  ]= -feqW27_W  + c2o1 * c2o27  * TempD;
-      q = q_dirW[k];   if (q>=c0o1 && q<=c1o1) (D27.f[E  ])[ke  ]= -feqW27_E  + c2o1 * c2o27  * TempD;
-      q = q_dirN[k];   if (q>=c0o1 && q<=c1o1) (D27.f[S  ])[ks  ]= -feqW27_S  + c2o1 * c2o27  * TempD;
-      q = q_dirS[k];   if (q>=c0o1 && q<=c1o1) (D27.f[N  ])[kn  ]= -feqW27_N  + c2o1 * c2o27  * TempD;
-      q = q_dirT[k];   if (q>=c0o1 && q<=c1o1) (D27.f[B  ])[kb  ]= -feqW27_B  + c2o1 * c2o27  * TempD;
-      q = q_dirB[k];   if (q>=c0o1 && q<=c1o1) (D27.f[T  ])[kt  ]= -feqW27_T  + c2o1 * c2o27  * TempD;
-      q = q_dirNE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[SW ])[ksw ]= -feqW27_SW + c2o1 * c1o54  * TempD;
-      q = q_dirSW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[NE ])[kne ]= -feqW27_NE + c2o1 * c1o54  * TempD;
-      q = q_dirSE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[NW ])[knw ]= -feqW27_NW + c2o1 * c1o54  * TempD;
-      q = q_dirNW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[SE ])[kse ]= -feqW27_SE + c2o1 * c1o54  * TempD;
-      q = q_dirTE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[BW ])[kbw ]= -feqW27_BW + c2o1 * c1o54  * TempD;
-      q = q_dirBW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[TE ])[kte ]= -feqW27_TE + c2o1 * c1o54  * TempD;
-      q = q_dirBE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[TW ])[ktw ]= -feqW27_TW + c2o1 * c1o54  * TempD;
-      q = q_dirTW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[BE ])[kbe ]= -feqW27_BE + c2o1 * c1o54  * TempD;
-      q = q_dirTN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[BS ])[kbs ]= -feqW27_BS + c2o1 * c1o54  * TempD;
-      q = q_dirBS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[TN ])[ktn ]= -feqW27_TN + c2o1 * c1o54  * TempD;
-      q = q_dirBN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[TS ])[kts ]= -feqW27_TS + c2o1 * c1o54  * TempD;
-      q = q_dirTS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[BN ])[kbn ]= -feqW27_BN + c2o1 * c1o54  * TempD;
-      q = q_dirTNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[BSW])[kbsw]= -feqW27_BSW+ c2o1 * c1o216 * TempD;
-      q = q_dirBSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[TNE])[ktne]= -feqW27_TNE+ c2o1 * c1o216 * TempD;
-      q = q_dirBNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[TSW])[ktsw]= -feqW27_TSW+ c2o1 * c1o216 * TempD;
-      q = q_dirTSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[BNE])[kbne]= -feqW27_BNE+ c2o1 * c1o216 * TempD;
-      q = q_dirTSE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[BNW])[kbnw]= -feqW27_BNW+ c2o1 * c1o216 * TempD;
-      q = q_dirBNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[TSE])[ktse]= -feqW27_TSE+ c2o1 * c1o216 * TempD;
-      q = q_dirBSE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[TNW])[ktnw]= -feqW27_TNW+ c2o1 * c1o216 * TempD;
-      q = q_dirTNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[BSE])[kbse]= -feqW27_BSE+ c2o1 * c1o216 * TempD;
-      //q = q_dirE[k];   if (q>=zero && q<=one) (D27.f[W  ])[kw  ]=(two*feqW27_W  -(f27_E  *(q*omegaD-one)-omegaD*feq27_E  *(q-one))/(omegaD-one)+f27_W  *q)/(q+one);
-      //q = q_dirW[k];   if (q>=zero && q<=one) (D27.f[E  ])[ke  ]=(two*feqW27_E  -(f27_W  *(q*omegaD-one)-omegaD*feq27_W  *(q-one))/(omegaD-one)+f27_E  *q)/(q+one);
-      //q = q_dirN[k];   if (q>=zero && q<=one) (D27.f[S  ])[ks  ]=(two*feqW27_S  -(f27_N  *(q*omegaD-one)-omegaD*feq27_N  *(q-one))/(omegaD-one)+f27_S  *q)/(q+one);
-      //q = q_dirS[k];   if (q>=zero && q<=one) (D27.f[N  ])[kn  ]=(two*feqW27_N  -(f27_S  *(q*omegaD-one)-omegaD*feq27_S  *(q-one))/(omegaD-one)+f27_N  *q)/(q+one);
-      //q = q_dirT[k];   if (q>=zero && q<=one) (D27.f[B  ])[kb  ]=(two*feqW27_B  -(f27_T  *(q*omegaD-one)-omegaD*feq27_T  *(q-one))/(omegaD-one)+f27_B  *q)/(q+one);
-      //q = q_dirB[k];   if (q>=zero && q<=one) (D27.f[T  ])[kt  ]=(two*feqW27_T  -(f27_B  *(q*omegaD-one)-omegaD*feq27_B  *(q-one))/(omegaD-one)+f27_T  *q)/(q+one);
-      //q = q_dirNE[k];  if (q>=zero && q<=one) (D27.f[SW ])[ksw ]=(two*feqW27_SW -(f27_NE *(q*omegaD-one)-omegaD*feq27_NE *(q-one))/(omegaD-one)+f27_SW *q)/(q+one);
-      //q = q_dirSW[k];  if (q>=zero && q<=one) (D27.f[NE ])[kne ]=(two*feqW27_NE -(f27_SW *(q*omegaD-one)-omegaD*feq27_SW *(q-one))/(omegaD-one)+f27_NE *q)/(q+one);
-      //q = q_dirSE[k];  if (q>=zero && q<=one) (D27.f[NW ])[knw ]=(two*feqW27_NW -(f27_SE *(q*omegaD-one)-omegaD*feq27_SE *(q-one))/(omegaD-one)+f27_NW *q)/(q+one);
-      //q = q_dirNW[k];  if (q>=zero && q<=one) (D27.f[SE ])[kse ]=(two*feqW27_SE -(f27_NW *(q*omegaD-one)-omegaD*feq27_NW *(q-one))/(omegaD-one)+f27_SE *q)/(q+one);
-      //q = q_dirTE[k];  if (q>=zero && q<=one) (D27.f[BW ])[kbw ]=(two*feqW27_BW -(f27_TE *(q*omegaD-one)-omegaD*feq27_TE *(q-one))/(omegaD-one)+f27_BW *q)/(q+one);
-      //q = q_dirBW[k];  if (q>=zero && q<=one) (D27.f[TE ])[kte ]=(two*feqW27_TE -(f27_BW *(q*omegaD-one)-omegaD*feq27_BW *(q-one))/(omegaD-one)+f27_TE *q)/(q+one);
-      //q = q_dirBE[k];  if (q>=zero && q<=one) (D27.f[TW ])[ktw ]=(two*feqW27_TW -(f27_BE *(q*omegaD-one)-omegaD*feq27_BE *(q-one))/(omegaD-one)+f27_TW *q)/(q+one);
-      //q = q_dirTW[k];  if (q>=zero && q<=one) (D27.f[BE ])[kbe ]=(two*feqW27_BE -(f27_TW *(q*omegaD-one)-omegaD*feq27_TW *(q-one))/(omegaD-one)+f27_BE *q)/(q+one);
-      //q = q_dirTN[k];  if (q>=zero && q<=one) (D27.f[BS ])[kbs ]=(two*feqW27_BS -(f27_TN *(q*omegaD-one)-omegaD*feq27_TN *(q-one))/(omegaD-one)+f27_BS *q)/(q+one);
-      //q = q_dirBS[k];  if (q>=zero && q<=one) (D27.f[TN ])[ktn ]=(two*feqW27_TN -(f27_BS *(q*omegaD-one)-omegaD*feq27_BS *(q-one))/(omegaD-one)+f27_TN *q)/(q+one);
-      //q = q_dirBN[k];  if (q>=zero && q<=one) (D27.f[TS ])[kts ]=(two*feqW27_TS -(f27_BN *(q*omegaD-one)-omegaD*feq27_BN *(q-one))/(omegaD-one)+f27_TS *q)/(q+one);
-      //q = q_dirTS[k];  if (q>=zero && q<=one) (D27.f[BN ])[kbn ]=(two*feqW27_BN -(f27_TS *(q*omegaD-one)-omegaD*feq27_TS *(q-one))/(omegaD-one)+f27_BN *q)/(q+one);
-      //q = q_dirTNE[k]; if (q>=zero && q<=one) (D27.f[BSW])[kbsw]=(two*feqW27_BSW-(f27_TNE*(q*omegaD-one)-omegaD*feq27_TNE*(q-one))/(omegaD-one)+f27_BSW*q)/(q+one);
-      //q = q_dirBSW[k]; if (q>=zero && q<=one) (D27.f[TNE])[ktne]=(two*feqW27_TNE-(f27_BSW*(q*omegaD-one)-omegaD*feq27_BSW*(q-one))/(omegaD-one)+f27_TNE*q)/(q+one);
-      //q = q_dirBNE[k]; if (q>=zero && q<=one) (D27.f[TSW])[ktsw]=(two*feqW27_TSW-(f27_BNE*(q*omegaD-one)-omegaD*feq27_BNE*(q-one))/(omegaD-one)+f27_TSW*q)/(q+one);
-      //q = q_dirTSW[k]; if (q>=zero && q<=one) (D27.f[BNE])[kbne]=(two*feqW27_BNE-(f27_TSW*(q*omegaD-one)-omegaD*feq27_TSW*(q-one))/(omegaD-one)+f27_BNE*q)/(q+one);
-      //q = q_dirTSE[k]; if (q>=zero && q<=one) (D27.f[BNW])[kbnw]=(two*feqW27_BNW-(f27_TSE*(q*omegaD-one)-omegaD*feq27_TSE*(q-one))/(omegaD-one)+f27_BNW*q)/(q+one);
-      //q = q_dirBNW[k]; if (q>=zero && q<=one) (D27.f[TSE])[ktse]=(two*feqW27_TSE-(f27_BNW*(q*omegaD-one)-omegaD*feq27_BNW*(q-one))/(omegaD-one)+f27_TSE*q)/(q+one);
-      //q = q_dirBSE[k]; if (q>=zero && q<=one) (D27.f[TNW])[ktnw]=(two*feqW27_TNW-(f27_BSE*(q*omegaD-one)-omegaD*feq27_BSE*(q-one))/(omegaD-one)+f27_TNW*q)/(q+one);
-      //q = q_dirTNW[k]; if (q>=zero && q<=one) (D27.f[BSE])[kbse]=(two*feqW27_BSE-(f27_TNW*(q*omegaD-one)-omegaD*feq27_TNW*(q-one))/(omegaD-one)+f27_BSE*q)/(q+one);
+      //(D27.f[DIR_M00  ])[kw  ]= four;
+      //(D27.f[DIR_P00  ])[ke  ]= four;
+      //(D27.f[DIR_0M0  ])[ks  ]= four;
+      //(D27.f[DIR_0P0  ])[kn  ]= four;
+      //(D27.f[DIR_00M  ])[kb  ]= four;
+      //(D27.f[DIR_00P  ])[kt  ]= four;
+      //(D27.f[DIR_MM0 ])[ksw ]= four;
+      //(D27.f[DIR_PP0 ])[kne ]= four;
+      //(D27.f[DIR_MP0 ])[knw ]= four;
+      //(D27.f[DIR_PM0 ])[kse ]= four;
+      //(D27.f[DIR_M0M ])[kbw ]= four;
+      //(D27.f[DIR_P0P ])[kte ]= four;
+      //(D27.f[DIR_M0P ])[ktw ]= four;
+      //(D27.f[DIR_P0M ])[kbe ]= four;
+      //(D27.f[DIR_0MM ])[kbs ]= four;
+      //(D27.f[DIR_0PP ])[ktn ]= four;
+      //(D27.f[DIR_0MP ])[kts ]= four;
+      //(D27.f[DIR_0PM ])[kbn ]= four;
+      //(D27.f[DIR_MMM])[kbsw]= four;
+      //(D27.f[DIR_PPP])[ktne]= four;
+      //(D27.f[DIR_MMP])[ktsw]= four;
+      //(D27.f[DIR_PPM])[kbne]= four;
+      //(D27.f[DIR_MPM])[kbnw]= four;
+      //(D27.f[DIR_PMP])[ktse]= four;
+      //(D27.f[DIR_MPP])[ktnw]= four;
+      //(D27.f[DIR_PMM])[kbse]= four;
+      q = q_dirE[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00  ])[kw  ]= -feqW27_W  + c2o1 * c2o27  * TempD;
+      q = q_dirW[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00  ])[ke  ]= -feqW27_E  + c2o1 * c2o27  * TempD;
+      q = q_dirN[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0  ])[ks  ]= -feqW27_S  + c2o1 * c2o27  * TempD;
+      q = q_dirS[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0  ])[kn  ]= -feqW27_N  + c2o1 * c2o27  * TempD;
+      q = q_dirT[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M  ])[kb  ]= -feqW27_B  + c2o1 * c2o27  * TempD;
+      q = q_dirB[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P  ])[kt  ]= -feqW27_T  + c2o1 * c2o27  * TempD;
+      q = q_dirNE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0 ])[ksw ]= -feqW27_SW + c2o1 * c1o54  * TempD;
+      q = q_dirSW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0 ])[kne ]= -feqW27_NE + c2o1 * c1o54  * TempD;
+      q = q_dirSE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0 ])[knw ]= -feqW27_NW + c2o1 * c1o54  * TempD;
+      q = q_dirNW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0 ])[kse ]= -feqW27_SE + c2o1 * c1o54  * TempD;
+      q = q_dirTE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M ])[kbw ]= -feqW27_BW + c2o1 * c1o54  * TempD;
+      q = q_dirBW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P ])[kte ]= -feqW27_TE + c2o1 * c1o54  * TempD;
+      q = q_dirBE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P ])[ktw ]= -feqW27_TW + c2o1 * c1o54  * TempD;
+      q = q_dirTW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M ])[kbe ]= -feqW27_BE + c2o1 * c1o54  * TempD;
+      q = q_dirTN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM ])[kbs ]= -feqW27_BS + c2o1 * c1o54  * TempD;
+      q = q_dirBS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP ])[ktn ]= -feqW27_TN + c2o1 * c1o54  * TempD;
+      q = q_dirBN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP ])[kts ]= -feqW27_TS + c2o1 * c1o54  * TempD;
+      q = q_dirTS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM ])[kbn ]= -feqW27_BN + c2o1 * c1o54  * TempD;
+      q = q_dirTNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMM])[kbsw]= -feqW27_BSW+ c2o1 * c1o216 * TempD;
+      q = q_dirBSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PPP])[ktne]= -feqW27_TNE+ c2o1 * c1o216 * TempD;
+      q = q_dirBNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMP])[ktsw]= -feqW27_TSW+ c2o1 * c1o216 * TempD;
+      q = q_dirTSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PPM])[kbne]= -feqW27_BNE+ c2o1 * c1o216 * TempD;
+      q = q_dirTSE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MPM])[kbnw]= -feqW27_BNW+ c2o1 * c1o216 * TempD;
+      q = q_dirBNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PMP])[ktse]= -feqW27_TSE+ c2o1 * c1o216 * TempD;
+      q = q_dirBSE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MPP])[ktnw]= -feqW27_TNW+ c2o1 * c1o216 * TempD;
+      q = q_dirTNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PMM])[kbse]= -feqW27_BSE+ c2o1 * c1o216 * TempD;
+      //q = q_dirE[k];   if (q>=zero && q<=one) (D27.f[DIR_M00  ])[kw  ]=(two*feqW27_W  -(f27_E  *(q*omegaD-one)-omegaD*feq27_E  *(q-one))/(omegaD-one)+f27_W  *q)/(q+one);
+      //q = q_dirW[k];   if (q>=zero && q<=one) (D27.f[DIR_P00  ])[ke  ]=(two*feqW27_E  -(f27_W  *(q*omegaD-one)-omegaD*feq27_W  *(q-one))/(omegaD-one)+f27_E  *q)/(q+one);
+      //q = q_dirN[k];   if (q>=zero && q<=one) (D27.f[DIR_0M0  ])[ks  ]=(two*feqW27_S  -(f27_N  *(q*omegaD-one)-omegaD*feq27_N  *(q-one))/(omegaD-one)+f27_S  *q)/(q+one);
+      //q = q_dirS[k];   if (q>=zero && q<=one) (D27.f[DIR_0P0  ])[kn  ]=(two*feqW27_N  -(f27_S  *(q*omegaD-one)-omegaD*feq27_S  *(q-one))/(omegaD-one)+f27_N  *q)/(q+one);
+      //q = q_dirT[k];   if (q>=zero && q<=one) (D27.f[DIR_00M  ])[kb  ]=(two*feqW27_B  -(f27_T  *(q*omegaD-one)-omegaD*feq27_T  *(q-one))/(omegaD-one)+f27_B  *q)/(q+one);
+      //q = q_dirB[k];   if (q>=zero && q<=one) (D27.f[DIR_00P  ])[kt  ]=(two*feqW27_T  -(f27_B  *(q*omegaD-one)-omegaD*feq27_B  *(q-one))/(omegaD-one)+f27_T  *q)/(q+one);
+      //q = q_dirNE[k];  if (q>=zero && q<=one) (D27.f[DIR_MM0 ])[ksw ]=(two*feqW27_SW -(f27_NE *(q*omegaD-one)-omegaD*feq27_NE *(q-one))/(omegaD-one)+f27_SW *q)/(q+one);
+      //q = q_dirSW[k];  if (q>=zero && q<=one) (D27.f[DIR_PP0 ])[kne ]=(two*feqW27_NE -(f27_SW *(q*omegaD-one)-omegaD*feq27_SW *(q-one))/(omegaD-one)+f27_NE *q)/(q+one);
+      //q = q_dirSE[k];  if (q>=zero && q<=one) (D27.f[DIR_MP0 ])[knw ]=(two*feqW27_NW -(f27_SE *(q*omegaD-one)-omegaD*feq27_SE *(q-one))/(omegaD-one)+f27_NW *q)/(q+one);
+      //q = q_dirNW[k];  if (q>=zero && q<=one) (D27.f[DIR_PM0 ])[kse ]=(two*feqW27_SE -(f27_NW *(q*omegaD-one)-omegaD*feq27_NW *(q-one))/(omegaD-one)+f27_SE *q)/(q+one);
+      //q = q_dirTE[k];  if (q>=zero && q<=one) (D27.f[DIR_M0M ])[kbw ]=(two*feqW27_BW -(f27_TE *(q*omegaD-one)-omegaD*feq27_TE *(q-one))/(omegaD-one)+f27_BW *q)/(q+one);
+      //q = q_dirBW[k];  if (q>=zero && q<=one) (D27.f[DIR_P0P ])[kte ]=(two*feqW27_TE -(f27_BW *(q*omegaD-one)-omegaD*feq27_BW *(q-one))/(omegaD-one)+f27_TE *q)/(q+one);
+      //q = q_dirBE[k];  if (q>=zero && q<=one) (D27.f[DIR_M0P ])[ktw ]=(two*feqW27_TW -(f27_BE *(q*omegaD-one)-omegaD*feq27_BE *(q-one))/(omegaD-one)+f27_TW *q)/(q+one);
+      //q = q_dirTW[k];  if (q>=zero && q<=one) (D27.f[DIR_P0M ])[kbe ]=(two*feqW27_BE -(f27_TW *(q*omegaD-one)-omegaD*feq27_TW *(q-one))/(omegaD-one)+f27_BE *q)/(q+one);
+      //q = q_dirTN[k];  if (q>=zero && q<=one) (D27.f[DIR_0MM ])[kbs ]=(two*feqW27_BS -(f27_TN *(q*omegaD-one)-omegaD*feq27_TN *(q-one))/(omegaD-one)+f27_BS *q)/(q+one);
+      //q = q_dirBS[k];  if (q>=zero && q<=one) (D27.f[DIR_0PP ])[ktn ]=(two*feqW27_TN -(f27_BS *(q*omegaD-one)-omegaD*feq27_BS *(q-one))/(omegaD-one)+f27_TN *q)/(q+one);
+      //q = q_dirBN[k];  if (q>=zero && q<=one) (D27.f[DIR_0MP ])[kts ]=(two*feqW27_TS -(f27_BN *(q*omegaD-one)-omegaD*feq27_BN *(q-one))/(omegaD-one)+f27_TS *q)/(q+one);
+      //q = q_dirTS[k];  if (q>=zero && q<=one) (D27.f[DIR_0PM ])[kbn ]=(two*feqW27_BN -(f27_TS *(q*omegaD-one)-omegaD*feq27_TS *(q-one))/(omegaD-one)+f27_BN *q)/(q+one);
+      //q = q_dirTNE[k]; if (q>=zero && q<=one) (D27.f[DIR_MMM])[kbsw]=(two*feqW27_BSW-(f27_TNE*(q*omegaD-one)-omegaD*feq27_TNE*(q-one))/(omegaD-one)+f27_BSW*q)/(q+one);
+      //q = q_dirBSW[k]; if (q>=zero && q<=one) (D27.f[DIR_PPP])[ktne]=(two*feqW27_TNE-(f27_BSW*(q*omegaD-one)-omegaD*feq27_BSW*(q-one))/(omegaD-one)+f27_TNE*q)/(q+one);
+      //q = q_dirBNE[k]; if (q>=zero && q<=one) (D27.f[DIR_MMP])[ktsw]=(two*feqW27_TSW-(f27_BNE*(q*omegaD-one)-omegaD*feq27_BNE*(q-one))/(omegaD-one)+f27_TSW*q)/(q+one);
+      //q = q_dirTSW[k]; if (q>=zero && q<=one) (D27.f[DIR_PPM])[kbne]=(two*feqW27_BNE-(f27_TSW*(q*omegaD-one)-omegaD*feq27_TSW*(q-one))/(omegaD-one)+f27_BNE*q)/(q+one);
+      //q = q_dirTSE[k]; if (q>=zero && q<=one) (D27.f[DIR_MPM])[kbnw]=(two*feqW27_BNW-(f27_TSE*(q*omegaD-one)-omegaD*feq27_TSE*(q-one))/(omegaD-one)+f27_BNW*q)/(q+one);
+      //q = q_dirBNW[k]; if (q>=zero && q<=one) (D27.f[DIR_PMP])[ktse]=(two*feqW27_TSE-(f27_BNW*(q*omegaD-one)-omegaD*feq27_BNW*(q-one))/(omegaD-one)+f27_TSE*q)/(q+one);
+      //q = q_dirBSE[k]; if (q>=zero && q<=one) (D27.f[DIR_MPP])[ktnw]=(two*feqW27_TNW-(f27_BSE*(q*omegaD-one)-omegaD*feq27_BSE*(q-one))/(omegaD-one)+f27_TNW*q)/(q+one);
+      //q = q_dirTNW[k]; if (q>=zero && q<=one) (D27.f[DIR_PMM])[kbse]=(two*feqW27_BSE-(f27_TNW*(q*omegaD-one)-omegaD*feq27_TNW*(q-one))/(omegaD-one)+f27_BSE*q)/(q+one);
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -2420,7 +2420,7 @@ extern "C" __global__ void QADVel27(real* DD,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QAD7( real* DD, 
+__global__ void QAD7( real* DD, 
                                  real* DD7, 
                                  real* temp,
                                  real diffusivity,
@@ -2437,63 +2437,63 @@ extern "C" __global__ void QAD7( real* DD,
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[E   ] = &DD[E   *size_Mat];
-      D.f[W   ] = &DD[W   *size_Mat];
-      D.f[N   ] = &DD[N   *size_Mat];
-      D.f[S   ] = &DD[S   *size_Mat];
-      D.f[T   ] = &DD[T   *size_Mat];
-      D.f[B   ] = &DD[B   *size_Mat];
-      D.f[NE  ] = &DD[NE  *size_Mat];
-      D.f[SW  ] = &DD[SW  *size_Mat];
-      D.f[SE  ] = &DD[SE  *size_Mat];
-      D.f[NW  ] = &DD[NW  *size_Mat];
-      D.f[TE  ] = &DD[TE  *size_Mat];
-      D.f[BW  ] = &DD[BW  *size_Mat];
-      D.f[BE  ] = &DD[BE  *size_Mat];
-      D.f[TW  ] = &DD[TW  *size_Mat];
-      D.f[TN  ] = &DD[TN  *size_Mat];
-      D.f[BS  ] = &DD[BS  *size_Mat];
-      D.f[BN  ] = &DD[BN  *size_Mat];
-      D.f[TS  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[TNE *size_Mat];
-      D.f[TSW ] = &DD[TSW *size_Mat];
-      D.f[TSE ] = &DD[TSE *size_Mat];
-      D.f[TNW ] = &DD[TNW *size_Mat];
-      D.f[BNE ] = &DD[BNE *size_Mat];
-      D.f[BSW ] = &DD[BSW *size_Mat];
-      D.f[BSE ] = &DD[BSE *size_Mat];
-      D.f[BNW ] = &DD[BNW *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
    } 
    else
    {
-      D.f[W   ] = &DD[E   *size_Mat];
-      D.f[E   ] = &DD[W   *size_Mat];
-      D.f[S   ] = &DD[N   *size_Mat];
-      D.f[N   ] = &DD[S   *size_Mat];
-      D.f[B   ] = &DD[T   *size_Mat];
-      D.f[T   ] = &DD[B   *size_Mat];
-      D.f[SW  ] = &DD[NE  *size_Mat];
-      D.f[NE  ] = &DD[SW  *size_Mat];
-      D.f[NW  ] = &DD[SE  *size_Mat];
-      D.f[SE  ] = &DD[NW  *size_Mat];
-      D.f[BW  ] = &DD[TE  *size_Mat];
-      D.f[TE  ] = &DD[BW  *size_Mat];
-      D.f[TW  ] = &DD[BE  *size_Mat];
-      D.f[BE  ] = &DD[TW  *size_Mat];
-      D.f[BS  ] = &DD[TN  *size_Mat];
-      D.f[TN  ] = &DD[BS  *size_Mat];
-      D.f[TS  ] = &DD[BN  *size_Mat];
-      D.f[BN  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[BSW *size_Mat];
-      D.f[TSW ] = &DD[BNE *size_Mat];
-      D.f[TSE ] = &DD[BNW *size_Mat];
-      D.f[TNW ] = &DD[BSE *size_Mat];
-      D.f[BNE ] = &DD[TSW *size_Mat];
-      D.f[BSW ] = &DD[TNE *size_Mat];
-      D.f[BSE ] = &DD[TNW *size_Mat];
-      D.f[BNW ] = &DD[TSE *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
    }
 
    Distributions7 D7;
@@ -2539,32 +2539,32 @@ extern "C" __global__ void QAD7( real* DD,
       //         *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
       //         *q_dirBSE, *q_dirBNW;
 
-      q_dirE   = &QQ[E   * numberOfBCnodes];
-      q_dirW   = &QQ[W   * numberOfBCnodes];
-      q_dirN   = &QQ[N   * numberOfBCnodes];
-      q_dirS   = &QQ[S   * numberOfBCnodes];
-      q_dirT   = &QQ[T   * numberOfBCnodes];
-      q_dirB   = &QQ[B   * numberOfBCnodes];
-      //q_dirNE  = &QQ[NE  * numberOfBCnodes];
-      //q_dirSW  = &QQ[SW  * numberOfBCnodes];
-      //q_dirSE  = &QQ[SE  * numberOfBCnodes];
-      //q_dirNW  = &QQ[NW  * numberOfBCnodes];
-      //q_dirTE  = &QQ[TE  * numberOfBCnodes];
-      //q_dirBW  = &QQ[BW  * numberOfBCnodes];
-      //q_dirBE  = &QQ[BE  * numberOfBCnodes];
-      //q_dirTW  = &QQ[TW  * numberOfBCnodes];
-      //q_dirTN  = &QQ[TN  * numberOfBCnodes];
-      //q_dirBS  = &QQ[BS  * numberOfBCnodes];
-      //q_dirBN  = &QQ[BN  * numberOfBCnodes];
-      //q_dirTS  = &QQ[TS  * numberOfBCnodes];
-      //q_dirTNE = &QQ[TNE * numberOfBCnodes];
-      //q_dirTSW = &QQ[TSW * numberOfBCnodes];
-      //q_dirTSE = &QQ[TSE * numberOfBCnodes];
-      //q_dirTNW = &QQ[TNW * numberOfBCnodes];
-      //q_dirBNE = &QQ[BNE * numberOfBCnodes];
-      //q_dirBSW = &QQ[BSW * numberOfBCnodes];
-      //q_dirBSE = &QQ[BSE * numberOfBCnodes];
-      //q_dirBNW = &QQ[BNW * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
+      //q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
+      //q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
+      //q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
+      //q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
+      //q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
+      //q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
+      //q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
+      //q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
+      //q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
+      //q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
+      //q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
+      //q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      //q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
+      //q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
+      //q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
+      //q_dirTNW = &QQ[DIR_MPP * numberOfBCnodes];
+      //q_dirBNE = &QQ[DIR_PPM * numberOfBCnodes];
+      //q_dirBSW = &QQ[DIR_MMM * numberOfBCnodes];
+      //q_dirBSE = &QQ[DIR_PMM * numberOfBCnodes];
+      //q_dirBNW = &QQ[DIR_MPM * numberOfBCnodes];
       //////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -2599,37 +2599,37 @@ extern "C" __global__ void QAD7( real* DD,
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
          f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_W    = (D.f[E   ])[ke   ];
-      f_E    = (D.f[W   ])[kw   ];
-      f_S    = (D.f[N   ])[kn   ];
-      f_N    = (D.f[S   ])[ks   ];
-      f_B    = (D.f[T   ])[kt   ];
-      f_T    = (D.f[B   ])[kb   ];
-      f_SW   = (D.f[NE  ])[kne  ];
-      f_NE   = (D.f[SW  ])[ksw  ];
-      f_NW   = (D.f[SE  ])[kse  ];
-      f_SE   = (D.f[NW  ])[knw  ];
-      f_BW   = (D.f[TE  ])[kte  ];
-      f_TE   = (D.f[BW  ])[kbw  ];
-      f_TW   = (D.f[BE  ])[kbe  ];
-      f_BE   = (D.f[TW  ])[ktw  ];
-      f_BS   = (D.f[TN  ])[ktn  ];
-      f_TN   = (D.f[BS  ])[kbs  ];
-      f_TS   = (D.f[BN  ])[kbn  ];
-      f_BN   = (D.f[TS  ])[kts  ];
-      f_BSW  = (D.f[TNE ])[ktne ];
-      f_BNE  = (D.f[TSW ])[ktsw ];
-      f_BNW  = (D.f[TSE ])[ktse ];
-      f_BSE  = (D.f[TNW ])[ktnw ];
-      f_TSW  = (D.f[BNE ])[kbne ];
-      f_TNE  = (D.f[BSW ])[kbsw ];
-      f_TNW  = (D.f[BSE ])[kbse ];
-      f_TSE  = (D.f[BNW ])[kbnw ];
+      f_W    = (D.f[DIR_P00   ])[ke   ];
+      f_E    = (D.f[DIR_M00   ])[kw   ];
+      f_S    = (D.f[DIR_0P0   ])[kn   ];
+      f_N    = (D.f[DIR_0M0   ])[ks   ];
+      f_B    = (D.f[DIR_00P   ])[kt   ];
+      f_T    = (D.f[DIR_00M   ])[kb   ];
+      f_SW   = (D.f[DIR_PP0  ])[kne  ];
+      f_NE   = (D.f[DIR_MM0  ])[ksw  ];
+      f_NW   = (D.f[DIR_PM0  ])[kse  ];
+      f_SE   = (D.f[DIR_MP0  ])[knw  ];
+      f_BW   = (D.f[DIR_P0P  ])[kte  ];
+      f_TE   = (D.f[DIR_M0M  ])[kbw  ];
+      f_TW   = (D.f[DIR_P0M  ])[kbe  ];
+      f_BE   = (D.f[DIR_M0P  ])[ktw  ];
+      f_BS   = (D.f[DIR_0PP  ])[ktn  ];
+      f_TN   = (D.f[DIR_0MM  ])[kbs  ];
+      f_TS   = (D.f[DIR_0PM  ])[kbn  ];
+      f_BN   = (D.f[DIR_0MP  ])[kts  ];
+      f_BSW  = (D.f[DIR_PPP ])[ktne ];
+      f_BNE  = (D.f[DIR_MMP ])[ktsw ];
+      f_BNW  = (D.f[DIR_PMP ])[ktse ];
+      f_BSE  = (D.f[DIR_MPP ])[ktnw ];
+      f_TSW  = (D.f[DIR_PPM ])[kbne ];
+      f_TNE  = (D.f[DIR_MMM ])[kbsw ];
+      f_TNW  = (D.f[DIR_PMM ])[kbse ];
+      f_TSE  = (D.f[DIR_MPM ])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3/*, drho*/;
       //drho   =    f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
       //            f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + 
-      //            f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[REST])[kzero]); 
+      //            f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[DIR_000])[kzero]); 
 
       //vx1    = ((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
       //         ((f_BE - f_TW)   + (f_TE - f_BW))   + ((f_SE - f_NW)   + (f_NE - f_SW)) +
@@ -2643,7 +2643,7 @@ extern "C" __global__ void QAD7( real* DD,
       //vx3    = ((f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) + (f_TSW - f_BNE)) +
       //         (-(f_BN - f_TS)  + (f_TN - f_BS))   + ((f_TE - f_BW)   - (f_BE - f_TW)) +
       //         (f_T - f_B); 
-      real rho0   =  (f_TNE+f_BSW)+(f_TSW+f_BNE)+(f_TSE+f_BNW)+(f_TNW+f_BSE)+(f_NE+f_SW)+(f_NW+f_SE)+(f_TE+f_BW)+(f_BE+f_TW)+(f_TN+f_BS)+(f_BN+f_TS)+(f_E+f_W)+(f_N+f_S)+(f_T+f_B)+ ((D.f[REST])[kzero]);
+      real rho0   =  (f_TNE+f_BSW)+(f_TSW+f_BNE)+(f_TSE+f_BNW)+(f_TNW+f_BSE)+(f_NE+f_SW)+(f_NW+f_SE)+(f_TE+f_BW)+(f_BE+f_TW)+(f_TN+f_BS)+(f_BN+f_TS)+(f_E+f_W)+(f_N+f_S)+(f_T+f_B)+ ((D.f[DIR_000])[kzero]);
       real rho    =  rho0 + c1o1;
       real OORho  =  c1o1/rho;
       vx1     =  OORho*((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_TSE-f_BNW)+(f_BSE-f_TNW) +(f_NE-f_SW)+(f_SE-f_NW)+(f_TE-f_BW)+(f_BE-f_TW)+(f_E-f_W));
@@ -2717,14 +2717,14 @@ extern "C" __global__ void QAD7( real* DD,
 
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
-      //(D.f[REST])[k]=c1o10;
+      //(D.f[DIR_000])[k]=c1o10;
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      //(D7.f[1])[ke   ] = f7_E - feq7_E + feqW7_W; //E
-      //(D7.f[2])[kw   ] = f7_W - feq7_W + feqW7_E; //W
-      //(D7.f[3])[kn   ] = f7_N - feq7_N + feqW7_S; //N
-      //(D7.f[4])[ks   ] = f7_S - feq7_S + feqW7_N; //S
-      //(D7.f[5])[kt   ] = f7_T - feq7_T + feqW7_B; //T
-      //(D7.f[6])[kb   ] = f7_B - feq7_B + feqW7_T; //B
+      //(D7.f[1])[ke   ] = f7_E - feq7_E + feqW7_W; //DIR_P00
+      //(D7.f[2])[kw   ] = f7_W - feq7_W + feqW7_E; //DIR_M00
+      //(D7.f[3])[kn   ] = f7_N - feq7_N + feqW7_S; //DIR_0P0
+      //(D7.f[4])[ks   ] = f7_S - feq7_S + feqW7_N; //DIR_0M0
+      //(D7.f[5])[kt   ] = f7_T - feq7_T + feqW7_B; //DIR_00P
+      //(D7.f[6])[kb   ] = f7_B - feq7_B + feqW7_T; //DIR_00M
 
       ////////////////////////////////////////////////////////////////////////////
       ////mit Q's
@@ -2852,7 +2852,7 @@ extern "C" __global__ void QAD7( real* DD,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QADDirichlet27(
+__global__ void QADDirichlet27(
 											 real* DD, 
 											 real* DD27, 
 											 real* temp,
@@ -2870,125 +2870,125 @@ extern "C" __global__ void QADDirichlet27(
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[E   ] = &DD[E   *size_Mat];
-      D.f[W   ] = &DD[W   *size_Mat];
-      D.f[N   ] = &DD[N   *size_Mat];
-      D.f[S   ] = &DD[S   *size_Mat];
-      D.f[T   ] = &DD[T   *size_Mat];
-      D.f[B   ] = &DD[B   *size_Mat];
-      D.f[NE  ] = &DD[NE  *size_Mat];
-      D.f[SW  ] = &DD[SW  *size_Mat];
-      D.f[SE  ] = &DD[SE  *size_Mat];
-      D.f[NW  ] = &DD[NW  *size_Mat];
-      D.f[TE  ] = &DD[TE  *size_Mat];
-      D.f[BW  ] = &DD[BW  *size_Mat];
-      D.f[BE  ] = &DD[BE  *size_Mat];
-      D.f[TW  ] = &DD[TW  *size_Mat];
-      D.f[TN  ] = &DD[TN  *size_Mat];
-      D.f[BS  ] = &DD[BS  *size_Mat];
-      D.f[BN  ] = &DD[BN  *size_Mat];
-      D.f[TS  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[TNE *size_Mat];
-      D.f[TSW ] = &DD[TSW *size_Mat];
-      D.f[TSE ] = &DD[TSE *size_Mat];
-      D.f[TNW ] = &DD[TNW *size_Mat];
-      D.f[BNE ] = &DD[BNE *size_Mat];
-      D.f[BSW ] = &DD[BSW *size_Mat];
-      D.f[BSE ] = &DD[BSE *size_Mat];
-      D.f[BNW ] = &DD[BNW *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
    } 
    else
    {
-      D.f[W   ] = &DD[E   *size_Mat];
-      D.f[E   ] = &DD[W   *size_Mat];
-      D.f[S   ] = &DD[N   *size_Mat];
-      D.f[N   ] = &DD[S   *size_Mat];
-      D.f[B   ] = &DD[T   *size_Mat];
-      D.f[T   ] = &DD[B   *size_Mat];
-      D.f[SW  ] = &DD[NE  *size_Mat];
-      D.f[NE  ] = &DD[SW  *size_Mat];
-      D.f[NW  ] = &DD[SE  *size_Mat];
-      D.f[SE  ] = &DD[NW  *size_Mat];
-      D.f[BW  ] = &DD[TE  *size_Mat];
-      D.f[TE  ] = &DD[BW  *size_Mat];
-      D.f[TW  ] = &DD[BE  *size_Mat];
-      D.f[BE  ] = &DD[TW  *size_Mat];
-      D.f[BS  ] = &DD[TN  *size_Mat];
-      D.f[TN  ] = &DD[BS  *size_Mat];
-      D.f[TS  ] = &DD[BN  *size_Mat];
-      D.f[BN  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[BSW *size_Mat];
-      D.f[TSW ] = &DD[BNE *size_Mat];
-      D.f[TSE ] = &DD[BNW *size_Mat];
-      D.f[TNW ] = &DD[BSE *size_Mat];
-      D.f[BNE ] = &DD[TSW *size_Mat];
-      D.f[BSW ] = &DD[TNE *size_Mat];
-      D.f[BSE ] = &DD[TNW *size_Mat];
-      D.f[BNW ] = &DD[TSE *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
    }
 
    Distributions27 D27;
    if (isEvenTimestep==true)
    {
-      D27.f[E   ] = &DD27[E   *size_Mat];
-      D27.f[W   ] = &DD27[W   *size_Mat];
-      D27.f[N   ] = &DD27[N   *size_Mat];
-      D27.f[S   ] = &DD27[S   *size_Mat];
-      D27.f[T   ] = &DD27[T   *size_Mat];
-      D27.f[B   ] = &DD27[B   *size_Mat];
-      D27.f[NE  ] = &DD27[NE  *size_Mat];
-      D27.f[SW  ] = &DD27[SW  *size_Mat];
-      D27.f[SE  ] = &DD27[SE  *size_Mat];
-      D27.f[NW  ] = &DD27[NW  *size_Mat];
-      D27.f[TE  ] = &DD27[TE  *size_Mat];
-      D27.f[BW  ] = &DD27[BW  *size_Mat];
-      D27.f[BE  ] = &DD27[BE  *size_Mat];
-      D27.f[TW  ] = &DD27[TW  *size_Mat];
-      D27.f[TN  ] = &DD27[TN  *size_Mat];
-      D27.f[BS  ] = &DD27[BS  *size_Mat];
-      D27.f[BN  ] = &DD27[BN  *size_Mat];
-      D27.f[TS  ] = &DD27[TS  *size_Mat];
-      D27.f[REST] = &DD27[REST*size_Mat];
-      D27.f[TNE ] = &DD27[TNE *size_Mat];
-      D27.f[TSW ] = &DD27[TSW *size_Mat];
-      D27.f[TSE ] = &DD27[TSE *size_Mat];
-      D27.f[TNW ] = &DD27[TNW *size_Mat];
-      D27.f[BNE ] = &DD27[BNE *size_Mat];
-      D27.f[BSW ] = &DD27[BSW *size_Mat];
-      D27.f[BSE ] = &DD27[BSE *size_Mat];
-      D27.f[BNW ] = &DD27[BNW *size_Mat];
+      D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
+      D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
+      D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
+      D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
+      D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
+      D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
+      D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
+      D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
+      D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
+      D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
+      D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
+      D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
+      D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
+      D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
+      D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
+      D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
+      D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
+      D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
+      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
+      D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
+      D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
+      D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
+      D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
+      D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
+      D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
+      D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
+      D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
    } 
    else
    {
-      D27.f[W   ] = &DD27[E   *size_Mat];
-      D27.f[E   ] = &DD27[W   *size_Mat];
-      D27.f[S   ] = &DD27[N   *size_Mat];
-      D27.f[N   ] = &DD27[S   *size_Mat];
-      D27.f[B   ] = &DD27[T   *size_Mat];
-      D27.f[T   ] = &DD27[B   *size_Mat];
-      D27.f[SW  ] = &DD27[NE  *size_Mat];
-      D27.f[NE  ] = &DD27[SW  *size_Mat];
-      D27.f[NW  ] = &DD27[SE  *size_Mat];
-      D27.f[SE  ] = &DD27[NW  *size_Mat];
-      D27.f[BW  ] = &DD27[TE  *size_Mat];
-      D27.f[TE  ] = &DD27[BW  *size_Mat];
-      D27.f[TW  ] = &DD27[BE  *size_Mat];
-      D27.f[BE  ] = &DD27[TW  *size_Mat];
-      D27.f[BS  ] = &DD27[TN  *size_Mat];
-      D27.f[TN  ] = &DD27[BS  *size_Mat];
-      D27.f[TS  ] = &DD27[BN  *size_Mat];
-      D27.f[BN  ] = &DD27[TS  *size_Mat];
-      D27.f[REST] = &DD27[REST*size_Mat];
-      D27.f[TNE ] = &DD27[BSW *size_Mat];
-      D27.f[TSW ] = &DD27[BNE *size_Mat];
-      D27.f[TSE ] = &DD27[BNW *size_Mat];
-      D27.f[TNW ] = &DD27[BSE *size_Mat];
-      D27.f[BNE ] = &DD27[TSW *size_Mat];
-      D27.f[BSW ] = &DD27[TNE *size_Mat];
-      D27.f[BSE ] = &DD27[TNW *size_Mat];
-      D27.f[BNW ] = &DD27[TSE *size_Mat];
+      D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
+      D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
+      D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
+      D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
+      D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
+      D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
+      D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
+      D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
+      D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
+      D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
+      D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
+      D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
+      D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
+      D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
+      D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
+      D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
+      D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
+      D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
+      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
+      D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
+      D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
+      D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
+      D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
+      D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
+      D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
+      D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
+      D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -3009,32 +3009,32 @@ extern "C" __global__ void QADDirichlet27(
          *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
          *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
          *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[E   * numberOfBCnodes];
-      q_dirW   = &QQ[W   * numberOfBCnodes];
-      q_dirN   = &QQ[N   * numberOfBCnodes];
-      q_dirS   = &QQ[S   * numberOfBCnodes];
-      q_dirT   = &QQ[T   * numberOfBCnodes];
-      q_dirB   = &QQ[B   * numberOfBCnodes];
-      q_dirNE  = &QQ[NE  * numberOfBCnodes];
-      q_dirSW  = &QQ[SW  * numberOfBCnodes];
-      q_dirSE  = &QQ[SE  * numberOfBCnodes];
-      q_dirNW  = &QQ[NW  * numberOfBCnodes];
-      q_dirTE  = &QQ[TE  * numberOfBCnodes];
-      q_dirBW  = &QQ[BW  * numberOfBCnodes];
-      q_dirBE  = &QQ[BE  * numberOfBCnodes];
-      q_dirTW  = &QQ[TW  * numberOfBCnodes];
-      q_dirTN  = &QQ[TN  * numberOfBCnodes];
-      q_dirBS  = &QQ[BS  * numberOfBCnodes];
-      q_dirBN  = &QQ[BN  * numberOfBCnodes];
-      q_dirTS  = &QQ[TS  * numberOfBCnodes];
-      q_dirTNE = &QQ[TNE * numberOfBCnodes];
-      q_dirTSW = &QQ[TSW * numberOfBCnodes];
-      q_dirTSE = &QQ[TSE * numberOfBCnodes];
-      q_dirTNW = &QQ[TNW * numberOfBCnodes];
-      q_dirBNE = &QQ[BNE * numberOfBCnodes];
-      q_dirBSW = &QQ[BSW * numberOfBCnodes];
-      q_dirBSE = &QQ[BSE * numberOfBCnodes];
-      q_dirBNW = &QQ[BNW * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
+      q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
+      q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
+      q_dirTNW = &QQ[DIR_MPP * numberOfBCnodes];
+      q_dirBNE = &QQ[DIR_PPM * numberOfBCnodes];
+      q_dirBSW = &QQ[DIR_MMM * numberOfBCnodes];
+      q_dirBSE = &QQ[DIR_PMM * numberOfBCnodes];
+      q_dirBNW = &QQ[DIR_MPM * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -3066,33 +3066,33 @@ extern "C" __global__ void QADDirichlet27(
       unsigned int ktne = KQK;
       unsigned int kbsw = neighborZ[ksw];
       ////////////////////////////////////////////////////////////////////////////////
-      real f_W    = (D.f[E   ])[ke   ];
-      real f_E    = (D.f[W   ])[kw   ];
-      real f_S    = (D.f[N   ])[kn   ];
-      real f_N    = (D.f[S   ])[ks   ];
-      real f_B    = (D.f[T   ])[kt   ];
-      real f_T    = (D.f[B   ])[kb   ];
-      real f_SW   = (D.f[NE  ])[kne  ];
-      real f_NE   = (D.f[SW  ])[ksw  ];
-      real f_NW   = (D.f[SE  ])[kse  ];
-      real f_SE   = (D.f[NW  ])[knw  ];
-      real f_BW   = (D.f[TE  ])[kte  ];
-      real f_TE   = (D.f[BW  ])[kbw  ];
-      real f_TW   = (D.f[BE  ])[kbe  ];
-      real f_BE   = (D.f[TW  ])[ktw  ];
-      real f_BS   = (D.f[TN  ])[ktn  ];
-      real f_TN   = (D.f[BS  ])[kbs  ];
-      real f_TS   = (D.f[BN  ])[kbn  ];
-      real f_BN   = (D.f[TS  ])[kts  ];
-      real f_ZERO = (D.f[REST])[kzero];
-      real f_BSW  = (D.f[TNE ])[ktne ];
-      real f_BNE  = (D.f[TSW ])[ktsw ];
-      real f_BNW  = (D.f[TSE ])[ktse ];
-      real f_BSE  = (D.f[TNW ])[ktnw ];
-      real f_TSW  = (D.f[BNE ])[kbne ];
-      real f_TNE  = (D.f[BSW ])[kbsw ];
-      real f_TNW  = (D.f[BSE ])[kbse ];
-      real f_TSE  = (D.f[BNW ])[kbnw ];
+      real f_W    = (D.f[DIR_P00   ])[ke   ];
+      real f_E    = (D.f[DIR_M00   ])[kw   ];
+      real f_S    = (D.f[DIR_0P0   ])[kn   ];
+      real f_N    = (D.f[DIR_0M0   ])[ks   ];
+      real f_B    = (D.f[DIR_00P   ])[kt   ];
+      real f_T    = (D.f[DIR_00M   ])[kb   ];
+      real f_SW   = (D.f[DIR_PP0  ])[kne  ];
+      real f_NE   = (D.f[DIR_MM0  ])[ksw  ];
+      real f_NW   = (D.f[DIR_PM0  ])[kse  ];
+      real f_SE   = (D.f[DIR_MP0  ])[knw  ];
+      real f_BW   = (D.f[DIR_P0P  ])[kte  ];
+      real f_TE   = (D.f[DIR_M0M  ])[kbw  ];
+      real f_TW   = (D.f[DIR_P0M  ])[kbe  ];
+      real f_BE   = (D.f[DIR_M0P  ])[ktw  ];
+      real f_BS   = (D.f[DIR_0PP  ])[ktn  ];
+      real f_TN   = (D.f[DIR_0MM  ])[kbs  ];
+      real f_TS   = (D.f[DIR_0PM  ])[kbn  ];
+      real f_BN   = (D.f[DIR_0MP  ])[kts  ];
+      real f_ZERO = (D.f[DIR_000])[kzero];
+      real f_BSW  = (D.f[DIR_PPP ])[ktne ];
+      real f_BNE  = (D.f[DIR_MMP ])[ktsw ];
+      real f_BNW  = (D.f[DIR_PMP ])[ktse ];
+      real f_BSE  = (D.f[DIR_MPP ])[ktnw ];
+      real f_TSW  = (D.f[DIR_PPM ])[kbne ];
+      real f_TNE  = (D.f[DIR_MMM ])[kbsw ];
+      real f_TNW  = (D.f[DIR_PMM ])[kbse ];
+      real f_TSE  = (D.f[DIR_MPM ])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, /*drho, feq,*/ q;
       ////drho   = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
@@ -3118,33 +3118,33 @@ extern "C" __global__ void QADDirichlet27(
       vx2     =  OORho*((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_BNW-f_TSE)+(f_TNW-f_BSE) +(f_NE-f_SW)+(f_NW-f_SE)+(f_TN-f_BS)+(f_BN-f_TS)+(f_N-f_S));
       vx3     =  OORho*((f_TNE-f_BSW)+(f_TSW-f_BNE)+(f_TSE-f_BNW)+(f_TNW-f_BSE) +(f_TE-f_BW)+(f_TW-f_BE)+(f_TN-f_BS)+(f_TS-f_BN)+(f_T-f_B));
       ////////////////////////////////////////////////////////////////////////////////
-      real f27_W    = (D27.f[E   ])[ke   ];
-      real f27_E    = (D27.f[W   ])[kw   ];
-      real f27_S    = (D27.f[N   ])[kn   ];
-      real f27_N    = (D27.f[S   ])[ks   ];
-      real f27_B    = (D27.f[T   ])[kt   ];
-      real f27_T    = (D27.f[B   ])[kb   ];
-      real f27_SW   = (D27.f[NE  ])[kne  ];
-      real f27_NE   = (D27.f[SW  ])[ksw  ];
-      real f27_NW   = (D27.f[SE  ])[kse  ];
-      real f27_SE   = (D27.f[NW  ])[knw  ];
-      real f27_BW   = (D27.f[TE  ])[kte  ];
-      real f27_TE   = (D27.f[BW  ])[kbw  ];
-      real f27_TW   = (D27.f[BE  ])[kbe  ];
-      real f27_BE   = (D27.f[TW  ])[ktw  ];
-      real f27_BS   = (D27.f[TN  ])[ktn  ];
-      real f27_TN   = (D27.f[BS  ])[kbs  ];
-      real f27_TS   = (D27.f[BN  ])[kbn  ];
-      real f27_BN   = (D27.f[TS  ])[kts  ];
-      real f27_ZERO = (D27.f[REST])[kzero];
-      real f27_BSW  = (D27.f[TNE ])[ktne ];
-      real f27_BNE  = (D27.f[TSW ])[ktsw ];
-      real f27_BNW  = (D27.f[TSE ])[ktse ];
-      real f27_BSE  = (D27.f[TNW ])[ktnw ];
-      real f27_TSW  = (D27.f[BNE ])[kbne ];
-      real f27_TNE  = (D27.f[BSW ])[kbsw ];
-      real f27_TNW  = (D27.f[BSE ])[kbse ];
-      real f27_TSE  = (D27.f[BNW ])[kbnw ];
+      real f27_W    = (D27.f[DIR_P00   ])[ke   ];
+      real f27_E    = (D27.f[DIR_M00   ])[kw   ];
+      real f27_S    = (D27.f[DIR_0P0   ])[kn   ];
+      real f27_N    = (D27.f[DIR_0M0   ])[ks   ];
+      real f27_B    = (D27.f[DIR_00P   ])[kt   ];
+      real f27_T    = (D27.f[DIR_00M   ])[kb   ];
+      real f27_SW   = (D27.f[DIR_PP0  ])[kne  ];
+      real f27_NE   = (D27.f[DIR_MM0  ])[ksw  ];
+      real f27_NW   = (D27.f[DIR_PM0  ])[kse  ];
+      real f27_SE   = (D27.f[DIR_MP0  ])[knw  ];
+      real f27_BW   = (D27.f[DIR_P0P  ])[kte  ];
+      real f27_TE   = (D27.f[DIR_M0M  ])[kbw  ];
+      real f27_TW   = (D27.f[DIR_P0M  ])[kbe  ];
+      real f27_BE   = (D27.f[DIR_M0P  ])[ktw  ];
+      real f27_BS   = (D27.f[DIR_0PP  ])[ktn  ];
+      real f27_TN   = (D27.f[DIR_0MM  ])[kbs  ];
+      real f27_TS   = (D27.f[DIR_0PM  ])[kbn  ];
+      real f27_BN   = (D27.f[DIR_0MP  ])[kts  ];
+      real f27_ZERO = (D27.f[DIR_000])[kzero];
+      real f27_BSW  = (D27.f[DIR_PPP ])[ktne ];
+      real f27_BNE  = (D27.f[DIR_MMP ])[ktsw ];
+      real f27_BNW  = (D27.f[DIR_PMP ])[ktse ];
+      real f27_BSE  = (D27.f[DIR_MPP ])[ktnw ];
+      real f27_TSW  = (D27.f[DIR_PPM ])[kbne ];
+      real f27_TNE  = (D27.f[DIR_MMM ])[kbsw ];
+      real f27_TNW  = (D27.f[DIR_PMM ])[kbse ];
+      real f27_TSE  = (D27.f[DIR_MPM ])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
       ////////////////////////////////////////////////////////////////////////////////
@@ -3220,120 +3220,120 @@ extern "C" __global__ void QADDirichlet27(
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D27.f[E   ] = &DD27[E   *size_Mat];
-         D27.f[W   ] = &DD27[W   *size_Mat];
-         D27.f[N   ] = &DD27[N   *size_Mat];
-         D27.f[S   ] = &DD27[S   *size_Mat];
-         D27.f[T   ] = &DD27[T   *size_Mat];
-         D27.f[B   ] = &DD27[B   *size_Mat];
-         D27.f[NE  ] = &DD27[NE  *size_Mat];
-         D27.f[SW  ] = &DD27[SW  *size_Mat];
-         D27.f[SE  ] = &DD27[SE  *size_Mat];
-         D27.f[NW  ] = &DD27[NW  *size_Mat];
-         D27.f[TE  ] = &DD27[TE  *size_Mat];
-         D27.f[BW  ] = &DD27[BW  *size_Mat];
-         D27.f[BE  ] = &DD27[BE  *size_Mat];
-         D27.f[TW  ] = &DD27[TW  *size_Mat];
-         D27.f[TN  ] = &DD27[TN  *size_Mat];
-         D27.f[BS  ] = &DD27[BS  *size_Mat];
-         D27.f[BN  ] = &DD27[BN  *size_Mat];
-         D27.f[TS  ] = &DD27[TS  *size_Mat];
-         D27.f[REST] = &DD27[REST*size_Mat];
-         D27.f[TNE ] = &DD27[TNE *size_Mat];
-         D27.f[TSW ] = &DD27[TSW *size_Mat];
-         D27.f[TSE ] = &DD27[TSE *size_Mat];
-         D27.f[TNW ] = &DD27[TNW *size_Mat];
-         D27.f[BNE ] = &DD27[BNE *size_Mat];
-         D27.f[BSW ] = &DD27[BSW *size_Mat];
-         D27.f[BSE ] = &DD27[BSE *size_Mat];
-         D27.f[BNW ] = &DD27[BNW *size_Mat];
+         D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
+         D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
+         D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
+         D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
+         D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
+         D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
+         D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
+         D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
+         D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
+         D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
+         D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
+         D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
+         D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
+         D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
+         D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
+         D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
+         D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
+         D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
+         D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
+         D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
+         D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
+         D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
+         D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
+         D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
+         D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
+         D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
+         D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
       } 
       else
       {
-         D27.f[W   ] = &DD27[E   *size_Mat];
-         D27.f[E   ] = &DD27[W   *size_Mat];
-         D27.f[S   ] = &DD27[N   *size_Mat];
-         D27.f[N   ] = &DD27[S   *size_Mat];
-         D27.f[B   ] = &DD27[T   *size_Mat];
-         D27.f[T   ] = &DD27[B   *size_Mat];
-         D27.f[SW  ] = &DD27[NE  *size_Mat];
-         D27.f[NE  ] = &DD27[SW  *size_Mat];
-         D27.f[NW  ] = &DD27[SE  *size_Mat];
-         D27.f[SE  ] = &DD27[NW  *size_Mat];
-         D27.f[BW  ] = &DD27[TE  *size_Mat];
-         D27.f[TE  ] = &DD27[BW  *size_Mat];
-         D27.f[TW  ] = &DD27[BE  *size_Mat];
-         D27.f[BE  ] = &DD27[TW  *size_Mat];
-         D27.f[BS  ] = &DD27[TN  *size_Mat];
-         D27.f[TN  ] = &DD27[BS  *size_Mat];
-         D27.f[TS  ] = &DD27[BN  *size_Mat];
-         D27.f[BN  ] = &DD27[TS  *size_Mat];
-         D27.f[REST] = &DD27[REST*size_Mat];
-         D27.f[TNE ] = &DD27[BSW *size_Mat];
-         D27.f[TSW ] = &DD27[BNE *size_Mat];
-         D27.f[TSE ] = &DD27[BNW *size_Mat];
-         D27.f[TNW ] = &DD27[BSE *size_Mat];
-         D27.f[BNE ] = &DD27[TSW *size_Mat];
-         D27.f[BSW ] = &DD27[TNE *size_Mat];
-         D27.f[BSE ] = &DD27[TNW *size_Mat];
-         D27.f[BNW ] = &DD27[TSE *size_Mat];
+         D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
+         D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
+         D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
+         D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
+         D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
+         D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
+         D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
+         D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
+         D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
+         D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
+         D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
+         D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
+         D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
+         D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
+         D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
+         D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
+         D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
+         D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
+         D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
+         D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
+         D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
+         D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
+         D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
+         D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
+         D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
+         D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
+         D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
-      //(D.f[REST])[k]=0.1f;
+      //(D.f[DIR_000])[k]=0.1f;
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      q = q_dirE[  ke   ]; if (q>=c0o1 && q<=c1o1) (D27.f[W  ])[kw  ]=(c2o1*feqW27_W  -(f27_E  *(q*omegaD-c1o1)-omegaD*feq27_E  *(q-c1o1))/(omegaD-c1o1)+f27_W  *q)/(q+c1o1);
-      q = q_dirW[  kw   ]; if (q>=c0o1 && q<=c1o1) (D27.f[E  ])[ke  ]=(c2o1*feqW27_E  -(f27_W  *(q*omegaD-c1o1)-omegaD*feq27_W  *(q-c1o1))/(omegaD-c1o1)+f27_E  *q)/(q+c1o1);
-      q = q_dirN[  kn   ]; if (q>=c0o1 && q<=c1o1) (D27.f[S  ])[ks  ]=(c2o1*feqW27_S  -(f27_N  *(q*omegaD-c1o1)-omegaD*feq27_N  *(q-c1o1))/(omegaD-c1o1)+f27_S  *q)/(q+c1o1);
-      q = q_dirS[  ks   ]; if (q>=c0o1 && q<=c1o1) (D27.f[N  ])[kn  ]=(c2o1*feqW27_N  -(f27_S  *(q*omegaD-c1o1)-omegaD*feq27_S  *(q-c1o1))/(omegaD-c1o1)+f27_N  *q)/(q+c1o1);
-      q = q_dirT[  kt   ]; if (q>=c0o1 && q<=c1o1) (D27.f[B  ])[kb  ]=(c2o1*feqW27_B  -(f27_T  *(q*omegaD-c1o1)-omegaD*feq27_T  *(q-c1o1))/(omegaD-c1o1)+f27_B  *q)/(q+c1o1);
-      q = q_dirB[  kb   ]; if (q>=c0o1 && q<=c1o1) (D27.f[T  ])[kt  ]=(c2o1*feqW27_T  -(f27_B  *(q*omegaD-c1o1)-omegaD*feq27_B  *(q-c1o1))/(omegaD-c1o1)+f27_T  *q)/(q+c1o1);
-      q = q_dirNE[ kne  ]; if (q>=c0o1 && q<=c1o1) (D27.f[SW ])[ksw ]=(c2o1*feqW27_SW -(f27_NE *(q*omegaD-c1o1)-omegaD*feq27_NE *(q-c1o1))/(omegaD-c1o1)+f27_SW *q)/(q+c1o1);
-      q = q_dirSW[ ksw  ]; if (q>=c0o1 && q<=c1o1) (D27.f[NE ])[kne ]=(c2o1*feqW27_NE -(f27_SW *(q*omegaD-c1o1)-omegaD*feq27_SW *(q-c1o1))/(omegaD-c1o1)+f27_NE *q)/(q+c1o1);
-      q = q_dirSE[ kse  ]; if (q>=c0o1 && q<=c1o1) (D27.f[NW ])[knw ]=(c2o1*feqW27_NW -(f27_SE *(q*omegaD-c1o1)-omegaD*feq27_SE *(q-c1o1))/(omegaD-c1o1)+f27_NW *q)/(q+c1o1);
-      q = q_dirNW[ knw  ]; if (q>=c0o1 && q<=c1o1) (D27.f[SE ])[kse ]=(c2o1*feqW27_SE -(f27_NW *(q*omegaD-c1o1)-omegaD*feq27_NW *(q-c1o1))/(omegaD-c1o1)+f27_SE *q)/(q+c1o1);
-      q = q_dirTE[ kte  ]; if (q>=c0o1 && q<=c1o1) (D27.f[BW ])[kbw ]=(c2o1*feqW27_BW -(f27_TE *(q*omegaD-c1o1)-omegaD*feq27_TE *(q-c1o1))/(omegaD-c1o1)+f27_BW *q)/(q+c1o1);
-      q = q_dirBW[ kbw  ]; if (q>=c0o1 && q<=c1o1) (D27.f[TE ])[kte ]=(c2o1*feqW27_TE -(f27_BW *(q*omegaD-c1o1)-omegaD*feq27_BW *(q-c1o1))/(omegaD-c1o1)+f27_TE *q)/(q+c1o1);
-      q = q_dirBE[ kbe  ]; if (q>=c0o1 && q<=c1o1) (D27.f[TW ])[ktw ]=(c2o1*feqW27_TW -(f27_BE *(q*omegaD-c1o1)-omegaD*feq27_BE *(q-c1o1))/(omegaD-c1o1)+f27_TW *q)/(q+c1o1);
-      q = q_dirTW[ ktw  ]; if (q>=c0o1 && q<=c1o1) (D27.f[BE ])[kbe ]=(c2o1*feqW27_BE -(f27_TW *(q*omegaD-c1o1)-omegaD*feq27_TW *(q-c1o1))/(omegaD-c1o1)+f27_BE *q)/(q+c1o1);
-      q = q_dirTN[ ktn  ]; if (q>=c0o1 && q<=c1o1) (D27.f[BS ])[kbs ]=(c2o1*feqW27_BS -(f27_TN *(q*omegaD-c1o1)-omegaD*feq27_TN *(q-c1o1))/(omegaD-c1o1)+f27_BS *q)/(q+c1o1);
-      q = q_dirBS[ kbs  ]; if (q>=c0o1 && q<=c1o1) (D27.f[TN ])[ktn ]=(c2o1*feqW27_TN -(f27_BS *(q*omegaD-c1o1)-omegaD*feq27_BS *(q-c1o1))/(omegaD-c1o1)+f27_TN *q)/(q+c1o1);
-      q = q_dirBN[ kbn  ]; if (q>=c0o1 && q<=c1o1) (D27.f[TS ])[kts ]=(c2o1*feqW27_TS -(f27_BN *(q*omegaD-c1o1)-omegaD*feq27_BN *(q-c1o1))/(omegaD-c1o1)+f27_TS *q)/(q+c1o1);
-      q = q_dirTS[ kts  ]; if (q>=c0o1 && q<=c1o1) (D27.f[BN ])[kbn ]=(c2o1*feqW27_BN -(f27_TS *(q*omegaD-c1o1)-omegaD*feq27_TS *(q-c1o1))/(omegaD-c1o1)+f27_BN *q)/(q+c1o1);
-      q = q_dirTNE[ktne ]; if (q>=c0o1 && q<=c1o1) (D27.f[BSW])[kbsw]=(c2o1*feqW27_BSW-(f27_TNE*(q*omegaD-c1o1)-omegaD*feq27_TNE*(q-c1o1))/(omegaD-c1o1)+f27_BSW*q)/(q+c1o1);
-      q = q_dirBSW[kbsw ]; if (q>=c0o1 && q<=c1o1) (D27.f[TNE])[ktne]=(c2o1*feqW27_TNE-(f27_BSW*(q*omegaD-c1o1)-omegaD*feq27_BSW*(q-c1o1))/(omegaD-c1o1)+f27_TNE*q)/(q+c1o1);
-      q = q_dirBNE[kbne ]; if (q>=c0o1 && q<=c1o1) (D27.f[TSW])[ktsw]=(c2o1*feqW27_TSW-(f27_BNE*(q*omegaD-c1o1)-omegaD*feq27_BNE*(q-c1o1))/(omegaD-c1o1)+f27_TSW*q)/(q+c1o1);
-      q = q_dirTSW[ktsw ]; if (q>=c0o1 && q<=c1o1) (D27.f[BNE])[kbne]=(c2o1*feqW27_BNE-(f27_TSW*(q*omegaD-c1o1)-omegaD*feq27_TSW*(q-c1o1))/(omegaD-c1o1)+f27_BNE*q)/(q+c1o1);
-      q = q_dirTSE[ktse ]; if (q>=c0o1 && q<=c1o1) (D27.f[BNW])[kbnw]=(c2o1*feqW27_BNW-(f27_TSE*(q*omegaD-c1o1)-omegaD*feq27_TSE*(q-c1o1))/(omegaD-c1o1)+f27_BNW*q)/(q+c1o1);
-      q = q_dirBNW[kbnw ]; if (q>=c0o1 && q<=c1o1) (D27.f[TSE])[ktse]=(c2o1*feqW27_TSE-(f27_BNW*(q*omegaD-c1o1)-omegaD*feq27_BNW*(q-c1o1))/(omegaD-c1o1)+f27_TSE*q)/(q+c1o1);
-      q = q_dirBSE[kbse ]; if (q>=c0o1 && q<=c1o1) (D27.f[TNW])[ktnw]=(c2o1*feqW27_TNW-(f27_BSE*(q*omegaD-c1o1)-omegaD*feq27_BSE*(q-c1o1))/(omegaD-c1o1)+f27_TNW*q)/(q+c1o1);
-      q = q_dirTNW[ktnw ]; if (q>=c0o1 && q<=c1o1) (D27.f[BSE])[kbse]=(c2o1*feqW27_BSE-(f27_TNW*(q*omegaD-c1o1)-omegaD*feq27_TNW*(q-c1o1))/(omegaD-c1o1)+f27_BSE*q)/(q+c1o1);
-      //q = q_dirE[k];   if (q>=zero && q<=one) (D27.f[W  ])[kw  ]=(two*feqW27_W  -(f27_E  *(q*omegaD-one)-omegaD*feq27_E  *(q-one))/(omegaD-one)+f27_W  *q)/(q+one);
-      //q = q_dirW[k];   if (q>=zero && q<=one) (D27.f[E  ])[ke  ]=(two*feqW27_E  -(f27_W  *(q*omegaD-one)-omegaD*feq27_W  *(q-one))/(omegaD-one)+f27_E  *q)/(q+one);
-      //q = q_dirN[k];   if (q>=zero && q<=one) (D27.f[S  ])[ks  ]=(two*feqW27_S  -(f27_N  *(q*omegaD-one)-omegaD*feq27_N  *(q-one))/(omegaD-one)+f27_S  *q)/(q+one);
-      //q = q_dirS[k];   if (q>=zero && q<=one) (D27.f[N  ])[kn  ]=(two*feqW27_N  -(f27_S  *(q*omegaD-one)-omegaD*feq27_S  *(q-one))/(omegaD-one)+f27_N  *q)/(q+one);
-      //q = q_dirT[k];   if (q>=zero && q<=one) (D27.f[B  ])[kb  ]=(two*feqW27_B  -(f27_T  *(q*omegaD-one)-omegaD*feq27_T  *(q-one))/(omegaD-one)+f27_B  *q)/(q+one);
-      //q = q_dirB[k];   if (q>=zero && q<=one) (D27.f[T  ])[kt  ]=(two*feqW27_T  -(f27_B  *(q*omegaD-one)-omegaD*feq27_B  *(q-one))/(omegaD-one)+f27_T  *q)/(q+one);
-      //q = q_dirNE[k];  if (q>=zero && q<=one) (D27.f[SW ])[ksw ]=(two*feqW27_SW -(f27_NE *(q*omegaD-one)-omegaD*feq27_NE *(q-one))/(omegaD-one)+f27_SW *q)/(q+one);
-      //q = q_dirSW[k];  if (q>=zero && q<=one) (D27.f[NE ])[kne ]=(two*feqW27_NE -(f27_SW *(q*omegaD-one)-omegaD*feq27_SW *(q-one))/(omegaD-one)+f27_NE *q)/(q+one);
-      //q = q_dirSE[k];  if (q>=zero && q<=one) (D27.f[NW ])[knw ]=(two*feqW27_NW -(f27_SE *(q*omegaD-one)-omegaD*feq27_SE *(q-one))/(omegaD-one)+f27_NW *q)/(q+one);
-      //q = q_dirNW[k];  if (q>=zero && q<=one) (D27.f[SE ])[kse ]=(two*feqW27_SE -(f27_NW *(q*omegaD-one)-omegaD*feq27_NW *(q-one))/(omegaD-one)+f27_SE *q)/(q+one);
-      //q = q_dirTE[k];  if (q>=zero && q<=one) (D27.f[BW ])[kbw ]=(two*feqW27_BW -(f27_TE *(q*omegaD-one)-omegaD*feq27_TE *(q-one))/(omegaD-one)+f27_BW *q)/(q+one);
-      //q = q_dirBW[k];  if (q>=zero && q<=one) (D27.f[TE ])[kte ]=(two*feqW27_TE -(f27_BW *(q*omegaD-one)-omegaD*feq27_BW *(q-one))/(omegaD-one)+f27_TE *q)/(q+one);
-      //q = q_dirBE[k];  if (q>=zero && q<=one) (D27.f[TW ])[ktw ]=(two*feqW27_TW -(f27_BE *(q*omegaD-one)-omegaD*feq27_BE *(q-one))/(omegaD-one)+f27_TW *q)/(q+one);
-      //q = q_dirTW[k];  if (q>=zero && q<=one) (D27.f[BE ])[kbe ]=(two*feqW27_BE -(f27_TW *(q*omegaD-one)-omegaD*feq27_TW *(q-one))/(omegaD-one)+f27_BE *q)/(q+one);
-      //q = q_dirTN[k];  if (q>=zero && q<=one) (D27.f[BS ])[kbs ]=(two*feqW27_BS -(f27_TN *(q*omegaD-one)-omegaD*feq27_TN *(q-one))/(omegaD-one)+f27_BS *q)/(q+one);
-      //q = q_dirBS[k];  if (q>=zero && q<=one) (D27.f[TN ])[ktn ]=(two*feqW27_TN -(f27_BS *(q*omegaD-one)-omegaD*feq27_BS *(q-one))/(omegaD-one)+f27_TN *q)/(q+one);
-      //q = q_dirBN[k];  if (q>=zero && q<=one) (D27.f[TS ])[kts ]=(two*feqW27_TS -(f27_BN *(q*omegaD-one)-omegaD*feq27_BN *(q-one))/(omegaD-one)+f27_TS *q)/(q+one);
-      //q = q_dirTS[k];  if (q>=zero && q<=one) (D27.f[BN ])[kbn ]=(two*feqW27_BN -(f27_TS *(q*omegaD-one)-omegaD*feq27_TS *(q-one))/(omegaD-one)+f27_BN *q)/(q+one);
-      //q = q_dirTNE[k]; if (q>=zero && q<=one) (D27.f[BSW])[kbsw]=(two*feqW27_BSW-(f27_TNE*(q*omegaD-one)-omegaD*feq27_TNE*(q-one))/(omegaD-one)+f27_BSW*q)/(q+one);
-      //q = q_dirBSW[k]; if (q>=zero && q<=one) (D27.f[TNE])[ktne]=(two*feqW27_TNE-(f27_BSW*(q*omegaD-one)-omegaD*feq27_BSW*(q-one))/(omegaD-one)+f27_TNE*q)/(q+one);
-      //q = q_dirBNE[k]; if (q>=zero && q<=one) (D27.f[TSW])[ktsw]=(two*feqW27_TSW-(f27_BNE*(q*omegaD-one)-omegaD*feq27_BNE*(q-one))/(omegaD-one)+f27_TSW*q)/(q+one);
-      //q = q_dirTSW[k]; if (q>=zero && q<=one) (D27.f[BNE])[kbne]=(two*feqW27_BNE-(f27_TSW*(q*omegaD-one)-omegaD*feq27_TSW*(q-one))/(omegaD-one)+f27_BNE*q)/(q+one);
-      //q = q_dirTSE[k]; if (q>=zero && q<=one) (D27.f[BNW])[kbnw]=(two*feqW27_BNW-(f27_TSE*(q*omegaD-one)-omegaD*feq27_TSE*(q-one))/(omegaD-one)+f27_BNW*q)/(q+one);
-      //q = q_dirBNW[k]; if (q>=zero && q<=one) (D27.f[TSE])[ktse]=(two*feqW27_TSE-(f27_BNW*(q*omegaD-one)-omegaD*feq27_BNW*(q-one))/(omegaD-one)+f27_TSE*q)/(q+one);
-      //q = q_dirBSE[k]; if (q>=zero && q<=one) (D27.f[TNW])[ktnw]=(two*feqW27_TNW-(f27_BSE*(q*omegaD-one)-omegaD*feq27_BSE*(q-one))/(omegaD-one)+f27_TNW*q)/(q+one);
-      //q = q_dirTNW[k]; if (q>=zero && q<=one) (D27.f[BSE])[kbse]=(two*feqW27_BSE-(f27_TNW*(q*omegaD-one)-omegaD*feq27_TNW*(q-one))/(omegaD-one)+f27_BSE*q)/(q+one);
+      q = q_dirE[  ke   ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00  ])[kw  ]=(c2o1*feqW27_W  -(f27_E  *(q*omegaD-c1o1)-omegaD*feq27_E  *(q-c1o1))/(omegaD-c1o1)+f27_W  *q)/(q+c1o1);
+      q = q_dirW[  kw   ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00  ])[ke  ]=(c2o1*feqW27_E  -(f27_W  *(q*omegaD-c1o1)-omegaD*feq27_W  *(q-c1o1))/(omegaD-c1o1)+f27_E  *q)/(q+c1o1);
+      q = q_dirN[  kn   ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0  ])[ks  ]=(c2o1*feqW27_S  -(f27_N  *(q*omegaD-c1o1)-omegaD*feq27_N  *(q-c1o1))/(omegaD-c1o1)+f27_S  *q)/(q+c1o1);
+      q = q_dirS[  ks   ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0  ])[kn  ]=(c2o1*feqW27_N  -(f27_S  *(q*omegaD-c1o1)-omegaD*feq27_S  *(q-c1o1))/(omegaD-c1o1)+f27_N  *q)/(q+c1o1);
+      q = q_dirT[  kt   ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M  ])[kb  ]=(c2o1*feqW27_B  -(f27_T  *(q*omegaD-c1o1)-omegaD*feq27_T  *(q-c1o1))/(omegaD-c1o1)+f27_B  *q)/(q+c1o1);
+      q = q_dirB[  kb   ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P  ])[kt  ]=(c2o1*feqW27_T  -(f27_B  *(q*omegaD-c1o1)-omegaD*feq27_B  *(q-c1o1))/(omegaD-c1o1)+f27_T  *q)/(q+c1o1);
+      q = q_dirNE[ kne  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0 ])[ksw ]=(c2o1*feqW27_SW -(f27_NE *(q*omegaD-c1o1)-omegaD*feq27_NE *(q-c1o1))/(omegaD-c1o1)+f27_SW *q)/(q+c1o1);
+      q = q_dirSW[ ksw  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0 ])[kne ]=(c2o1*feqW27_NE -(f27_SW *(q*omegaD-c1o1)-omegaD*feq27_SW *(q-c1o1))/(omegaD-c1o1)+f27_NE *q)/(q+c1o1);
+      q = q_dirSE[ kse  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0 ])[knw ]=(c2o1*feqW27_NW -(f27_SE *(q*omegaD-c1o1)-omegaD*feq27_SE *(q-c1o1))/(omegaD-c1o1)+f27_NW *q)/(q+c1o1);
+      q = q_dirNW[ knw  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0 ])[kse ]=(c2o1*feqW27_SE -(f27_NW *(q*omegaD-c1o1)-omegaD*feq27_NW *(q-c1o1))/(omegaD-c1o1)+f27_SE *q)/(q+c1o1);
+      q = q_dirTE[ kte  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M ])[kbw ]=(c2o1*feqW27_BW -(f27_TE *(q*omegaD-c1o1)-omegaD*feq27_TE *(q-c1o1))/(omegaD-c1o1)+f27_BW *q)/(q+c1o1);
+      q = q_dirBW[ kbw  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P ])[kte ]=(c2o1*feqW27_TE -(f27_BW *(q*omegaD-c1o1)-omegaD*feq27_BW *(q-c1o1))/(omegaD-c1o1)+f27_TE *q)/(q+c1o1);
+      q = q_dirBE[ kbe  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P ])[ktw ]=(c2o1*feqW27_TW -(f27_BE *(q*omegaD-c1o1)-omegaD*feq27_BE *(q-c1o1))/(omegaD-c1o1)+f27_TW *q)/(q+c1o1);
+      q = q_dirTW[ ktw  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M ])[kbe ]=(c2o1*feqW27_BE -(f27_TW *(q*omegaD-c1o1)-omegaD*feq27_TW *(q-c1o1))/(omegaD-c1o1)+f27_BE *q)/(q+c1o1);
+      q = q_dirTN[ ktn  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM ])[kbs ]=(c2o1*feqW27_BS -(f27_TN *(q*omegaD-c1o1)-omegaD*feq27_TN *(q-c1o1))/(omegaD-c1o1)+f27_BS *q)/(q+c1o1);
+      q = q_dirBS[ kbs  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP ])[ktn ]=(c2o1*feqW27_TN -(f27_BS *(q*omegaD-c1o1)-omegaD*feq27_BS *(q-c1o1))/(omegaD-c1o1)+f27_TN *q)/(q+c1o1);
+      q = q_dirBN[ kbn  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP ])[kts ]=(c2o1*feqW27_TS -(f27_BN *(q*omegaD-c1o1)-omegaD*feq27_BN *(q-c1o1))/(omegaD-c1o1)+f27_TS *q)/(q+c1o1);
+      q = q_dirTS[ kts  ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM ])[kbn ]=(c2o1*feqW27_BN -(f27_TS *(q*omegaD-c1o1)-omegaD*feq27_TS *(q-c1o1))/(omegaD-c1o1)+f27_BN *q)/(q+c1o1);
+      q = q_dirTNE[ktne ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMM])[kbsw]=(c2o1*feqW27_BSW-(f27_TNE*(q*omegaD-c1o1)-omegaD*feq27_TNE*(q-c1o1))/(omegaD-c1o1)+f27_BSW*q)/(q+c1o1);
+      q = q_dirBSW[kbsw ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PPP])[ktne]=(c2o1*feqW27_TNE-(f27_BSW*(q*omegaD-c1o1)-omegaD*feq27_BSW*(q-c1o1))/(omegaD-c1o1)+f27_TNE*q)/(q+c1o1);
+      q = q_dirBNE[kbne ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMP])[ktsw]=(c2o1*feqW27_TSW-(f27_BNE*(q*omegaD-c1o1)-omegaD*feq27_BNE*(q-c1o1))/(omegaD-c1o1)+f27_TSW*q)/(q+c1o1);
+      q = q_dirTSW[ktsw ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PPM])[kbne]=(c2o1*feqW27_BNE-(f27_TSW*(q*omegaD-c1o1)-omegaD*feq27_TSW*(q-c1o1))/(omegaD-c1o1)+f27_BNE*q)/(q+c1o1);
+      q = q_dirTSE[ktse ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MPM])[kbnw]=(c2o1*feqW27_BNW-(f27_TSE*(q*omegaD-c1o1)-omegaD*feq27_TSE*(q-c1o1))/(omegaD-c1o1)+f27_BNW*q)/(q+c1o1);
+      q = q_dirBNW[kbnw ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PMP])[ktse]=(c2o1*feqW27_TSE-(f27_BNW*(q*omegaD-c1o1)-omegaD*feq27_BNW*(q-c1o1))/(omegaD-c1o1)+f27_TSE*q)/(q+c1o1);
+      q = q_dirBSE[kbse ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MPP])[ktnw]=(c2o1*feqW27_TNW-(f27_BSE*(q*omegaD-c1o1)-omegaD*feq27_BSE*(q-c1o1))/(omegaD-c1o1)+f27_TNW*q)/(q+c1o1);
+      q = q_dirTNW[ktnw ]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PMM])[kbse]=(c2o1*feqW27_BSE-(f27_TNW*(q*omegaD-c1o1)-omegaD*feq27_TNW*(q-c1o1))/(omegaD-c1o1)+f27_BSE*q)/(q+c1o1);
+      //q = q_dirE[k];   if (q>=zero && q<=one) (D27.f[DIR_M00  ])[kw  ]=(two*feqW27_W  -(f27_E  *(q*omegaD-one)-omegaD*feq27_E  *(q-one))/(omegaD-one)+f27_W  *q)/(q+one);
+      //q = q_dirW[k];   if (q>=zero && q<=one) (D27.f[DIR_P00  ])[ke  ]=(two*feqW27_E  -(f27_W  *(q*omegaD-one)-omegaD*feq27_W  *(q-one))/(omegaD-one)+f27_E  *q)/(q+one);
+      //q = q_dirN[k];   if (q>=zero && q<=one) (D27.f[DIR_0M0  ])[ks  ]=(two*feqW27_S  -(f27_N  *(q*omegaD-one)-omegaD*feq27_N  *(q-one))/(omegaD-one)+f27_S  *q)/(q+one);
+      //q = q_dirS[k];   if (q>=zero && q<=one) (D27.f[DIR_0P0  ])[kn  ]=(two*feqW27_N  -(f27_S  *(q*omegaD-one)-omegaD*feq27_S  *(q-one))/(omegaD-one)+f27_N  *q)/(q+one);
+      //q = q_dirT[k];   if (q>=zero && q<=one) (D27.f[DIR_00M  ])[kb  ]=(two*feqW27_B  -(f27_T  *(q*omegaD-one)-omegaD*feq27_T  *(q-one))/(omegaD-one)+f27_B  *q)/(q+one);
+      //q = q_dirB[k];   if (q>=zero && q<=one) (D27.f[DIR_00P  ])[kt  ]=(two*feqW27_T  -(f27_B  *(q*omegaD-one)-omegaD*feq27_B  *(q-one))/(omegaD-one)+f27_T  *q)/(q+one);
+      //q = q_dirNE[k];  if (q>=zero && q<=one) (D27.f[DIR_MM0 ])[ksw ]=(two*feqW27_SW -(f27_NE *(q*omegaD-one)-omegaD*feq27_NE *(q-one))/(omegaD-one)+f27_SW *q)/(q+one);
+      //q = q_dirSW[k];  if (q>=zero && q<=one) (D27.f[DIR_PP0 ])[kne ]=(two*feqW27_NE -(f27_SW *(q*omegaD-one)-omegaD*feq27_SW *(q-one))/(omegaD-one)+f27_NE *q)/(q+one);
+      //q = q_dirSE[k];  if (q>=zero && q<=one) (D27.f[DIR_MP0 ])[knw ]=(two*feqW27_NW -(f27_SE *(q*omegaD-one)-omegaD*feq27_SE *(q-one))/(omegaD-one)+f27_NW *q)/(q+one);
+      //q = q_dirNW[k];  if (q>=zero && q<=one) (D27.f[DIR_PM0 ])[kse ]=(two*feqW27_SE -(f27_NW *(q*omegaD-one)-omegaD*feq27_NW *(q-one))/(omegaD-one)+f27_SE *q)/(q+one);
+      //q = q_dirTE[k];  if (q>=zero && q<=one) (D27.f[DIR_M0M ])[kbw ]=(two*feqW27_BW -(f27_TE *(q*omegaD-one)-omegaD*feq27_TE *(q-one))/(omegaD-one)+f27_BW *q)/(q+one);
+      //q = q_dirBW[k];  if (q>=zero && q<=one) (D27.f[DIR_P0P ])[kte ]=(two*feqW27_TE -(f27_BW *(q*omegaD-one)-omegaD*feq27_BW *(q-one))/(omegaD-one)+f27_TE *q)/(q+one);
+      //q = q_dirBE[k];  if (q>=zero && q<=one) (D27.f[DIR_M0P ])[ktw ]=(two*feqW27_TW -(f27_BE *(q*omegaD-one)-omegaD*feq27_BE *(q-one))/(omegaD-one)+f27_TW *q)/(q+one);
+      //q = q_dirTW[k];  if (q>=zero && q<=one) (D27.f[DIR_P0M ])[kbe ]=(two*feqW27_BE -(f27_TW *(q*omegaD-one)-omegaD*feq27_TW *(q-one))/(omegaD-one)+f27_BE *q)/(q+one);
+      //q = q_dirTN[k];  if (q>=zero && q<=one) (D27.f[DIR_0MM ])[kbs ]=(two*feqW27_BS -(f27_TN *(q*omegaD-one)-omegaD*feq27_TN *(q-one))/(omegaD-one)+f27_BS *q)/(q+one);
+      //q = q_dirBS[k];  if (q>=zero && q<=one) (D27.f[DIR_0PP ])[ktn ]=(two*feqW27_TN -(f27_BS *(q*omegaD-one)-omegaD*feq27_BS *(q-one))/(omegaD-one)+f27_TN *q)/(q+one);
+      //q = q_dirBN[k];  if (q>=zero && q<=one) (D27.f[DIR_0MP ])[kts ]=(two*feqW27_TS -(f27_BN *(q*omegaD-one)-omegaD*feq27_BN *(q-one))/(omegaD-one)+f27_TS *q)/(q+one);
+      //q = q_dirTS[k];  if (q>=zero && q<=one) (D27.f[DIR_0PM ])[kbn ]=(two*feqW27_BN -(f27_TS *(q*omegaD-one)-omegaD*feq27_TS *(q-one))/(omegaD-one)+f27_BN *q)/(q+one);
+      //q = q_dirTNE[k]; if (q>=zero && q<=one) (D27.f[DIR_MMM])[kbsw]=(two*feqW27_BSW-(f27_TNE*(q*omegaD-one)-omegaD*feq27_TNE*(q-one))/(omegaD-one)+f27_BSW*q)/(q+one);
+      //q = q_dirBSW[k]; if (q>=zero && q<=one) (D27.f[DIR_PPP])[ktne]=(two*feqW27_TNE-(f27_BSW*(q*omegaD-one)-omegaD*feq27_BSW*(q-one))/(omegaD-one)+f27_TNE*q)/(q+one);
+      //q = q_dirBNE[k]; if (q>=zero && q<=one) (D27.f[DIR_MMP])[ktsw]=(two*feqW27_TSW-(f27_BNE*(q*omegaD-one)-omegaD*feq27_BNE*(q-one))/(omegaD-one)+f27_TSW*q)/(q+one);
+      //q = q_dirTSW[k]; if (q>=zero && q<=one) (D27.f[DIR_PPM])[kbne]=(two*feqW27_BNE-(f27_TSW*(q*omegaD-one)-omegaD*feq27_TSW*(q-one))/(omegaD-one)+f27_BNE*q)/(q+one);
+      //q = q_dirTSE[k]; if (q>=zero && q<=one) (D27.f[DIR_MPM])[kbnw]=(two*feqW27_BNW-(f27_TSE*(q*omegaD-one)-omegaD*feq27_TSE*(q-one))/(omegaD-one)+f27_BNW*q)/(q+one);
+      //q = q_dirBNW[k]; if (q>=zero && q<=one) (D27.f[DIR_PMP])[ktse]=(two*feqW27_TSE-(f27_BNW*(q*omegaD-one)-omegaD*feq27_BNW*(q-one))/(omegaD-one)+f27_TSE*q)/(q+one);
+      //q = q_dirBSE[k]; if (q>=zero && q<=one) (D27.f[DIR_MPP])[ktnw]=(two*feqW27_TNW-(f27_BSE*(q*omegaD-one)-omegaD*feq27_BSE*(q-one))/(omegaD-one)+f27_TNW*q)/(q+one);
+      //q = q_dirTNW[k]; if (q>=zero && q<=one) (D27.f[DIR_PMM])[kbse]=(two*feqW27_BSE-(f27_TNW*(q*omegaD-one)-omegaD*feq27_TNW*(q-one))/(omegaD-one)+f27_BSE*q)/(q+one);
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -3378,7 +3378,7 @@ extern "C" __global__ void QADDirichlet27(
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QADBB27( real* DD, 
+__global__ void QADBB27( real* DD, 
                                    real* DD27, 
                                    real* temp,
                                    real diffusivity,
@@ -3395,125 +3395,125 @@ extern "C" __global__ void QADBB27( real* DD,
    //Distributions27 D;
    //if (isEvenTimestep==true)
    //{
-   //   D.f[E   ] = &DD[E   *size_Mat];
-   //   D.f[W   ] = &DD[W   *size_Mat];
-   //   D.f[N   ] = &DD[N   *size_Mat];
-   //   D.f[S   ] = &DD[S   *size_Mat];
-   //   D.f[T   ] = &DD[T   *size_Mat];
-   //   D.f[B   ] = &DD[B   *size_Mat];
-   //   D.f[NE  ] = &DD[NE  *size_Mat];
-   //   D.f[SW  ] = &DD[SW  *size_Mat];
-   //   D.f[SE  ] = &DD[SE  *size_Mat];
-   //   D.f[NW  ] = &DD[NW  *size_Mat];
-   //   D.f[TE  ] = &DD[TE  *size_Mat];
-   //   D.f[BW  ] = &DD[BW  *size_Mat];
-   //   D.f[BE  ] = &DD[BE  *size_Mat];
-   //   D.f[TW  ] = &DD[TW  *size_Mat];
-   //   D.f[TN  ] = &DD[TN  *size_Mat];
-   //   D.f[BS  ] = &DD[BS  *size_Mat];
-   //   D.f[BN  ] = &DD[BN  *size_Mat];
-   //   D.f[TS  ] = &DD[TS  *size_Mat];
-   //   D.f[REST] = &DD[REST*size_Mat];
-   //   D.f[TNE ] = &DD[TNE *size_Mat];
-   //   D.f[TSW ] = &DD[TSW *size_Mat];
-   //   D.f[TSE ] = &DD[TSE *size_Mat];
-   //   D.f[TNW ] = &DD[TNW *size_Mat];
-   //   D.f[BNE ] = &DD[BNE *size_Mat];
-   //   D.f[BSW ] = &DD[BSW *size_Mat];
-   //   D.f[BSE ] = &DD[BSE *size_Mat];
-   //   D.f[BNW ] = &DD[BNW *size_Mat];
+   //   D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+   //   D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+   //   D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+   //   D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+   //   D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+   //   D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+   //   D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+   //   D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+   //   D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+   //   D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+   //   D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+   //   D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+   //   D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+   //   D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+   //   D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+   //   D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+   //   D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+   //   D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+   //   D.f[DIR_000] = &DD[DIR_000*size_Mat];
+   //   D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+   //   D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+   //   D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+   //   D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+   //   D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+   //   D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+   //   D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+   //   D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
    //} 
    //else
    //{
-   //   D.f[W   ] = &DD[E   *size_Mat];
-   //   D.f[E   ] = &DD[W   *size_Mat];
-   //   D.f[S   ] = &DD[N   *size_Mat];
-   //   D.f[N   ] = &DD[S   *size_Mat];
-   //   D.f[B   ] = &DD[T   *size_Mat];
-   //   D.f[T   ] = &DD[B   *size_Mat];
-   //   D.f[SW  ] = &DD[NE  *size_Mat];
-   //   D.f[NE  ] = &DD[SW  *size_Mat];
-   //   D.f[NW  ] = &DD[SE  *size_Mat];
-   //   D.f[SE  ] = &DD[NW  *size_Mat];
-   //   D.f[BW  ] = &DD[TE  *size_Mat];
-   //   D.f[TE  ] = &DD[BW  *size_Mat];
-   //   D.f[TW  ] = &DD[BE  *size_Mat];
-   //   D.f[BE  ] = &DD[TW  *size_Mat];
-   //   D.f[BS  ] = &DD[TN  *size_Mat];
-   //   D.f[TN  ] = &DD[BS  *size_Mat];
-   //   D.f[TS  ] = &DD[BN  *size_Mat];
-   //   D.f[BN  ] = &DD[TS  *size_Mat];
-   //   D.f[REST] = &DD[REST*size_Mat];
-   //   D.f[TNE ] = &DD[BSW *size_Mat];
-   //   D.f[TSW ] = &DD[BNE *size_Mat];
-   //   D.f[TSE ] = &DD[BNW *size_Mat];
-   //   D.f[TNW ] = &DD[BSE *size_Mat];
-   //   D.f[BNE ] = &DD[TSW *size_Mat];
-   //   D.f[BSW ] = &DD[TNE *size_Mat];
-   //   D.f[BSE ] = &DD[TNW *size_Mat];
-   //   D.f[BNW ] = &DD[TSE *size_Mat];
+   //   D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+   //   D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+   //   D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+   //   D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+   //   D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+   //   D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+   //   D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+   //   D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+   //   D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+   //   D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+   //   D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+   //   D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+   //   D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+   //   D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+   //   D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+   //   D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+   //   D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+   //   D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+   //   D.f[DIR_000] = &DD[DIR_000*size_Mat];
+   //   D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+   //   D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+   //   D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+   //   D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+   //   D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+   //   D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+   //   D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+   //   D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
    //}
 
    Distributions27 D27;
    if (isEvenTimestep==true)
    {
-      D27.f[E   ] = &DD27[E   *size_Mat];
-      D27.f[W   ] = &DD27[W   *size_Mat];
-      D27.f[N   ] = &DD27[N   *size_Mat];
-      D27.f[S   ] = &DD27[S   *size_Mat];
-      D27.f[T   ] = &DD27[T   *size_Mat];
-      D27.f[B   ] = &DD27[B   *size_Mat];
-      D27.f[NE  ] = &DD27[NE  *size_Mat];
-      D27.f[SW  ] = &DD27[SW  *size_Mat];
-      D27.f[SE  ] = &DD27[SE  *size_Mat];
-      D27.f[NW  ] = &DD27[NW  *size_Mat];
-      D27.f[TE  ] = &DD27[TE  *size_Mat];
-      D27.f[BW  ] = &DD27[BW  *size_Mat];
-      D27.f[BE  ] = &DD27[BE  *size_Mat];
-      D27.f[TW  ] = &DD27[TW  *size_Mat];
-      D27.f[TN  ] = &DD27[TN  *size_Mat];
-      D27.f[BS  ] = &DD27[BS  *size_Mat];
-      D27.f[BN  ] = &DD27[BN  *size_Mat];
-      D27.f[TS  ] = &DD27[TS  *size_Mat];
-      D27.f[REST] = &DD27[REST*size_Mat];
-      D27.f[TNE ] = &DD27[TNE *size_Mat];
-      D27.f[TSW ] = &DD27[TSW *size_Mat];
-      D27.f[TSE ] = &DD27[TSE *size_Mat];
-      D27.f[TNW ] = &DD27[TNW *size_Mat];
-      D27.f[BNE ] = &DD27[BNE *size_Mat];
-      D27.f[BSW ] = &DD27[BSW *size_Mat];
-      D27.f[BSE ] = &DD27[BSE *size_Mat];
-      D27.f[BNW ] = &DD27[BNW *size_Mat];
+      D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
+      D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
+      D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
+      D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
+      D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
+      D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
+      D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
+      D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
+      D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
+      D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
+      D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
+      D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
+      D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
+      D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
+      D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
+      D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
+      D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
+      D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
+      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
+      D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
+      D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
+      D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
+      D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
+      D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
+      D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
+      D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
+      D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
    } 
    else
    {
-      D27.f[W   ] = &DD27[E   *size_Mat];
-      D27.f[E   ] = &DD27[W   *size_Mat];
-      D27.f[S   ] = &DD27[N   *size_Mat];
-      D27.f[N   ] = &DD27[S   *size_Mat];
-      D27.f[B   ] = &DD27[T   *size_Mat];
-      D27.f[T   ] = &DD27[B   *size_Mat];
-      D27.f[SW  ] = &DD27[NE  *size_Mat];
-      D27.f[NE  ] = &DD27[SW  *size_Mat];
-      D27.f[NW  ] = &DD27[SE  *size_Mat];
-      D27.f[SE  ] = &DD27[NW  *size_Mat];
-      D27.f[BW  ] = &DD27[TE  *size_Mat];
-      D27.f[TE  ] = &DD27[BW  *size_Mat];
-      D27.f[TW  ] = &DD27[BE  *size_Mat];
-      D27.f[BE  ] = &DD27[TW  *size_Mat];
-      D27.f[BS  ] = &DD27[TN  *size_Mat];
-      D27.f[TN  ] = &DD27[BS  *size_Mat];
-      D27.f[TS  ] = &DD27[BN  *size_Mat];
-      D27.f[BN  ] = &DD27[TS  *size_Mat];
-      D27.f[REST] = &DD27[REST*size_Mat];
-      D27.f[TNE ] = &DD27[BSW *size_Mat];
-      D27.f[TSW ] = &DD27[BNE *size_Mat];
-      D27.f[TSE ] = &DD27[BNW *size_Mat];
-      D27.f[TNW ] = &DD27[BSE *size_Mat];
-      D27.f[BNE ] = &DD27[TSW *size_Mat];
-      D27.f[BSW ] = &DD27[TNE *size_Mat];
-      D27.f[BSE ] = &DD27[TNW *size_Mat];
-      D27.f[BNW ] = &DD27[TSE *size_Mat];
+      D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
+      D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
+      D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
+      D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
+      D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
+      D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
+      D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
+      D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
+      D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
+      D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
+      D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
+      D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
+      D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
+      D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
+      D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
+      D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
+      D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
+      D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
+      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
+      D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
+      D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
+      D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
+      D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
+      D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
+      D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
+      D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
+      D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -3534,32 +3534,32 @@ extern "C" __global__ void QADBB27( real* DD,
          *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
          *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
          *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[E   * numberOfBCnodes];
-      q_dirW   = &QQ[W   * numberOfBCnodes];
-      q_dirN   = &QQ[N   * numberOfBCnodes];
-      q_dirS   = &QQ[S   * numberOfBCnodes];
-      q_dirT   = &QQ[T   * numberOfBCnodes];
-      q_dirB   = &QQ[B   * numberOfBCnodes];
-      q_dirNE  = &QQ[NE  * numberOfBCnodes];
-      q_dirSW  = &QQ[SW  * numberOfBCnodes];
-      q_dirSE  = &QQ[SE  * numberOfBCnodes];
-      q_dirNW  = &QQ[NW  * numberOfBCnodes];
-      q_dirTE  = &QQ[TE  * numberOfBCnodes];
-      q_dirBW  = &QQ[BW  * numberOfBCnodes];
-      q_dirBE  = &QQ[BE  * numberOfBCnodes];
-      q_dirTW  = &QQ[TW  * numberOfBCnodes];
-      q_dirTN  = &QQ[TN  * numberOfBCnodes];
-      q_dirBS  = &QQ[BS  * numberOfBCnodes];
-      q_dirBN  = &QQ[BN  * numberOfBCnodes];
-      q_dirTS  = &QQ[TS  * numberOfBCnodes];
-      q_dirTNE = &QQ[TNE * numberOfBCnodes];
-      q_dirTSW = &QQ[TSW * numberOfBCnodes];
-      q_dirTSE = &QQ[TSE * numberOfBCnodes];
-      q_dirTNW = &QQ[TNW * numberOfBCnodes];
-      q_dirBNE = &QQ[BNE * numberOfBCnodes];
-      q_dirBSW = &QQ[BSW * numberOfBCnodes];
-      q_dirBSE = &QQ[BSE * numberOfBCnodes];
-      q_dirBNW = &QQ[BNW * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
+      q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
+      q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
+      q_dirTNW = &QQ[DIR_MPP * numberOfBCnodes];
+      q_dirBNE = &QQ[DIR_PPM * numberOfBCnodes];
+      q_dirBSW = &QQ[DIR_MMM * numberOfBCnodes];
+      q_dirBSE = &QQ[DIR_PMM * numberOfBCnodes];
+      q_dirBNW = &QQ[DIR_MPM * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -3591,33 +3591,33 @@ extern "C" __global__ void QADBB27( real* DD,
       unsigned int ktne = KQK;
       unsigned int kbsw = neighborZ[ksw];
       ////////////////////////////////////////////////////////////////////////////////
-      //real f_W    = (D.f[E   ])[ke   ];
-      //real f_E    = (D.f[W   ])[kw   ];
-      //real f_S    = (D.f[N   ])[kn   ];
-      //real f_N    = (D.f[S   ])[ks   ];
-      //real f_B    = (D.f[T   ])[kt   ];
-      //real f_T    = (D.f[B   ])[kb   ];
-      //real f_SW   = (D.f[NE  ])[kne  ];
-      //real f_NE   = (D.f[SW  ])[ksw  ];
-      //real f_NW   = (D.f[SE  ])[kse  ];
-      //real f_SE   = (D.f[NW  ])[knw  ];
-      //real f_BW   = (D.f[TE  ])[kte  ];
-      //real f_TE   = (D.f[BW  ])[kbw  ];
-      //real f_TW   = (D.f[BE  ])[kbe  ];
-      //real f_BE   = (D.f[TW  ])[ktw  ];
-      //real f_BS   = (D.f[TN  ])[ktn  ];
-      //real f_TN   = (D.f[BS  ])[kbs  ];
-      //real f_TS   = (D.f[BN  ])[kbn  ];
-      //real f_BN   = (D.f[TS  ])[kts  ];
-      //real f_ZERO = (D.f[REST])[kzero];
-      //real f_BSW  = (D.f[TNE ])[ktne ];
-      //real f_BNE  = (D.f[TSW ])[ktsw ];
-      //real f_BNW  = (D.f[TSE ])[ktse ];
-      //real f_BSE  = (D.f[TNW ])[ktnw ];
-      //real f_TSW  = (D.f[BNE ])[kbne ];
-      //real f_TNE  = (D.f[BSW ])[kbsw ];
-      //real f_TNW  = (D.f[BSE ])[kbse ];
-      //real f_TSE  = (D.f[BNW ])[kbnw ];
+      //real f_W    = (D.f[DIR_P00   ])[ke   ];
+      //real f_E    = (D.f[DIR_M00   ])[kw   ];
+      //real f_S    = (D.f[DIR_0P0   ])[kn   ];
+      //real f_N    = (D.f[DIR_0M0   ])[ks   ];
+      //real f_B    = (D.f[DIR_00P   ])[kt   ];
+      //real f_T    = (D.f[DIR_00M   ])[kb   ];
+      //real f_SW   = (D.f[DIR_PP0  ])[kne  ];
+      //real f_NE   = (D.f[DIR_MM0  ])[ksw  ];
+      //real f_NW   = (D.f[DIR_PM0  ])[kse  ];
+      //real f_SE   = (D.f[DIR_MP0  ])[knw  ];
+      //real f_BW   = (D.f[DIR_P0P  ])[kte  ];
+      //real f_TE   = (D.f[DIR_M0M  ])[kbw  ];
+      //real f_TW   = (D.f[DIR_P0M  ])[kbe  ];
+      //real f_BE   = (D.f[DIR_M0P  ])[ktw  ];
+      //real f_BS   = (D.f[DIR_0PP  ])[ktn  ];
+      //real f_TN   = (D.f[DIR_0MM  ])[kbs  ];
+      //real f_TS   = (D.f[DIR_0PM  ])[kbn  ];
+      //real f_BN   = (D.f[DIR_0MP  ])[kts  ];
+      //real f_ZERO = (D.f[DIR_000])[kzero];
+      //real f_BSW  = (D.f[DIR_PPP ])[ktne ];
+      //real f_BNE  = (D.f[DIR_MMP ])[ktsw ];
+      //real f_BNW  = (D.f[DIR_PMP ])[ktse ];
+      //real f_BSE  = (D.f[DIR_MPP ])[ktnw ];
+      //real f_TSW  = (D.f[DIR_PPM ])[kbne ];
+      //real f_TNE  = (D.f[DIR_MMM ])[kbsw ];
+      //real f_TNW  = (D.f[DIR_PMM ])[kbse ];
+      //real f_TSE  = (D.f[DIR_MPM ])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       //real vx1, vx2, vx3, /*drho, feq,*/ q;
       real q;
@@ -3644,33 +3644,33 @@ extern "C" __global__ void QADBB27( real* DD,
       //vx2     =  OORho*((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_BNW-f_TSE)+(f_TNW-f_BSE) +(f_NE-f_SW)+(f_NW-f_SE)+(f_TN-f_BS)+(f_BN-f_TS)+(f_N-f_S));
       //vx3     =  OORho*((f_TNE-f_BSW)+(f_TSW-f_BNE)+(f_TSE-f_BNW)+(f_TNW-f_BSE) +(f_TE-f_BW)+(f_TW-f_BE)+(f_TN-f_BS)+(f_TS-f_BN)+(f_T-f_B));
       ////////////////////////////////////////////////////////////////////////////////
-      real f27_W    = (D27.f[E   ])[ke   ];
-      real f27_E    = (D27.f[W   ])[kw   ];
-      real f27_S    = (D27.f[N   ])[kn   ];
-      real f27_N    = (D27.f[S   ])[ks   ];
-      real f27_B    = (D27.f[T   ])[kt   ];
-      real f27_T    = (D27.f[B   ])[kb   ];
-      real f27_SW   = (D27.f[NE  ])[kne  ];
-      real f27_NE   = (D27.f[SW  ])[ksw  ];
-      real f27_NW   = (D27.f[SE  ])[kse  ];
-      real f27_SE   = (D27.f[NW  ])[knw  ];
-      real f27_BW   = (D27.f[TE  ])[kte  ];
-      real f27_TE   = (D27.f[BW  ])[kbw  ];
-      real f27_TW   = (D27.f[BE  ])[kbe  ];
-      real f27_BE   = (D27.f[TW  ])[ktw  ];
-      real f27_BS   = (D27.f[TN  ])[ktn  ];
-      real f27_TN   = (D27.f[BS  ])[kbs  ];
-      real f27_TS   = (D27.f[BN  ])[kbn  ];
-      real f27_BN   = (D27.f[TS  ])[kts  ];
-      //real f27_ZERO = (D27.f[REST])[kzero];
-      real f27_BSW  = (D27.f[TNE ])[ktne ];
-      real f27_BNE  = (D27.f[TSW ])[ktsw ];
-      real f27_BNW  = (D27.f[TSE ])[ktse ];
-      real f27_BSE  = (D27.f[TNW ])[ktnw ];
-      real f27_TSW  = (D27.f[BNE ])[kbne ];
-      real f27_TNE  = (D27.f[BSW ])[kbsw ];
-      real f27_TNW  = (D27.f[BSE ])[kbse ];
-      real f27_TSE  = (D27.f[BNW ])[kbnw ];
+      real f27_W    = (D27.f[DIR_P00   ])[ke   ];
+      real f27_E    = (D27.f[DIR_M00   ])[kw   ];
+      real f27_S    = (D27.f[DIR_0P0   ])[kn   ];
+      real f27_N    = (D27.f[DIR_0M0   ])[ks   ];
+      real f27_B    = (D27.f[DIR_00P   ])[kt   ];
+      real f27_T    = (D27.f[DIR_00M   ])[kb   ];
+      real f27_SW   = (D27.f[DIR_PP0  ])[kne  ];
+      real f27_NE   = (D27.f[DIR_MM0  ])[ksw  ];
+      real f27_NW   = (D27.f[DIR_PM0  ])[kse  ];
+      real f27_SE   = (D27.f[DIR_MP0  ])[knw  ];
+      real f27_BW   = (D27.f[DIR_P0P  ])[kte  ];
+      real f27_TE   = (D27.f[DIR_M0M  ])[kbw  ];
+      real f27_TW   = (D27.f[DIR_P0M  ])[kbe  ];
+      real f27_BE   = (D27.f[DIR_M0P  ])[ktw  ];
+      real f27_BS   = (D27.f[DIR_0PP  ])[ktn  ];
+      real f27_TN   = (D27.f[DIR_0MM  ])[kbs  ];
+      real f27_TS   = (D27.f[DIR_0PM  ])[kbn  ];
+      real f27_BN   = (D27.f[DIR_0MP  ])[kts  ];
+      //real f27_ZERO = (D27.f[DIR_000])[kzero];
+      real f27_BSW  = (D27.f[DIR_PPP ])[ktne ];
+      real f27_BNE  = (D27.f[DIR_MMP ])[ktsw ];
+      real f27_BNW  = (D27.f[DIR_PMP ])[ktse ];
+      real f27_BSE  = (D27.f[DIR_MPP ])[ktnw ];
+      real f27_TSW  = (D27.f[DIR_PPM ])[kbne ];
+      real f27_TNE  = (D27.f[DIR_MMM ])[kbsw ];
+      real f27_TNW  = (D27.f[DIR_PMM ])[kbse ];
+      real f27_TSE  = (D27.f[DIR_MPM ])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       //real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
       ////////////////////////////////////////////////////////////////////////////////
@@ -3746,94 +3746,94 @@ extern "C" __global__ void QADBB27( real* DD,
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D27.f[E   ] = &DD27[E   *size_Mat];
-         D27.f[W   ] = &DD27[W   *size_Mat];
-         D27.f[N   ] = &DD27[N   *size_Mat];
-         D27.f[S   ] = &DD27[S   *size_Mat];
-         D27.f[T   ] = &DD27[T   *size_Mat];
-         D27.f[B   ] = &DD27[B   *size_Mat];
-         D27.f[NE  ] = &DD27[NE  *size_Mat];
-         D27.f[SW  ] = &DD27[SW  *size_Mat];
-         D27.f[SE  ] = &DD27[SE  *size_Mat];
-         D27.f[NW  ] = &DD27[NW  *size_Mat];
-         D27.f[TE  ] = &DD27[TE  *size_Mat];
-         D27.f[BW  ] = &DD27[BW  *size_Mat];
-         D27.f[BE  ] = &DD27[BE  *size_Mat];
-         D27.f[TW  ] = &DD27[TW  *size_Mat];
-         D27.f[TN  ] = &DD27[TN  *size_Mat];
-         D27.f[BS  ] = &DD27[BS  *size_Mat];
-         D27.f[BN  ] = &DD27[BN  *size_Mat];
-         D27.f[TS  ] = &DD27[TS  *size_Mat];
-         D27.f[REST] = &DD27[REST*size_Mat];
-         D27.f[TNE ] = &DD27[TNE *size_Mat];
-         D27.f[TSW ] = &DD27[TSW *size_Mat];
-         D27.f[TSE ] = &DD27[TSE *size_Mat];
-         D27.f[TNW ] = &DD27[TNW *size_Mat];
-         D27.f[BNE ] = &DD27[BNE *size_Mat];
-         D27.f[BSW ] = &DD27[BSW *size_Mat];
-         D27.f[BSE ] = &DD27[BSE *size_Mat];
-         D27.f[BNW ] = &DD27[BNW *size_Mat];
+         D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
+         D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
+         D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
+         D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
+         D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
+         D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
+         D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
+         D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
+         D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
+         D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
+         D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
+         D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
+         D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
+         D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
+         D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
+         D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
+         D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
+         D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
+         D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
+         D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
+         D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
+         D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
+         D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
+         D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
+         D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
+         D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
+         D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
       } 
       else
       {
-         D27.f[W   ] = &DD27[E   *size_Mat];
-         D27.f[E   ] = &DD27[W   *size_Mat];
-         D27.f[S   ] = &DD27[N   *size_Mat];
-         D27.f[N   ] = &DD27[S   *size_Mat];
-         D27.f[B   ] = &DD27[T   *size_Mat];
-         D27.f[T   ] = &DD27[B   *size_Mat];
-         D27.f[SW  ] = &DD27[NE  *size_Mat];
-         D27.f[NE  ] = &DD27[SW  *size_Mat];
-         D27.f[NW  ] = &DD27[SE  *size_Mat];
-         D27.f[SE  ] = &DD27[NW  *size_Mat];
-         D27.f[BW  ] = &DD27[TE  *size_Mat];
-         D27.f[TE  ] = &DD27[BW  *size_Mat];
-         D27.f[TW  ] = &DD27[BE  *size_Mat];
-         D27.f[BE  ] = &DD27[TW  *size_Mat];
-         D27.f[BS  ] = &DD27[TN  *size_Mat];
-         D27.f[TN  ] = &DD27[BS  *size_Mat];
-         D27.f[TS  ] = &DD27[BN  *size_Mat];
-         D27.f[BN  ] = &DD27[TS  *size_Mat];
-         D27.f[REST] = &DD27[REST*size_Mat];
-         D27.f[TNE ] = &DD27[BSW *size_Mat];
-         D27.f[TSW ] = &DD27[BNE *size_Mat];
-         D27.f[TSE ] = &DD27[BNW *size_Mat];
-         D27.f[TNW ] = &DD27[BSE *size_Mat];
-         D27.f[BNE ] = &DD27[TSW *size_Mat];
-         D27.f[BSW ] = &DD27[TNE *size_Mat];
-         D27.f[BSE ] = &DD27[TNW *size_Mat];
-         D27.f[BNW ] = &DD27[TSE *size_Mat];
+         D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
+         D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
+         D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
+         D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
+         D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
+         D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
+         D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
+         D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
+         D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
+         D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
+         D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
+         D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
+         D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
+         D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
+         D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
+         D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
+         D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
+         D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
+         D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
+         D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
+         D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
+         D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
+         D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
+         D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
+         D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
+         D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
+         D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
-      //(D.f[REST])[k]=0.1f;
+      //(D.f[DIR_000])[k]=0.1f;
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      q = q_dirE[k];   if (q>=c0o1 && q<=c1o1) (D27.f[W  ])[kw  ]=f27_E  ;
-      q = q_dirW[k];   if (q>=c0o1 && q<=c1o1) (D27.f[E  ])[ke  ]=f27_W  ;
-      q = q_dirN[k];   if (q>=c0o1 && q<=c1o1) (D27.f[S  ])[ks  ]=f27_N  ;
-      q = q_dirS[k];   if (q>=c0o1 && q<=c1o1) (D27.f[N  ])[kn  ]=f27_S  ;
-      q = q_dirT[k];   if (q>=c0o1 && q<=c1o1) (D27.f[B  ])[kb  ]=f27_T  ;
-      q = q_dirB[k];   if (q>=c0o1 && q<=c1o1) (D27.f[T  ])[kt  ]=f27_B  ;
-      q = q_dirNE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[SW ])[ksw ]=f27_NE ;
-      q = q_dirSW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[NE ])[kne ]=f27_SW ;
-      q = q_dirSE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[NW ])[knw ]=f27_SE ;
-      q = q_dirNW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[SE ])[kse ]=f27_NW ;
-      q = q_dirTE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[BW ])[kbw ]=f27_TE ;
-      q = q_dirBW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[TE ])[kte ]=f27_BW ;
-      q = q_dirBE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[TW ])[ktw ]=f27_BE ;
-      q = q_dirTW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[BE ])[kbe ]=f27_TW ;
-      q = q_dirTN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[BS ])[kbs ]=f27_TN ;
-      q = q_dirBS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[TN ])[ktn ]=f27_BS ;
-      q = q_dirBN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[TS ])[kts ]=f27_BN ;
-      q = q_dirTS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[BN ])[kbn ]=f27_TS ;
-      q = q_dirTNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[BSW])[kbsw]=f27_TNE;
-      q = q_dirBSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[TNE])[ktne]=f27_BSW;
-      q = q_dirBNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[TSW])[ktsw]=f27_BNE;
-      q = q_dirTSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[BNE])[kbne]=f27_TSW;
-      q = q_dirTSE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[BNW])[kbnw]=f27_TSE;
-      q = q_dirBNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[TSE])[ktse]=f27_BNW;
-      q = q_dirBSE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[TNW])[ktnw]=f27_BSE;
-      q = q_dirTNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[BSE])[kbse]=f27_TNW;
+      q = q_dirE[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00  ])[kw  ]=f27_E  ;
+      q = q_dirW[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00  ])[ke  ]=f27_W  ;
+      q = q_dirN[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0  ])[ks  ]=f27_N  ;
+      q = q_dirS[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0  ])[kn  ]=f27_S  ;
+      q = q_dirT[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M  ])[kb  ]=f27_T  ;
+      q = q_dirB[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P  ])[kt  ]=f27_B  ;
+      q = q_dirNE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0 ])[ksw ]=f27_NE ;
+      q = q_dirSW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0 ])[kne ]=f27_SW ;
+      q = q_dirSE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0 ])[knw ]=f27_SE ;
+      q = q_dirNW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0 ])[kse ]=f27_NW ;
+      q = q_dirTE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M ])[kbw ]=f27_TE ;
+      q = q_dirBW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P ])[kte ]=f27_BW ;
+      q = q_dirBE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P ])[ktw ]=f27_BE ;
+      q = q_dirTW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M ])[kbe ]=f27_TW ;
+      q = q_dirTN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM ])[kbs ]=f27_TN ;
+      q = q_dirBS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP ])[ktn ]=f27_BS ;
+      q = q_dirBN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP ])[kts ]=f27_BN ;
+      q = q_dirTS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM ])[kbn ]=f27_TS ;
+      q = q_dirTNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMM])[kbsw]=f27_TNE;
+      q = q_dirBSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PPP])[ktne]=f27_BSW;
+      q = q_dirBNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMP])[ktsw]=f27_BNE;
+      q = q_dirTSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PPM])[kbne]=f27_TSW;
+      q = q_dirTSE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MPM])[kbnw]=f27_TSE;
+      q = q_dirBNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PMP])[ktse]=f27_BNW;
+      q = q_dirBSE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MPP])[ktnw]=f27_BSE;
+      q = q_dirTNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PMM])[kbse]=f27_TNW;
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -3893,7 +3893,7 @@ extern "C" __global__ void QADBB27( real* DD,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QNoSlipADincomp7(
+__global__ void QNoSlipADincomp7(
 											 real* DD, 
 											 real* DD7, 
 											 real* temp,
@@ -3911,63 +3911,63 @@ extern "C" __global__ void QNoSlipADincomp7(
    //Distributions27 D;
    //if (isEvenTimestep==true)
    //{
-   //   D.f[E   ] = &DD[E   *size_Mat];
-   //   D.f[W   ] = &DD[W   *size_Mat];
-   //   D.f[N   ] = &DD[N   *size_Mat];
-   //   D.f[S   ] = &DD[S   *size_Mat];
-   //   D.f[T   ] = &DD[T   *size_Mat];
-   //   D.f[B   ] = &DD[B   *size_Mat];
-   //   D.f[NE  ] = &DD[NE  *size_Mat];
-   //   D.f[SW  ] = &DD[SW  *size_Mat];
-   //   D.f[SE  ] = &DD[SE  *size_Mat];
-   //   D.f[NW  ] = &DD[NW  *size_Mat];
-   //   D.f[TE  ] = &DD[TE  *size_Mat];
-   //   D.f[BW  ] = &DD[BW  *size_Mat];
-   //   D.f[BE  ] = &DD[BE  *size_Mat];
-   //   D.f[TW  ] = &DD[TW  *size_Mat];
-   //   D.f[TN  ] = &DD[TN  *size_Mat];
-   //   D.f[BS  ] = &DD[BS  *size_Mat];
-   //   D.f[BN  ] = &DD[BN  *size_Mat];
-   //   D.f[TS  ] = &DD[TS  *size_Mat];
-   //   D.f[REST] = &DD[REST*size_Mat];
-   //   D.f[TNE ] = &DD[TNE *size_Mat];
-   //   D.f[TSW ] = &DD[TSW *size_Mat];
-   //   D.f[TSE ] = &DD[TSE *size_Mat];
-   //   D.f[TNW ] = &DD[TNW *size_Mat];
-   //   D.f[BNE ] = &DD[BNE *size_Mat];
-   //   D.f[BSW ] = &DD[BSW *size_Mat];
-   //   D.f[BSE ] = &DD[BSE *size_Mat];
-   //   D.f[BNW ] = &DD[BNW *size_Mat];
+   //   D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+   //   D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+   //   D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+   //   D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+   //   D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+   //   D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+   //   D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+   //   D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+   //   D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+   //   D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+   //   D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+   //   D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+   //   D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+   //   D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+   //   D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+   //   D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+   //   D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+   //   D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+   //   D.f[DIR_000] = &DD[DIR_000*size_Mat];
+   //   D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+   //   D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+   //   D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+   //   D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+   //   D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+   //   D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+   //   D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+   //   D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
    //} 
    //else
    //{
-   //   D.f[W   ] = &DD[E   *size_Mat];
-   //   D.f[E   ] = &DD[W   *size_Mat];
-   //   D.f[S   ] = &DD[N   *size_Mat];
-   //   D.f[N   ] = &DD[S   *size_Mat];
-   //   D.f[B   ] = &DD[T   *size_Mat];
-   //   D.f[T   ] = &DD[B   *size_Mat];
-   //   D.f[SW  ] = &DD[NE  *size_Mat];
-   //   D.f[NE  ] = &DD[SW  *size_Mat];
-   //   D.f[NW  ] = &DD[SE  *size_Mat];
-   //   D.f[SE  ] = &DD[NW  *size_Mat];
-   //   D.f[BW  ] = &DD[TE  *size_Mat];
-   //   D.f[TE  ] = &DD[BW  *size_Mat];
-   //   D.f[TW  ] = &DD[BE  *size_Mat];
-   //   D.f[BE  ] = &DD[TW  *size_Mat];
-   //   D.f[BS  ] = &DD[TN  *size_Mat];
-   //   D.f[TN  ] = &DD[BS  *size_Mat];
-   //   D.f[TS  ] = &DD[BN  *size_Mat];
-   //   D.f[BN  ] = &DD[TS  *size_Mat];
-   //   D.f[REST] = &DD[REST*size_Mat];
-   //   D.f[TNE ] = &DD[BSW *size_Mat];
-   //   D.f[TSW ] = &DD[BNE *size_Mat];
-   //   D.f[TSE ] = &DD[BNW *size_Mat];
-   //   D.f[TNW ] = &DD[BSE *size_Mat];
-   //   D.f[BNE ] = &DD[TSW *size_Mat];
-   //   D.f[BSW ] = &DD[TNE *size_Mat];
-   //   D.f[BSE ] = &DD[TNW *size_Mat];
-   //   D.f[BNW ] = &DD[TSE *size_Mat];
+   //   D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+   //   D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+   //   D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+   //   D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+   //   D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+   //   D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+   //   D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+   //   D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+   //   D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+   //   D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+   //   D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+   //   D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+   //   D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+   //   D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+   //   D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+   //   D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+   //   D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+   //   D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+   //   D.f[DIR_000] = &DD[DIR_000*size_Mat];
+   //   D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+   //   D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+   //   D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+   //   D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+   //   D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+   //   D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+   //   D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+   //   D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
    //}
 
    Distributions7 D7;
@@ -4009,12 +4009,12 @@ extern "C" __global__ void QNoSlipADincomp7(
       //////////////////////////////////////////////////////////////////////////////////
       real  *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB;
 
-      q_dirE   = &QQ[E   * numberOfBCnodes];
-      q_dirW   = &QQ[W   * numberOfBCnodes];
-      q_dirN   = &QQ[N   * numberOfBCnodes];
-      q_dirS   = &QQ[S   * numberOfBCnodes];
-      q_dirT   = &QQ[T   * numberOfBCnodes];
-      q_dirB   = &QQ[B   * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
       //////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -4046,32 +4046,32 @@ extern "C" __global__ void QNoSlipADincomp7(
       //unsigned int ktne = KQK;
       //unsigned int kbsw = neighborZ[ksw];
       ////////////////////////////////////////////////////////////////////////////////
-      //real f_W    = (D.f[E   ])[ke   ];
-      //real f_E    = (D.f[W   ])[kw   ];
-      //real f_S    = (D.f[N   ])[kn   ];
-      //real f_N    = (D.f[S   ])[ks   ];
-      //real f_B    = (D.f[T   ])[kt   ];
-      //real f_T    = (D.f[B   ])[kb   ];
-      //real f_SW   = (D.f[NE  ])[kne  ];
-      //real f_NE   = (D.f[SW  ])[ksw  ];
-      //real f_NW   = (D.f[SE  ])[kse  ];
-      //real f_SE   = (D.f[NW  ])[knw  ];
-      //real f_BW   = (D.f[TE  ])[kte  ];
-      //real f_TE   = (D.f[BW  ])[kbw  ];
-      //real f_TW   = (D.f[BE  ])[kbe  ];
-      //real f_BE   = (D.f[TW  ])[ktw  ];
-      //real f_BS   = (D.f[TN  ])[ktn  ];
-      //real f_TN   = (D.f[BS  ])[kbs  ];
-      //real f_TS   = (D.f[BN  ])[kbn  ];
-      //real f_BN   = (D.f[TS  ])[kts  ];
-      //real f_BSW  = (D.f[TNE ])[ktne ];
-      //real f_BNE  = (D.f[TSW ])[ktsw ];
-      //real f_BNW  = (D.f[TSE ])[ktse ];
-      //real f_BSE  = (D.f[TNW ])[ktnw ];
-      //real f_TSW  = (D.f[BNE ])[kbne ];
-      //real f_TNE  = (D.f[BSW ])[kbsw ];
-      //real f_TNW  = (D.f[BSE ])[kbse ];
-      //real f_TSE  = (D.f[BNW ])[kbnw ];
+      //real f_W    = (D.f[DIR_P00   ])[ke   ];
+      //real f_E    = (D.f[DIR_M00   ])[kw   ];
+      //real f_S    = (D.f[DIR_0P0   ])[kn   ];
+      //real f_N    = (D.f[DIR_0M0   ])[ks   ];
+      //real f_B    = (D.f[DIR_00P   ])[kt   ];
+      //real f_T    = (D.f[DIR_00M   ])[kb   ];
+      //real f_SW   = (D.f[DIR_PP0  ])[kne  ];
+      //real f_NE   = (D.f[DIR_MM0  ])[ksw  ];
+      //real f_NW   = (D.f[DIR_PM0  ])[kse  ];
+      //real f_SE   = (D.f[DIR_MP0  ])[knw  ];
+      //real f_BW   = (D.f[DIR_P0P  ])[kte  ];
+      //real f_TE   = (D.f[DIR_M0M  ])[kbw  ];
+      //real f_TW   = (D.f[DIR_P0M  ])[kbe  ];
+      //real f_BE   = (D.f[DIR_M0P  ])[ktw  ];
+      //real f_BS   = (D.f[DIR_0PP  ])[ktn  ];
+      //real f_TN   = (D.f[DIR_0MM  ])[kbs  ];
+      //real f_TS   = (D.f[DIR_0PM  ])[kbn  ];
+      //real f_BN   = (D.f[DIR_0MP  ])[kts  ];
+      //real f_BSW  = (D.f[DIR_PPP ])[ktne ];
+      //real f_BNE  = (D.f[DIR_MMP ])[ktsw ];
+      //real f_BNW  = (D.f[DIR_PMP ])[ktse ];
+      //real f_BSE  = (D.f[DIR_MPP ])[ktnw ];
+      //real f_TSW  = (D.f[DIR_PPM ])[kbne ];
+      //real f_TNE  = (D.f[DIR_MMM ])[kbsw ];
+      //real f_TNW  = (D.f[DIR_PMM ])[kbse ];
+      //real f_TSE  = (D.f[DIR_MPM ])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       //real vx1 =  ((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_TSE-f_BNW)+(f_BSE-f_TNW) +(f_NE-f_SW)+(f_SE-f_NW)+(f_TE-f_BW)+(f_BE-f_TW)+(f_E-f_W));
       //real vx2 =  ((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_BNW-f_TSE)+(f_TNW-f_BSE) +(f_NE-f_SW)+(f_NW-f_SE)+(f_TN-f_BS)+(f_BN-f_TS)+(f_N-f_S));
@@ -4317,7 +4317,7 @@ extern "C" __global__ void QNoSlipADincomp7(
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QNoSlipADincomp27(
+__global__ void QNoSlipADincomp27(
 											 real* DD, 
 											 real* DD27, 
 											 real* temp,
@@ -4335,125 +4335,125 @@ extern "C" __global__ void QNoSlipADincomp27(
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[E   ] = &DD[E   *size_Mat];
-      D.f[W   ] = &DD[W   *size_Mat];
-      D.f[N   ] = &DD[N   *size_Mat];
-      D.f[S   ] = &DD[S   *size_Mat];
-      D.f[T   ] = &DD[T   *size_Mat];
-      D.f[B   ] = &DD[B   *size_Mat];
-      D.f[NE  ] = &DD[NE  *size_Mat];
-      D.f[SW  ] = &DD[SW  *size_Mat];
-      D.f[SE  ] = &DD[SE  *size_Mat];
-      D.f[NW  ] = &DD[NW  *size_Mat];
-      D.f[TE  ] = &DD[TE  *size_Mat];
-      D.f[BW  ] = &DD[BW  *size_Mat];
-      D.f[BE  ] = &DD[BE  *size_Mat];
-      D.f[TW  ] = &DD[TW  *size_Mat];
-      D.f[TN  ] = &DD[TN  *size_Mat];
-      D.f[BS  ] = &DD[BS  *size_Mat];
-      D.f[BN  ] = &DD[BN  *size_Mat];
-      D.f[TS  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[TNE *size_Mat];
-      D.f[TSW ] = &DD[TSW *size_Mat];
-      D.f[TSE ] = &DD[TSE *size_Mat];
-      D.f[TNW ] = &DD[TNW *size_Mat];
-      D.f[BNE ] = &DD[BNE *size_Mat];
-      D.f[BSW ] = &DD[BSW *size_Mat];
-      D.f[BSE ] = &DD[BSE *size_Mat];
-      D.f[BNW ] = &DD[BNW *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
    } 
    else
    {
-      D.f[W   ] = &DD[E   *size_Mat];
-      D.f[E   ] = &DD[W   *size_Mat];
-      D.f[S   ] = &DD[N   *size_Mat];
-      D.f[N   ] = &DD[S   *size_Mat];
-      D.f[B   ] = &DD[T   *size_Mat];
-      D.f[T   ] = &DD[B   *size_Mat];
-      D.f[SW  ] = &DD[NE  *size_Mat];
-      D.f[NE  ] = &DD[SW  *size_Mat];
-      D.f[NW  ] = &DD[SE  *size_Mat];
-      D.f[SE  ] = &DD[NW  *size_Mat];
-      D.f[BW  ] = &DD[TE  *size_Mat];
-      D.f[TE  ] = &DD[BW  *size_Mat];
-      D.f[TW  ] = &DD[BE  *size_Mat];
-      D.f[BE  ] = &DD[TW  *size_Mat];
-      D.f[BS  ] = &DD[TN  *size_Mat];
-      D.f[TN  ] = &DD[BS  *size_Mat];
-      D.f[TS  ] = &DD[BN  *size_Mat];
-      D.f[BN  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[BSW *size_Mat];
-      D.f[TSW ] = &DD[BNE *size_Mat];
-      D.f[TSE ] = &DD[BNW *size_Mat];
-      D.f[TNW ] = &DD[BSE *size_Mat];
-      D.f[BNE ] = &DD[TSW *size_Mat];
-      D.f[BSW ] = &DD[TNE *size_Mat];
-      D.f[BSE ] = &DD[TNW *size_Mat];
-      D.f[BNW ] = &DD[TSE *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
    }
 
    Distributions27 D27;
    if (isEvenTimestep==true)
    {
-      D27.f[E   ] = &DD27[E   *size_Mat];
-      D27.f[W   ] = &DD27[W   *size_Mat];
-      D27.f[N   ] = &DD27[N   *size_Mat];
-      D27.f[S   ] = &DD27[S   *size_Mat];
-      D27.f[T   ] = &DD27[T   *size_Mat];
-      D27.f[B   ] = &DD27[B   *size_Mat];
-      D27.f[NE  ] = &DD27[NE  *size_Mat];
-      D27.f[SW  ] = &DD27[SW  *size_Mat];
-      D27.f[SE  ] = &DD27[SE  *size_Mat];
-      D27.f[NW  ] = &DD27[NW  *size_Mat];
-      D27.f[TE  ] = &DD27[TE  *size_Mat];
-      D27.f[BW  ] = &DD27[BW  *size_Mat];
-      D27.f[BE  ] = &DD27[BE  *size_Mat];
-      D27.f[TW  ] = &DD27[TW  *size_Mat];
-      D27.f[TN  ] = &DD27[TN  *size_Mat];
-      D27.f[BS  ] = &DD27[BS  *size_Mat];
-      D27.f[BN  ] = &DD27[BN  *size_Mat];
-      D27.f[TS  ] = &DD27[TS  *size_Mat];
-      D27.f[REST] = &DD27[REST*size_Mat];
-      D27.f[TNE ] = &DD27[TNE *size_Mat];
-      D27.f[TSW ] = &DD27[TSW *size_Mat];
-      D27.f[TSE ] = &DD27[TSE *size_Mat];
-      D27.f[TNW ] = &DD27[TNW *size_Mat];
-      D27.f[BNE ] = &DD27[BNE *size_Mat];
-      D27.f[BSW ] = &DD27[BSW *size_Mat];
-      D27.f[BSE ] = &DD27[BSE *size_Mat];
-      D27.f[BNW ] = &DD27[BNW *size_Mat];
+      D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
+      D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
+      D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
+      D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
+      D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
+      D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
+      D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
+      D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
+      D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
+      D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
+      D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
+      D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
+      D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
+      D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
+      D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
+      D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
+      D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
+      D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
+      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
+      D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
+      D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
+      D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
+      D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
+      D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
+      D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
+      D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
+      D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
    } 
    else
    {
-      D27.f[W   ] = &DD27[E   *size_Mat];
-      D27.f[E   ] = &DD27[W   *size_Mat];
-      D27.f[S   ] = &DD27[N   *size_Mat];
-      D27.f[N   ] = &DD27[S   *size_Mat];
-      D27.f[B   ] = &DD27[T   *size_Mat];
-      D27.f[T   ] = &DD27[B   *size_Mat];
-      D27.f[SW  ] = &DD27[NE  *size_Mat];
-      D27.f[NE  ] = &DD27[SW  *size_Mat];
-      D27.f[NW  ] = &DD27[SE  *size_Mat];
-      D27.f[SE  ] = &DD27[NW  *size_Mat];
-      D27.f[BW  ] = &DD27[TE  *size_Mat];
-      D27.f[TE  ] = &DD27[BW  *size_Mat];
-      D27.f[TW  ] = &DD27[BE  *size_Mat];
-      D27.f[BE  ] = &DD27[TW  *size_Mat];
-      D27.f[BS  ] = &DD27[TN  *size_Mat];
-      D27.f[TN  ] = &DD27[BS  *size_Mat];
-      D27.f[TS  ] = &DD27[BN  *size_Mat];
-      D27.f[BN  ] = &DD27[TS  *size_Mat];
-      D27.f[REST] = &DD27[REST*size_Mat];
-      D27.f[TNE ] = &DD27[BSW *size_Mat];
-      D27.f[TSW ] = &DD27[BNE *size_Mat];
-      D27.f[TSE ] = &DD27[BNW *size_Mat];
-      D27.f[TNW ] = &DD27[BSE *size_Mat];
-      D27.f[BNE ] = &DD27[TSW *size_Mat];
-      D27.f[BSW ] = &DD27[TNE *size_Mat];
-      D27.f[BSE ] = &DD27[TNW *size_Mat];
-      D27.f[BNW ] = &DD27[TSE *size_Mat];
+      D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
+      D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
+      D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
+      D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
+      D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
+      D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
+      D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
+      D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
+      D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
+      D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
+      D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
+      D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
+      D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
+      D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
+      D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
+      D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
+      D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
+      D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
+      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
+      D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
+      D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
+      D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
+      D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
+      D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
+      D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
+      D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
+      D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -4474,32 +4474,32 @@ extern "C" __global__ void QNoSlipADincomp27(
          *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
          *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
          *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[E   * numberOfBCnodes];
-      q_dirW   = &QQ[W   * numberOfBCnodes];
-      q_dirN   = &QQ[N   * numberOfBCnodes];
-      q_dirS   = &QQ[S   * numberOfBCnodes];
-      q_dirT   = &QQ[T   * numberOfBCnodes];
-      q_dirB   = &QQ[B   * numberOfBCnodes];
-      q_dirNE  = &QQ[NE  * numberOfBCnodes];
-      q_dirSW  = &QQ[SW  * numberOfBCnodes];
-      q_dirSE  = &QQ[SE  * numberOfBCnodes];
-      q_dirNW  = &QQ[NW  * numberOfBCnodes];
-      q_dirTE  = &QQ[TE  * numberOfBCnodes];
-      q_dirBW  = &QQ[BW  * numberOfBCnodes];
-      q_dirBE  = &QQ[BE  * numberOfBCnodes];
-      q_dirTW  = &QQ[TW  * numberOfBCnodes];
-      q_dirTN  = &QQ[TN  * numberOfBCnodes];
-      q_dirBS  = &QQ[BS  * numberOfBCnodes];
-      q_dirBN  = &QQ[BN  * numberOfBCnodes];
-      q_dirTS  = &QQ[TS  * numberOfBCnodes];
-      q_dirTNE = &QQ[TNE * numberOfBCnodes];
-      q_dirTSW = &QQ[TSW * numberOfBCnodes];
-      q_dirTSE = &QQ[TSE * numberOfBCnodes];
-      q_dirTNW = &QQ[TNW * numberOfBCnodes];
-      q_dirBNE = &QQ[BNE * numberOfBCnodes];
-      q_dirBSW = &QQ[BSW * numberOfBCnodes];
-      q_dirBSE = &QQ[BSE * numberOfBCnodes];
-      q_dirBNW = &QQ[BNW * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
+      q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
+      q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
+      q_dirTNW = &QQ[DIR_MPP * numberOfBCnodes];
+      q_dirBNE = &QQ[DIR_PPM * numberOfBCnodes];
+      q_dirBSW = &QQ[DIR_MMM * numberOfBCnodes];
+      q_dirBSE = &QQ[DIR_PMM * numberOfBCnodes];
+      q_dirBNW = &QQ[DIR_MPM * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -4531,65 +4531,65 @@ extern "C" __global__ void QNoSlipADincomp27(
       unsigned int ktne = KQK;
       unsigned int kbsw = neighborZ[ksw];
       ////////////////////////////////////////////////////////////////////////////////
-      real f_W    = (D.f[E   ])[ke   ];
-      real f_E    = (D.f[W   ])[kw   ];
-      real f_S    = (D.f[N   ])[kn   ];
-      real f_N    = (D.f[S   ])[ks   ];
-      real f_B    = (D.f[T   ])[kt   ];
-      real f_T    = (D.f[B   ])[kb   ];
-      real f_SW   = (D.f[NE  ])[kne  ];
-      real f_NE   = (D.f[SW  ])[ksw  ];
-      real f_NW   = (D.f[SE  ])[kse  ];
-      real f_SE   = (D.f[NW  ])[knw  ];
-      real f_BW   = (D.f[TE  ])[kte  ];
-      real f_TE   = (D.f[BW  ])[kbw  ];
-      real f_TW   = (D.f[BE  ])[kbe  ];
-      real f_BE   = (D.f[TW  ])[ktw  ];
-      real f_BS   = (D.f[TN  ])[ktn  ];
-      real f_TN   = (D.f[BS  ])[kbs  ];
-      real f_TS   = (D.f[BN  ])[kbn  ];
-      real f_BN   = (D.f[TS  ])[kts  ];
-      //real f_ZERO = (D.f[REST])[kzero];
-      real f_BSW  = (D.f[TNE ])[ktne ];
-      real f_BNE  = (D.f[TSW ])[ktsw ];
-      real f_BNW  = (D.f[TSE ])[ktse ];
-      real f_BSE  = (D.f[TNW ])[ktnw ];
-      real f_TSW  = (D.f[BNE ])[kbne ];
-      real f_TNE  = (D.f[BSW ])[kbsw ];
-      real f_TNW  = (D.f[BSE ])[kbse ];
-      real f_TSE  = (D.f[BNW ])[kbnw ];
+      real f_W    = (D.f[DIR_P00   ])[ke   ];
+      real f_E    = (D.f[DIR_M00   ])[kw   ];
+      real f_S    = (D.f[DIR_0P0   ])[kn   ];
+      real f_N    = (D.f[DIR_0M0   ])[ks   ];
+      real f_B    = (D.f[DIR_00P   ])[kt   ];
+      real f_T    = (D.f[DIR_00M   ])[kb   ];
+      real f_SW   = (D.f[DIR_PP0  ])[kne  ];
+      real f_NE   = (D.f[DIR_MM0  ])[ksw  ];
+      real f_NW   = (D.f[DIR_PM0  ])[kse  ];
+      real f_SE   = (D.f[DIR_MP0  ])[knw  ];
+      real f_BW   = (D.f[DIR_P0P  ])[kte  ];
+      real f_TE   = (D.f[DIR_M0M  ])[kbw  ];
+      real f_TW   = (D.f[DIR_P0M  ])[kbe  ];
+      real f_BE   = (D.f[DIR_M0P  ])[ktw  ];
+      real f_BS   = (D.f[DIR_0PP  ])[ktn  ];
+      real f_TN   = (D.f[DIR_0MM  ])[kbs  ];
+      real f_TS   = (D.f[DIR_0PM  ])[kbn  ];
+      real f_BN   = (D.f[DIR_0MP  ])[kts  ];
+      //real f_ZERO = (D.f[DIR_000])[kzero];
+      real f_BSW  = (D.f[DIR_PPP ])[ktne ];
+      real f_BNE  = (D.f[DIR_MMP ])[ktsw ];
+      real f_BNW  = (D.f[DIR_PMP ])[ktse ];
+      real f_BSE  = (D.f[DIR_MPP ])[ktnw ];
+      real f_TSW  = (D.f[DIR_PPM ])[kbne ];
+      real f_TNE  = (D.f[DIR_MMM ])[kbsw ];
+      real f_TNW  = (D.f[DIR_PMM ])[kbse ];
+      real f_TSE  = (D.f[DIR_MPM ])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1 =  ((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_TSE-f_BNW)+(f_BSE-f_TNW) +(f_NE-f_SW)+(f_SE-f_NW)+(f_TE-f_BW)+(f_BE-f_TW)+(f_E-f_W));
       real vx2 =  ((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_BNW-f_TSE)+(f_TNW-f_BSE) +(f_NE-f_SW)+(f_NW-f_SE)+(f_TN-f_BS)+(f_BN-f_TS)+(f_N-f_S));
       real vx3 =  ((f_TNE-f_BSW)+(f_TSW-f_BNE)+(f_TSE-f_BNW)+(f_TNW-f_BSE) +(f_TE-f_BW)+(f_TW-f_BE)+(f_TN-f_BS)+(f_TS-f_BN)+(f_T-f_B));
       ////////////////////////////////////////////////////////////////////////////////
-      real f27_W    = (D27.f[E   ])[ke   ];
-      real f27_E    = (D27.f[W   ])[kw   ];
-      real f27_S    = (D27.f[N   ])[kn   ];
-      real f27_N    = (D27.f[S   ])[ks   ];
-      real f27_B    = (D27.f[T   ])[kt   ];
-      real f27_T    = (D27.f[B   ])[kb   ];
-      real f27_SW   = (D27.f[NE  ])[kne  ];
-      real f27_NE   = (D27.f[SW  ])[ksw  ];
-      real f27_NW   = (D27.f[SE  ])[kse  ];
-      real f27_SE   = (D27.f[NW  ])[knw  ];
-      real f27_BW   = (D27.f[TE  ])[kte  ];
-      real f27_TE   = (D27.f[BW  ])[kbw  ];
-      real f27_TW   = (D27.f[BE  ])[kbe  ];
-      real f27_BE   = (D27.f[TW  ])[ktw  ];
-      real f27_BS   = (D27.f[TN  ])[ktn  ];
-      real f27_TN   = (D27.f[BS  ])[kbs  ];
-      real f27_TS   = (D27.f[BN  ])[kbn  ];
-      real f27_BN   = (D27.f[TS  ])[kts  ];
-      real f27_ZERO = (D27.f[REST])[kzero];
-      real f27_BSW  = (D27.f[TNE ])[ktne ];
-      real f27_BNE  = (D27.f[TSW ])[ktsw ];
-      real f27_BNW  = (D27.f[TSE ])[ktse ];
-      real f27_BSE  = (D27.f[TNW ])[ktnw ];
-      real f27_TSW  = (D27.f[BNE ])[kbne ];
-      real f27_TNE  = (D27.f[BSW ])[kbsw ];
-      real f27_TNW  = (D27.f[BSE ])[kbse ];
-      real f27_TSE  = (D27.f[BNW ])[kbnw ];
+      real f27_W    = (D27.f[DIR_P00   ])[ke   ];
+      real f27_E    = (D27.f[DIR_M00   ])[kw   ];
+      real f27_S    = (D27.f[DIR_0P0   ])[kn   ];
+      real f27_N    = (D27.f[DIR_0M0   ])[ks   ];
+      real f27_B    = (D27.f[DIR_00P   ])[kt   ];
+      real f27_T    = (D27.f[DIR_00M   ])[kb   ];
+      real f27_SW   = (D27.f[DIR_PP0  ])[kne  ];
+      real f27_NE   = (D27.f[DIR_MM0  ])[ksw  ];
+      real f27_NW   = (D27.f[DIR_PM0  ])[kse  ];
+      real f27_SE   = (D27.f[DIR_MP0  ])[knw  ];
+      real f27_BW   = (D27.f[DIR_P0P  ])[kte  ];
+      real f27_TE   = (D27.f[DIR_M0M  ])[kbw  ];
+      real f27_TW   = (D27.f[DIR_P0M  ])[kbe  ];
+      real f27_BE   = (D27.f[DIR_M0P  ])[ktw  ];
+      real f27_BS   = (D27.f[DIR_0PP  ])[ktn  ];
+      real f27_TN   = (D27.f[DIR_0MM  ])[kbs  ];
+      real f27_TS   = (D27.f[DIR_0PM  ])[kbn  ];
+      real f27_BN   = (D27.f[DIR_0MP  ])[kts  ];
+      real f27_ZERO = (D27.f[DIR_000])[kzero];
+      real f27_BSW  = (D27.f[DIR_PPP ])[ktne ];
+      real f27_BNE  = (D27.f[DIR_MMP ])[ktsw ];
+      real f27_BNW  = (D27.f[DIR_PMP ])[ktse ];
+      real f27_BSE  = (D27.f[DIR_MPP ])[ktnw ];
+      real f27_TSW  = (D27.f[DIR_PPM ])[kbne ];
+      real f27_TNE  = (D27.f[DIR_MMM ])[kbsw ];
+      real f27_TNW  = (D27.f[DIR_PMM ])[kbse ];
+      real f27_TSE  = (D27.f[DIR_MPM ])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
       ////////////////////////////////////////////////////////////////////////////////
@@ -4665,96 +4665,96 @@ extern "C" __global__ void QNoSlipADincomp27(
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D27.f[E   ] = &DD27[E   *size_Mat];
-         D27.f[W   ] = &DD27[W   *size_Mat];
-         D27.f[N   ] = &DD27[N   *size_Mat];
-         D27.f[S   ] = &DD27[S   *size_Mat];
-         D27.f[T   ] = &DD27[T   *size_Mat];
-         D27.f[B   ] = &DD27[B   *size_Mat];
-         D27.f[NE  ] = &DD27[NE  *size_Mat];
-         D27.f[SW  ] = &DD27[SW  *size_Mat];
-         D27.f[SE  ] = &DD27[SE  *size_Mat];
-         D27.f[NW  ] = &DD27[NW  *size_Mat];
-         D27.f[TE  ] = &DD27[TE  *size_Mat];
-         D27.f[BW  ] = &DD27[BW  *size_Mat];
-         D27.f[BE  ] = &DD27[BE  *size_Mat];
-         D27.f[TW  ] = &DD27[TW  *size_Mat];
-         D27.f[TN  ] = &DD27[TN  *size_Mat];
-         D27.f[BS  ] = &DD27[BS  *size_Mat];
-         D27.f[BN  ] = &DD27[BN  *size_Mat];
-         D27.f[TS  ] = &DD27[TS  *size_Mat];
-         D27.f[REST] = &DD27[REST*size_Mat];
-         D27.f[TNE ] = &DD27[TNE *size_Mat];
-         D27.f[TSW ] = &DD27[TSW *size_Mat];
-         D27.f[TSE ] = &DD27[TSE *size_Mat];
-         D27.f[TNW ] = &DD27[TNW *size_Mat];
-         D27.f[BNE ] = &DD27[BNE *size_Mat];
-         D27.f[BSW ] = &DD27[BSW *size_Mat];
-         D27.f[BSE ] = &DD27[BSE *size_Mat];
-         D27.f[BNW ] = &DD27[BNW *size_Mat];
+         D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
+         D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
+         D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
+         D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
+         D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
+         D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
+         D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
+         D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
+         D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
+         D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
+         D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
+         D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
+         D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
+         D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
+         D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
+         D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
+         D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
+         D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
+         D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
+         D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
+         D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
+         D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
+         D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
+         D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
+         D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
+         D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
+         D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
       } 
       else
       {
-         D27.f[W   ] = &DD27[E   *size_Mat];
-         D27.f[E   ] = &DD27[W   *size_Mat];
-         D27.f[S   ] = &DD27[N   *size_Mat];
-         D27.f[N   ] = &DD27[S   *size_Mat];
-         D27.f[B   ] = &DD27[T   *size_Mat];
-         D27.f[T   ] = &DD27[B   *size_Mat];
-         D27.f[SW  ] = &DD27[NE  *size_Mat];
-         D27.f[NE  ] = &DD27[SW  *size_Mat];
-         D27.f[NW  ] = &DD27[SE  *size_Mat];
-         D27.f[SE  ] = &DD27[NW  *size_Mat];
-         D27.f[BW  ] = &DD27[TE  *size_Mat];
-         D27.f[TE  ] = &DD27[BW  *size_Mat];
-         D27.f[TW  ] = &DD27[BE  *size_Mat];
-         D27.f[BE  ] = &DD27[TW  *size_Mat];
-         D27.f[BS  ] = &DD27[TN  *size_Mat];
-         D27.f[TN  ] = &DD27[BS  *size_Mat];
-         D27.f[TS  ] = &DD27[BN  *size_Mat];
-         D27.f[BN  ] = &DD27[TS  *size_Mat];
-         D27.f[REST] = &DD27[REST*size_Mat];
-         D27.f[TNE ] = &DD27[BSW *size_Mat];
-         D27.f[TSW ] = &DD27[BNE *size_Mat];
-         D27.f[TSE ] = &DD27[BNW *size_Mat];
-         D27.f[TNW ] = &DD27[BSE *size_Mat];
-         D27.f[BNE ] = &DD27[TSW *size_Mat];
-         D27.f[BSW ] = &DD27[TNE *size_Mat];
-         D27.f[BSE ] = &DD27[TNW *size_Mat];
-         D27.f[BNW ] = &DD27[TSE *size_Mat];
+         D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
+         D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
+         D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
+         D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
+         D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
+         D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
+         D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
+         D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
+         D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
+         D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
+         D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
+         D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
+         D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
+         D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
+         D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
+         D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
+         D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
+         D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
+         D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
+         D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
+         D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
+         D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
+         D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
+         D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
+         D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
+         D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
+         D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
-      //(D.f[REST])[k]=0.1f;
+      //(D.f[DIR_000])[k]=0.1f;
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  real q;
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      q = q_dirE[k];   if (q>=c0o1 && q<=c1o1) (D27.f[W  ])[kw  ]=(c2o1*feqW27_W  -(f27_E  *(q*omegaD-c1o1)-omegaD*feq27_E  *(q-c1o1))/(omegaD-c1o1)+f27_W  *q)/(q+c1o1);
-      q = q_dirW[k];   if (q>=c0o1 && q<=c1o1) (D27.f[E  ])[ke  ]=(c2o1*feqW27_E  -(f27_W  *(q*omegaD-c1o1)-omegaD*feq27_W  *(q-c1o1))/(omegaD-c1o1)+f27_E  *q)/(q+c1o1);
-      q = q_dirN[k];   if (q>=c0o1 && q<=c1o1) (D27.f[S  ])[ks  ]=(c2o1*feqW27_S  -(f27_N  *(q*omegaD-c1o1)-omegaD*feq27_N  *(q-c1o1))/(omegaD-c1o1)+f27_S  *q)/(q+c1o1);
-      q = q_dirS[k];   if (q>=c0o1 && q<=c1o1) (D27.f[N  ])[kn  ]=(c2o1*feqW27_N  -(f27_S  *(q*omegaD-c1o1)-omegaD*feq27_S  *(q-c1o1))/(omegaD-c1o1)+f27_N  *q)/(q+c1o1);
-      q = q_dirT[k];   if (q>=c0o1 && q<=c1o1) (D27.f[B  ])[kb  ]=(c2o1*feqW27_B  -(f27_T  *(q*omegaD-c1o1)-omegaD*feq27_T  *(q-c1o1))/(omegaD-c1o1)+f27_B  *q)/(q+c1o1);
-      q = q_dirB[k];   if (q>=c0o1 && q<=c1o1) (D27.f[T  ])[kt  ]=(c2o1*feqW27_T  -(f27_B  *(q*omegaD-c1o1)-omegaD*feq27_B  *(q-c1o1))/(omegaD-c1o1)+f27_T  *q)/(q+c1o1);
-      q = q_dirNE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[SW ])[ksw ]=(c2o1*feqW27_SW -(f27_NE *(q*omegaD-c1o1)-omegaD*feq27_NE *(q-c1o1))/(omegaD-c1o1)+f27_SW *q)/(q+c1o1);
-      q = q_dirSW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[NE ])[kne ]=(c2o1*feqW27_NE -(f27_SW *(q*omegaD-c1o1)-omegaD*feq27_SW *(q-c1o1))/(omegaD-c1o1)+f27_NE *q)/(q+c1o1);
-      q = q_dirSE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[NW ])[knw ]=(c2o1*feqW27_NW -(f27_SE *(q*omegaD-c1o1)-omegaD*feq27_SE *(q-c1o1))/(omegaD-c1o1)+f27_NW *q)/(q+c1o1);
-      q = q_dirNW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[SE ])[kse ]=(c2o1*feqW27_SE -(f27_NW *(q*omegaD-c1o1)-omegaD*feq27_NW *(q-c1o1))/(omegaD-c1o1)+f27_SE *q)/(q+c1o1);
-      q = q_dirTE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[BW ])[kbw ]=(c2o1*feqW27_BW -(f27_TE *(q*omegaD-c1o1)-omegaD*feq27_TE *(q-c1o1))/(omegaD-c1o1)+f27_BW *q)/(q+c1o1);
-      q = q_dirBW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[TE ])[kte ]=(c2o1*feqW27_TE -(f27_BW *(q*omegaD-c1o1)-omegaD*feq27_BW *(q-c1o1))/(omegaD-c1o1)+f27_TE *q)/(q+c1o1);
-      q = q_dirBE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[TW ])[ktw ]=(c2o1*feqW27_TW -(f27_BE *(q*omegaD-c1o1)-omegaD*feq27_BE *(q-c1o1))/(omegaD-c1o1)+f27_TW *q)/(q+c1o1);
-      q = q_dirTW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[BE ])[kbe ]=(c2o1*feqW27_BE -(f27_TW *(q*omegaD-c1o1)-omegaD*feq27_TW *(q-c1o1))/(omegaD-c1o1)+f27_BE *q)/(q+c1o1);
-      q = q_dirTN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[BS ])[kbs ]=(c2o1*feqW27_BS -(f27_TN *(q*omegaD-c1o1)-omegaD*feq27_TN *(q-c1o1))/(omegaD-c1o1)+f27_BS *q)/(q+c1o1);
-      q = q_dirBS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[TN ])[ktn ]=(c2o1*feqW27_TN -(f27_BS *(q*omegaD-c1o1)-omegaD*feq27_BS *(q-c1o1))/(omegaD-c1o1)+f27_TN *q)/(q+c1o1);
-      q = q_dirBN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[TS ])[kts ]=(c2o1*feqW27_TS -(f27_BN *(q*omegaD-c1o1)-omegaD*feq27_BN *(q-c1o1))/(omegaD-c1o1)+f27_TS *q)/(q+c1o1);
-      q = q_dirTS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[BN ])[kbn ]=(c2o1*feqW27_BN -(f27_TS *(q*omegaD-c1o1)-omegaD*feq27_TS *(q-c1o1))/(omegaD-c1o1)+f27_BN *q)/(q+c1o1);
-      q = q_dirTNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[BSW])[kbsw]=(c2o1*feqW27_BSW-(f27_TNE*(q*omegaD-c1o1)-omegaD*feq27_TNE*(q-c1o1))/(omegaD-c1o1)+f27_BSW*q)/(q+c1o1);
-      q = q_dirBSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[TNE])[ktne]=(c2o1*feqW27_TNE-(f27_BSW*(q*omegaD-c1o1)-omegaD*feq27_BSW*(q-c1o1))/(omegaD-c1o1)+f27_TNE*q)/(q+c1o1);
-      q = q_dirBNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[TSW])[ktsw]=(c2o1*feqW27_TSW-(f27_BNE*(q*omegaD-c1o1)-omegaD*feq27_BNE*(q-c1o1))/(omegaD-c1o1)+f27_TSW*q)/(q+c1o1);
-      q = q_dirTSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[BNE])[kbne]=(c2o1*feqW27_BNE-(f27_TSW*(q*omegaD-c1o1)-omegaD*feq27_TSW*(q-c1o1))/(omegaD-c1o1)+f27_BNE*q)/(q+c1o1);
-      q = q_dirTSE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[BNW])[kbnw]=(c2o1*feqW27_BNW-(f27_TSE*(q*omegaD-c1o1)-omegaD*feq27_TSE*(q-c1o1))/(omegaD-c1o1)+f27_BNW*q)/(q+c1o1);
-      q = q_dirBNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[TSE])[ktse]=(c2o1*feqW27_TSE-(f27_BNW*(q*omegaD-c1o1)-omegaD*feq27_BNW*(q-c1o1))/(omegaD-c1o1)+f27_TSE*q)/(q+c1o1);
-      q = q_dirBSE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[TNW])[ktnw]=(c2o1*feqW27_TNW-(f27_BSE*(q*omegaD-c1o1)-omegaD*feq27_BSE*(q-c1o1))/(omegaD-c1o1)+f27_TNW*q)/(q+c1o1);
-      q = q_dirTNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[BSE])[kbse]=(c2o1*feqW27_BSE-(f27_TNW*(q*omegaD-c1o1)-omegaD*feq27_TNW*(q-c1o1))/(omegaD-c1o1)+f27_BSE*q)/(q+c1o1);
+      q = q_dirE[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00  ])[kw  ]=(c2o1*feqW27_W  -(f27_E  *(q*omegaD-c1o1)-omegaD*feq27_E  *(q-c1o1))/(omegaD-c1o1)+f27_W  *q)/(q+c1o1);
+      q = q_dirW[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00  ])[ke  ]=(c2o1*feqW27_E  -(f27_W  *(q*omegaD-c1o1)-omegaD*feq27_W  *(q-c1o1))/(omegaD-c1o1)+f27_E  *q)/(q+c1o1);
+      q = q_dirN[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0  ])[ks  ]=(c2o1*feqW27_S  -(f27_N  *(q*omegaD-c1o1)-omegaD*feq27_N  *(q-c1o1))/(omegaD-c1o1)+f27_S  *q)/(q+c1o1);
+      q = q_dirS[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0  ])[kn  ]=(c2o1*feqW27_N  -(f27_S  *(q*omegaD-c1o1)-omegaD*feq27_S  *(q-c1o1))/(omegaD-c1o1)+f27_N  *q)/(q+c1o1);
+      q = q_dirT[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M  ])[kb  ]=(c2o1*feqW27_B  -(f27_T  *(q*omegaD-c1o1)-omegaD*feq27_T  *(q-c1o1))/(omegaD-c1o1)+f27_B  *q)/(q+c1o1);
+      q = q_dirB[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P  ])[kt  ]=(c2o1*feqW27_T  -(f27_B  *(q*omegaD-c1o1)-omegaD*feq27_B  *(q-c1o1))/(omegaD-c1o1)+f27_T  *q)/(q+c1o1);
+      q = q_dirNE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0 ])[ksw ]=(c2o1*feqW27_SW -(f27_NE *(q*omegaD-c1o1)-omegaD*feq27_NE *(q-c1o1))/(omegaD-c1o1)+f27_SW *q)/(q+c1o1);
+      q = q_dirSW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0 ])[kne ]=(c2o1*feqW27_NE -(f27_SW *(q*omegaD-c1o1)-omegaD*feq27_SW *(q-c1o1))/(omegaD-c1o1)+f27_NE *q)/(q+c1o1);
+      q = q_dirSE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0 ])[knw ]=(c2o1*feqW27_NW -(f27_SE *(q*omegaD-c1o1)-omegaD*feq27_SE *(q-c1o1))/(omegaD-c1o1)+f27_NW *q)/(q+c1o1);
+      q = q_dirNW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0 ])[kse ]=(c2o1*feqW27_SE -(f27_NW *(q*omegaD-c1o1)-omegaD*feq27_NW *(q-c1o1))/(omegaD-c1o1)+f27_SE *q)/(q+c1o1);
+      q = q_dirTE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M ])[kbw ]=(c2o1*feqW27_BW -(f27_TE *(q*omegaD-c1o1)-omegaD*feq27_TE *(q-c1o1))/(omegaD-c1o1)+f27_BW *q)/(q+c1o1);
+      q = q_dirBW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P ])[kte ]=(c2o1*feqW27_TE -(f27_BW *(q*omegaD-c1o1)-omegaD*feq27_BW *(q-c1o1))/(omegaD-c1o1)+f27_TE *q)/(q+c1o1);
+      q = q_dirBE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P ])[ktw ]=(c2o1*feqW27_TW -(f27_BE *(q*omegaD-c1o1)-omegaD*feq27_BE *(q-c1o1))/(omegaD-c1o1)+f27_TW *q)/(q+c1o1);
+      q = q_dirTW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M ])[kbe ]=(c2o1*feqW27_BE -(f27_TW *(q*omegaD-c1o1)-omegaD*feq27_TW *(q-c1o1))/(omegaD-c1o1)+f27_BE *q)/(q+c1o1);
+      q = q_dirTN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM ])[kbs ]=(c2o1*feqW27_BS -(f27_TN *(q*omegaD-c1o1)-omegaD*feq27_TN *(q-c1o1))/(omegaD-c1o1)+f27_BS *q)/(q+c1o1);
+      q = q_dirBS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP ])[ktn ]=(c2o1*feqW27_TN -(f27_BS *(q*omegaD-c1o1)-omegaD*feq27_BS *(q-c1o1))/(omegaD-c1o1)+f27_TN *q)/(q+c1o1);
+      q = q_dirBN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP ])[kts ]=(c2o1*feqW27_TS -(f27_BN *(q*omegaD-c1o1)-omegaD*feq27_BN *(q-c1o1))/(omegaD-c1o1)+f27_TS *q)/(q+c1o1);
+      q = q_dirTS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM ])[kbn ]=(c2o1*feqW27_BN -(f27_TS *(q*omegaD-c1o1)-omegaD*feq27_TS *(q-c1o1))/(omegaD-c1o1)+f27_BN *q)/(q+c1o1);
+      q = q_dirTNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMM])[kbsw]=(c2o1*feqW27_BSW-(f27_TNE*(q*omegaD-c1o1)-omegaD*feq27_TNE*(q-c1o1))/(omegaD-c1o1)+f27_BSW*q)/(q+c1o1);
+      q = q_dirBSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PPP])[ktne]=(c2o1*feqW27_TNE-(f27_BSW*(q*omegaD-c1o1)-omegaD*feq27_BSW*(q-c1o1))/(omegaD-c1o1)+f27_TNE*q)/(q+c1o1);
+      q = q_dirBNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMP])[ktsw]=(c2o1*feqW27_TSW-(f27_BNE*(q*omegaD-c1o1)-omegaD*feq27_BNE*(q-c1o1))/(omegaD-c1o1)+f27_TSW*q)/(q+c1o1);
+      q = q_dirTSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PPM])[kbne]=(c2o1*feqW27_BNE-(f27_TSW*(q*omegaD-c1o1)-omegaD*feq27_TSW*(q-c1o1))/(omegaD-c1o1)+f27_BNE*q)/(q+c1o1);
+      q = q_dirTSE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MPM])[kbnw]=(c2o1*feqW27_BNW-(f27_TSE*(q*omegaD-c1o1)-omegaD*feq27_TSE*(q-c1o1))/(omegaD-c1o1)+f27_BNW*q)/(q+c1o1);
+      q = q_dirBNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PMP])[ktse]=(c2o1*feqW27_TSE-(f27_BNW*(q*omegaD-c1o1)-omegaD*feq27_BNW*(q-c1o1))/(omegaD-c1o1)+f27_TSE*q)/(q+c1o1);
+      q = q_dirBSE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MPP])[ktnw]=(c2o1*feqW27_TNW-(f27_BSE*(q*omegaD-c1o1)-omegaD*feq27_BSE*(q-c1o1))/(omegaD-c1o1)+f27_TNW*q)/(q+c1o1);
+      q = q_dirTNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PMM])[kbse]=(c2o1*feqW27_BSE-(f27_TNW*(q*omegaD-c1o1)-omegaD*feq27_TNW*(q-c1o1))/(omegaD-c1o1)+f27_BSE*q)/(q+c1o1);
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -4798,7 +4798,7 @@ extern "C" __global__ void QNoSlipADincomp27(
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QADVeloIncomp7(
+__global__ void QADVeloIncomp7(
 											real* DD, 
 											real* DD7, 
 											real* temp,
@@ -4817,63 +4817,63 @@ extern "C" __global__ void QADVeloIncomp7(
    //Distributions27 D;
    //if (isEvenTimestep==true)
    //{
-   //   D.f[E   ] = &DD[E   *size_Mat];
-   //   D.f[W   ] = &DD[W   *size_Mat];
-   //   D.f[N   ] = &DD[N   *size_Mat];
-   //   D.f[S   ] = &DD[S   *size_Mat];
-   //   D.f[T   ] = &DD[T   *size_Mat];
-   //   D.f[B   ] = &DD[B   *size_Mat];
-   //   D.f[NE  ] = &DD[NE  *size_Mat];
-   //   D.f[SW  ] = &DD[SW  *size_Mat];
-   //   D.f[SE  ] = &DD[SE  *size_Mat];
-   //   D.f[NW  ] = &DD[NW  *size_Mat];
-   //   D.f[TE  ] = &DD[TE  *size_Mat];
-   //   D.f[BW  ] = &DD[BW  *size_Mat];
-   //   D.f[BE  ] = &DD[BE  *size_Mat];
-   //   D.f[TW  ] = &DD[TW  *size_Mat];
-   //   D.f[TN  ] = &DD[TN  *size_Mat];
-   //   D.f[BS  ] = &DD[BS  *size_Mat];
-   //   D.f[BN  ] = &DD[BN  *size_Mat];
-   //   D.f[TS  ] = &DD[TS  *size_Mat];
-   //   D.f[REST] = &DD[REST*size_Mat];
-   //   D.f[TNE ] = &DD[TNE *size_Mat];
-   //   D.f[TSW ] = &DD[TSW *size_Mat];
-   //   D.f[TSE ] = &DD[TSE *size_Mat];
-   //   D.f[TNW ] = &DD[TNW *size_Mat];
-   //   D.f[BNE ] = &DD[BNE *size_Mat];
-   //   D.f[BSW ] = &DD[BSW *size_Mat];
-   //   D.f[BSE ] = &DD[BSE *size_Mat];
-   //   D.f[BNW ] = &DD[BNW *size_Mat];
+   //   D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+   //   D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+   //   D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+   //   D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+   //   D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+   //   D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+   //   D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+   //   D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+   //   D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+   //   D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+   //   D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+   //   D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+   //   D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+   //   D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+   //   D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+   //   D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+   //   D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+   //   D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+   //   D.f[DIR_000] = &DD[DIR_000*size_Mat];
+   //   D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+   //   D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+   //   D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+   //   D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+   //   D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+   //   D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+   //   D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+   //   D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
    //} 
    //else
    //{
-   //   D.f[W   ] = &DD[E   *size_Mat];
-   //   D.f[E   ] = &DD[W   *size_Mat];
-   //   D.f[S   ] = &DD[N   *size_Mat];
-   //   D.f[N   ] = &DD[S   *size_Mat];
-   //   D.f[B   ] = &DD[T   *size_Mat];
-   //   D.f[T   ] = &DD[B   *size_Mat];
-   //   D.f[SW  ] = &DD[NE  *size_Mat];
-   //   D.f[NE  ] = &DD[SW  *size_Mat];
-   //   D.f[NW  ] = &DD[SE  *size_Mat];
-   //   D.f[SE  ] = &DD[NW  *size_Mat];
-   //   D.f[BW  ] = &DD[TE  *size_Mat];
-   //   D.f[TE  ] = &DD[BW  *size_Mat];
-   //   D.f[TW  ] = &DD[BE  *size_Mat];
-   //   D.f[BE  ] = &DD[TW  *size_Mat];
-   //   D.f[BS  ] = &DD[TN  *size_Mat];
-   //   D.f[TN  ] = &DD[BS  *size_Mat];
-   //   D.f[TS  ] = &DD[BN  *size_Mat];
-   //   D.f[BN  ] = &DD[TS  *size_Mat];
-   //   D.f[REST] = &DD[REST*size_Mat];
-   //   D.f[TNE ] = &DD[BSW *size_Mat];
-   //   D.f[TSW ] = &DD[BNE *size_Mat];
-   //   D.f[TSE ] = &DD[BNW *size_Mat];
-   //   D.f[TNW ] = &DD[BSE *size_Mat];
-   //   D.f[BNE ] = &DD[TSW *size_Mat];
-   //   D.f[BSW ] = &DD[TNE *size_Mat];
-   //   D.f[BSE ] = &DD[TNW *size_Mat];
-   //   D.f[BNW ] = &DD[TSE *size_Mat];
+   //   D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+   //   D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+   //   D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+   //   D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+   //   D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+   //   D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+   //   D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+   //   D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+   //   D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+   //   D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+   //   D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+   //   D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+   //   D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+   //   D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+   //   D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+   //   D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+   //   D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+   //   D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+   //   D.f[DIR_000] = &DD[DIR_000*size_Mat];
+   //   D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+   //   D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+   //   D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+   //   D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+   //   D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+   //   D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+   //   D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+   //   D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
    //}
 
    Distributions7 D7;
@@ -4915,12 +4915,12 @@ extern "C" __global__ void QADVeloIncomp7(
       //////////////////////////////////////////////////////////////////////////////////
       real  *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB; 
 
-      q_dirE   = &QQ[E   * numberOfBCnodes];
-      q_dirW   = &QQ[W   * numberOfBCnodes];
-      q_dirN   = &QQ[N   * numberOfBCnodes];
-      q_dirS   = &QQ[S   * numberOfBCnodes];
-      q_dirT   = &QQ[T   * numberOfBCnodes];
-      q_dirB   = &QQ[B   * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
       //////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -4952,32 +4952,32 @@ extern "C" __global__ void QADVeloIncomp7(
       //unsigned int ktne = KQK;
       //unsigned int kbsw = neighborZ[ksw];
       ////////////////////////////////////////////////////////////////////////////////
-      //real f_W    = (D.f[E   ])[ke   ];
-      //real f_E    = (D.f[W   ])[kw   ];
-      //real f_S    = (D.f[N   ])[kn   ];
-      //real f_N    = (D.f[S   ])[ks   ];
-      //real f_B    = (D.f[T   ])[kt   ];
-      //real f_T    = (D.f[B   ])[kb   ];
-      //real f_SW   = (D.f[NE  ])[kne  ];
-      //real f_NE   = (D.f[SW  ])[ksw  ];
-      //real f_NW   = (D.f[SE  ])[kse  ];
-      //real f_SE   = (D.f[NW  ])[knw  ];
-      //real f_BW   = (D.f[TE  ])[kte  ];
-      //real f_TE   = (D.f[BW  ])[kbw  ];
-      //real f_TW   = (D.f[BE  ])[kbe  ];
-      //real f_BE   = (D.f[TW  ])[ktw  ];
-      //real f_BS   = (D.f[TN  ])[ktn  ];
-      //real f_TN   = (D.f[BS  ])[kbs  ];
-      //real f_TS   = (D.f[BN  ])[kbn  ];
-      //real f_BN   = (D.f[TS  ])[kts  ];
-      //real f_BSW  = (D.f[TNE ])[ktne ];
-      //real f_BNE  = (D.f[TSW ])[ktsw ];
-      //real f_BNW  = (D.f[TSE ])[ktse ];
-      //real f_BSE  = (D.f[TNW ])[ktnw ];
-      //real f_TSW  = (D.f[BNE ])[kbne ];
-      //real f_TNE  = (D.f[BSW ])[kbsw ];
-      //real f_TNW  = (D.f[BSE ])[kbse ];
-      //real f_TSE  = (D.f[BNW ])[kbnw ];
+      //real f_W    = (D.f[DIR_P00   ])[ke   ];
+      //real f_E    = (D.f[DIR_M00   ])[kw   ];
+      //real f_S    = (D.f[DIR_0P0   ])[kn   ];
+      //real f_N    = (D.f[DIR_0M0   ])[ks   ];
+      //real f_B    = (D.f[DIR_00P   ])[kt   ];
+      //real f_T    = (D.f[DIR_00M   ])[kb   ];
+      //real f_SW   = (D.f[DIR_PP0  ])[kne  ];
+      //real f_NE   = (D.f[DIR_MM0  ])[ksw  ];
+      //real f_NW   = (D.f[DIR_PM0  ])[kse  ];
+      //real f_SE   = (D.f[DIR_MP0  ])[knw  ];
+      //real f_BW   = (D.f[DIR_P0P  ])[kte  ];
+      //real f_TE   = (D.f[DIR_M0M  ])[kbw  ];
+      //real f_TW   = (D.f[DIR_P0M  ])[kbe  ];
+      //real f_BE   = (D.f[DIR_M0P  ])[ktw  ];
+      //real f_BS   = (D.f[DIR_0PP  ])[ktn  ];
+      //real f_TN   = (D.f[DIR_0MM  ])[kbs  ];
+      //real f_TS   = (D.f[DIR_0PM  ])[kbn  ];
+      //real f_BN   = (D.f[DIR_0MP  ])[kts  ];
+      //real f_BSW  = (D.f[DIR_PPP ])[ktne ];
+      //real f_BNE  = (D.f[DIR_MMP ])[ktsw ];
+      //real f_BNW  = (D.f[DIR_PMP ])[ktse ];
+      //real f_BSE  = (D.f[DIR_MPP ])[ktnw ];
+      //real f_TSW  = (D.f[DIR_PPM ])[kbne ];
+      //real f_TNE  = (D.f[DIR_MMM ])[kbsw ];
+      //real f_TNW  = (D.f[DIR_PMM ])[kbse ];
+      //real f_TSE  = (D.f[DIR_MPM ])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       //real vx1_Inflow   = c0o1;
       //real vx2_Inflow   = velo[k];
@@ -5276,7 +5276,7 @@ extern "C" __global__ void QADVeloIncomp7(
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QADVeloIncomp27(
+__global__ void QADVeloIncomp27(
 											real* DD, 
 											real* DD27, 
 											real* temp,
@@ -5295,125 +5295,125 @@ extern "C" __global__ void QADVeloIncomp27(
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[E   ] = &DD[E   *size_Mat];
-      D.f[W   ] = &DD[W   *size_Mat];
-      D.f[N   ] = &DD[N   *size_Mat];
-      D.f[S   ] = &DD[S   *size_Mat];
-      D.f[T   ] = &DD[T   *size_Mat];
-      D.f[B   ] = &DD[B   *size_Mat];
-      D.f[NE  ] = &DD[NE  *size_Mat];
-      D.f[SW  ] = &DD[SW  *size_Mat];
-      D.f[SE  ] = &DD[SE  *size_Mat];
-      D.f[NW  ] = &DD[NW  *size_Mat];
-      D.f[TE  ] = &DD[TE  *size_Mat];
-      D.f[BW  ] = &DD[BW  *size_Mat];
-      D.f[BE  ] = &DD[BE  *size_Mat];
-      D.f[TW  ] = &DD[TW  *size_Mat];
-      D.f[TN  ] = &DD[TN  *size_Mat];
-      D.f[BS  ] = &DD[BS  *size_Mat];
-      D.f[BN  ] = &DD[BN  *size_Mat];
-      D.f[TS  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[TNE *size_Mat];
-      D.f[TSW ] = &DD[TSW *size_Mat];
-      D.f[TSE ] = &DD[TSE *size_Mat];
-      D.f[TNW ] = &DD[TNW *size_Mat];
-      D.f[BNE ] = &DD[BNE *size_Mat];
-      D.f[BSW ] = &DD[BSW *size_Mat];
-      D.f[BSE ] = &DD[BSE *size_Mat];
-      D.f[BNW ] = &DD[BNW *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
    } 
    else
    {
-      D.f[W   ] = &DD[E   *size_Mat];
-      D.f[E   ] = &DD[W   *size_Mat];
-      D.f[S   ] = &DD[N   *size_Mat];
-      D.f[N   ] = &DD[S   *size_Mat];
-      D.f[B   ] = &DD[T   *size_Mat];
-      D.f[T   ] = &DD[B   *size_Mat];
-      D.f[SW  ] = &DD[NE  *size_Mat];
-      D.f[NE  ] = &DD[SW  *size_Mat];
-      D.f[NW  ] = &DD[SE  *size_Mat];
-      D.f[SE  ] = &DD[NW  *size_Mat];
-      D.f[BW  ] = &DD[TE  *size_Mat];
-      D.f[TE  ] = &DD[BW  *size_Mat];
-      D.f[TW  ] = &DD[BE  *size_Mat];
-      D.f[BE  ] = &DD[TW  *size_Mat];
-      D.f[BS  ] = &DD[TN  *size_Mat];
-      D.f[TN  ] = &DD[BS  *size_Mat];
-      D.f[TS  ] = &DD[BN  *size_Mat];
-      D.f[BN  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[BSW *size_Mat];
-      D.f[TSW ] = &DD[BNE *size_Mat];
-      D.f[TSE ] = &DD[BNW *size_Mat];
-      D.f[TNW ] = &DD[BSE *size_Mat];
-      D.f[BNE ] = &DD[TSW *size_Mat];
-      D.f[BSW ] = &DD[TNE *size_Mat];
-      D.f[BSE ] = &DD[TNW *size_Mat];
-      D.f[BNW ] = &DD[TSE *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
    }
 
    Distributions27 D27;
    if (isEvenTimestep==true)
    {
-      D27.f[E   ] = &DD27[E   *size_Mat];
-      D27.f[W   ] = &DD27[W   *size_Mat];
-      D27.f[N   ] = &DD27[N   *size_Mat];
-      D27.f[S   ] = &DD27[S   *size_Mat];
-      D27.f[T   ] = &DD27[T   *size_Mat];
-      D27.f[B   ] = &DD27[B   *size_Mat];
-      D27.f[NE  ] = &DD27[NE  *size_Mat];
-      D27.f[SW  ] = &DD27[SW  *size_Mat];
-      D27.f[SE  ] = &DD27[SE  *size_Mat];
-      D27.f[NW  ] = &DD27[NW  *size_Mat];
-      D27.f[TE  ] = &DD27[TE  *size_Mat];
-      D27.f[BW  ] = &DD27[BW  *size_Mat];
-      D27.f[BE  ] = &DD27[BE  *size_Mat];
-      D27.f[TW  ] = &DD27[TW  *size_Mat];
-      D27.f[TN  ] = &DD27[TN  *size_Mat];
-      D27.f[BS  ] = &DD27[BS  *size_Mat];
-      D27.f[BN  ] = &DD27[BN  *size_Mat];
-      D27.f[TS  ] = &DD27[TS  *size_Mat];
-      D27.f[REST] = &DD27[REST*size_Mat];
-      D27.f[TNE ] = &DD27[TNE *size_Mat];
-      D27.f[TSW ] = &DD27[TSW *size_Mat];
-      D27.f[TSE ] = &DD27[TSE *size_Mat];
-      D27.f[TNW ] = &DD27[TNW *size_Mat];
-      D27.f[BNE ] = &DD27[BNE *size_Mat];
-      D27.f[BSW ] = &DD27[BSW *size_Mat];
-      D27.f[BSE ] = &DD27[BSE *size_Mat];
-      D27.f[BNW ] = &DD27[BNW *size_Mat];
+      D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
+      D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
+      D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
+      D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
+      D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
+      D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
+      D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
+      D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
+      D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
+      D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
+      D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
+      D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
+      D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
+      D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
+      D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
+      D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
+      D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
+      D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
+      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
+      D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
+      D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
+      D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
+      D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
+      D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
+      D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
+      D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
+      D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
    } 
    else
    {
-      D27.f[W   ] = &DD27[E   *size_Mat];
-      D27.f[E   ] = &DD27[W   *size_Mat];
-      D27.f[S   ] = &DD27[N   *size_Mat];
-      D27.f[N   ] = &DD27[S   *size_Mat];
-      D27.f[B   ] = &DD27[T   *size_Mat];
-      D27.f[T   ] = &DD27[B   *size_Mat];
-      D27.f[SW  ] = &DD27[NE  *size_Mat];
-      D27.f[NE  ] = &DD27[SW  *size_Mat];
-      D27.f[NW  ] = &DD27[SE  *size_Mat];
-      D27.f[SE  ] = &DD27[NW  *size_Mat];
-      D27.f[BW  ] = &DD27[TE  *size_Mat];
-      D27.f[TE  ] = &DD27[BW  *size_Mat];
-      D27.f[TW  ] = &DD27[BE  *size_Mat];
-      D27.f[BE  ] = &DD27[TW  *size_Mat];
-      D27.f[BS  ] = &DD27[TN  *size_Mat];
-      D27.f[TN  ] = &DD27[BS  *size_Mat];
-      D27.f[TS  ] = &DD27[BN  *size_Mat];
-      D27.f[BN  ] = &DD27[TS  *size_Mat];
-      D27.f[REST] = &DD27[REST*size_Mat];
-      D27.f[TNE ] = &DD27[BSW *size_Mat];
-      D27.f[TSW ] = &DD27[BNE *size_Mat];
-      D27.f[TSE ] = &DD27[BNW *size_Mat];
-      D27.f[TNW ] = &DD27[BSE *size_Mat];
-      D27.f[BNE ] = &DD27[TSW *size_Mat];
-      D27.f[BSW ] = &DD27[TNE *size_Mat];
-      D27.f[BSE ] = &DD27[TNW *size_Mat];
-      D27.f[BNW ] = &DD27[TSE *size_Mat];
+      D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
+      D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
+      D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
+      D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
+      D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
+      D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
+      D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
+      D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
+      D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
+      D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
+      D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
+      D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
+      D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
+      D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
+      D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
+      D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
+      D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
+      D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
+      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
+      D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
+      D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
+      D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
+      D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
+      D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
+      D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
+      D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
+      D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -5434,32 +5434,32 @@ extern "C" __global__ void QADVeloIncomp27(
          *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
          *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
          *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[E   * numberOfBCnodes];
-      q_dirW   = &QQ[W   * numberOfBCnodes];
-      q_dirN   = &QQ[N   * numberOfBCnodes];
-      q_dirS   = &QQ[S   * numberOfBCnodes];
-      q_dirT   = &QQ[T   * numberOfBCnodes];
-      q_dirB   = &QQ[B   * numberOfBCnodes];
-      q_dirNE  = &QQ[NE  * numberOfBCnodes];
-      q_dirSW  = &QQ[SW  * numberOfBCnodes];
-      q_dirSE  = &QQ[SE  * numberOfBCnodes];
-      q_dirNW  = &QQ[NW  * numberOfBCnodes];
-      q_dirTE  = &QQ[TE  * numberOfBCnodes];
-      q_dirBW  = &QQ[BW  * numberOfBCnodes];
-      q_dirBE  = &QQ[BE  * numberOfBCnodes];
-      q_dirTW  = &QQ[TW  * numberOfBCnodes];
-      q_dirTN  = &QQ[TN  * numberOfBCnodes];
-      q_dirBS  = &QQ[BS  * numberOfBCnodes];
-      q_dirBN  = &QQ[BN  * numberOfBCnodes];
-      q_dirTS  = &QQ[TS  * numberOfBCnodes];
-      q_dirTNE = &QQ[TNE * numberOfBCnodes];
-      q_dirTSW = &QQ[TSW * numberOfBCnodes];
-      q_dirTSE = &QQ[TSE * numberOfBCnodes];
-      q_dirTNW = &QQ[TNW * numberOfBCnodes];
-      q_dirBNE = &QQ[BNE * numberOfBCnodes];
-      q_dirBSW = &QQ[BSW * numberOfBCnodes];
-      q_dirBSE = &QQ[BSE * numberOfBCnodes];
-      q_dirBNW = &QQ[BNW * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
+      q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
+      q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
+      q_dirTNW = &QQ[DIR_MPP * numberOfBCnodes];
+      q_dirBNE = &QQ[DIR_PPM * numberOfBCnodes];
+      q_dirBSW = &QQ[DIR_MMM * numberOfBCnodes];
+      q_dirBSE = &QQ[DIR_PMM * numberOfBCnodes];
+      q_dirBNW = &QQ[DIR_MPM * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -5491,65 +5491,65 @@ extern "C" __global__ void QADVeloIncomp27(
       unsigned int ktne = KQK;
       unsigned int kbsw = neighborZ[ksw];
       ////////////////////////////////////////////////////////////////////////////////
-      real f_W    = (D.f[E   ])[ke   ];
-      real f_E    = (D.f[W   ])[kw   ];
-      real f_S    = (D.f[N   ])[kn   ];
-      real f_N    = (D.f[S   ])[ks   ];
-      real f_B    = (D.f[T   ])[kt   ];
-      real f_T    = (D.f[B   ])[kb   ];
-      real f_SW   = (D.f[NE  ])[kne  ];
-      real f_NE   = (D.f[SW  ])[ksw  ];
-      real f_NW   = (D.f[SE  ])[kse  ];
-      real f_SE   = (D.f[NW  ])[knw  ];
-      real f_BW   = (D.f[TE  ])[kte  ];
-      real f_TE   = (D.f[BW  ])[kbw  ];
-      real f_TW   = (D.f[BE  ])[kbe  ];
-      real f_BE   = (D.f[TW  ])[ktw  ];
-      real f_BS   = (D.f[TN  ])[ktn  ];
-      real f_TN   = (D.f[BS  ])[kbs  ];
-      real f_TS   = (D.f[BN  ])[kbn  ];
-      real f_BN   = (D.f[TS  ])[kts  ];
-      //real f_ZERO = (D.f[REST])[kzero];
-      real f_BSW  = (D.f[TNE ])[ktne ];
-      real f_BNE  = (D.f[TSW ])[ktsw ];
-      real f_BNW  = (D.f[TSE ])[ktse ];
-      real f_BSE  = (D.f[TNW ])[ktnw ];
-      real f_TSW  = (D.f[BNE ])[kbne ];
-      real f_TNE  = (D.f[BSW ])[kbsw ];
-      real f_TNW  = (D.f[BSE ])[kbse ];
-      real f_TSE  = (D.f[BNW ])[kbnw ];
+      real f_W    = (D.f[DIR_P00   ])[ke   ];
+      real f_E    = (D.f[DIR_M00   ])[kw   ];
+      real f_S    = (D.f[DIR_0P0   ])[kn   ];
+      real f_N    = (D.f[DIR_0M0   ])[ks   ];
+      real f_B    = (D.f[DIR_00P   ])[kt   ];
+      real f_T    = (D.f[DIR_00M   ])[kb   ];
+      real f_SW   = (D.f[DIR_PP0  ])[kne  ];
+      real f_NE   = (D.f[DIR_MM0  ])[ksw  ];
+      real f_NW   = (D.f[DIR_PM0  ])[kse  ];
+      real f_SE   = (D.f[DIR_MP0  ])[knw  ];
+      real f_BW   = (D.f[DIR_P0P  ])[kte  ];
+      real f_TE   = (D.f[DIR_M0M  ])[kbw  ];
+      real f_TW   = (D.f[DIR_P0M  ])[kbe  ];
+      real f_BE   = (D.f[DIR_M0P  ])[ktw  ];
+      real f_BS   = (D.f[DIR_0PP  ])[ktn  ];
+      real f_TN   = (D.f[DIR_0MM  ])[kbs  ];
+      real f_TS   = (D.f[DIR_0PM  ])[kbn  ];
+      real f_BN   = (D.f[DIR_0MP  ])[kts  ];
+      //real f_ZERO = (D.f[DIR_000])[kzero];
+      real f_BSW  = (D.f[DIR_PPP ])[ktne ];
+      real f_BNE  = (D.f[DIR_MMP ])[ktsw ];
+      real f_BNW  = (D.f[DIR_PMP ])[ktse ];
+      real f_BSE  = (D.f[DIR_MPP ])[ktnw ];
+      real f_TSW  = (D.f[DIR_PPM ])[kbne ];
+      real f_TNE  = (D.f[DIR_MMM ])[kbsw ];
+      real f_TNW  = (D.f[DIR_PMM ])[kbse ];
+      real f_TSE  = (D.f[DIR_MPM ])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1 = ((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_TSE-f_BNW)+(f_BSE-f_TNW) +(f_NE-f_SW)+(f_SE-f_NW)+(f_TE-f_BW)+(f_BE-f_TW)+(f_E-f_W));
       real vx2 = ((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_BNW-f_TSE)+(f_TNW-f_BSE) +(f_NE-f_SW)+(f_NW-f_SE)+(f_TN-f_BS)+(f_BN-f_TS)+(f_N-f_S));
       real vx3 = ((f_TNE-f_BSW)+(f_TSW-f_BNE)+(f_TSE-f_BNW)+(f_TNW-f_BSE) +(f_TE-f_BW)+(f_TW-f_BE)+(f_TN-f_BS)+(f_TS-f_BN)+(f_T-f_B));
       ////////////////////////////////////////////////////////////////////////////////
-      //real f27_W    = (D27.f[E   ])[ke   ];
-      //real f27_E    = (D27.f[W   ])[kw   ];
-      //real f27_S    = (D27.f[N   ])[kn   ];
-      //real f27_N    = (D27.f[S   ])[ks   ];
-      //real f27_B    = (D27.f[T   ])[kt   ];
-      //real f27_T    = (D27.f[B   ])[kb   ];
-      //real f27_SW   = (D27.f[NE  ])[kne  ];
-      //real f27_NE   = (D27.f[SW  ])[ksw  ];
-      //real f27_NW   = (D27.f[SE  ])[kse  ];
-      //real f27_SE   = (D27.f[NW  ])[knw  ];
-      //real f27_BW   = (D27.f[TE  ])[kte  ];
-      //real f27_TE   = (D27.f[BW  ])[kbw  ];
-      //real f27_TW   = (D27.f[BE  ])[kbe  ];
-      //real f27_BE   = (D27.f[TW  ])[ktw  ];
-      //real f27_BS   = (D27.f[TN  ])[ktn  ];
-      //real f27_TN   = (D27.f[BS  ])[kbs  ];
-      //real f27_TS   = (D27.f[BN  ])[kbn  ];
-      //real f27_BN   = (D27.f[TS  ])[kts  ];
-      //real f27_ZERO = (D27.f[REST])[kzero];
-      //real f27_BSW  = (D27.f[TNE ])[ktne ];
-      //real f27_BNE  = (D27.f[TSW ])[ktsw ];
-      //real f27_BNW  = (D27.f[TSE ])[ktse ];
-      //real f27_BSE  = (D27.f[TNW ])[ktnw ];
-      //real f27_TSW  = (D27.f[BNE ])[kbne ];
-      //real f27_TNE  = (D27.f[BSW ])[kbsw ];
-      //real f27_TNW  = (D27.f[BSE ])[kbse ];
-      //real f27_TSE  = (D27.f[BNW ])[kbnw ];
+      //real f27_W    = (D27.f[DIR_P00   ])[ke   ];
+      //real f27_E    = (D27.f[DIR_M00   ])[kw   ];
+      //real f27_S    = (D27.f[DIR_0P0   ])[kn   ];
+      //real f27_N    = (D27.f[DIR_0M0   ])[ks   ];
+      //real f27_B    = (D27.f[DIR_00P   ])[kt   ];
+      //real f27_T    = (D27.f[DIR_00M   ])[kb   ];
+      //real f27_SW   = (D27.f[DIR_PP0  ])[kne  ];
+      //real f27_NE   = (D27.f[DIR_MM0  ])[ksw  ];
+      //real f27_NW   = (D27.f[DIR_PM0  ])[kse  ];
+      //real f27_SE   = (D27.f[DIR_MP0  ])[knw  ];
+      //real f27_BW   = (D27.f[DIR_P0P  ])[kte  ];
+      //real f27_TE   = (D27.f[DIR_M0M  ])[kbw  ];
+      //real f27_TW   = (D27.f[DIR_P0M  ])[kbe  ];
+      //real f27_BE   = (D27.f[DIR_M0P  ])[ktw  ];
+      //real f27_BS   = (D27.f[DIR_0PP  ])[ktn  ];
+      //real f27_TN   = (D27.f[DIR_0MM  ])[kbs  ];
+      //real f27_TS   = (D27.f[DIR_0PM  ])[kbn  ];
+      //real f27_BN   = (D27.f[DIR_0MP  ])[kts  ];
+      //real f27_ZERO = (D27.f[DIR_000])[kzero];
+      //real f27_BSW  = (D27.f[DIR_PPP ])[ktne ];
+      //real f27_BNE  = (D27.f[DIR_MMP ])[ktsw ];
+      //real f27_BNW  = (D27.f[DIR_PMP ])[ktse ];
+      //real f27_BSE  = (D27.f[DIR_MPP ])[ktnw ];
+      //real f27_TSW  = (D27.f[DIR_PPM ])[kbne ];
+      //real f27_TNE  = (D27.f[DIR_MMM ])[kbsw ];
+      //real f27_TNW  = (D27.f[DIR_PMM ])[kbse ];
+      //real f27_TSE  = (D27.f[DIR_MPM ])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
       ////////////////////////////////////////////////////////////////////////////////
@@ -5630,122 +5630,122 @@ extern "C" __global__ void QADVeloIncomp27(
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D27.f[E   ] = &DD27[E   *size_Mat];
-         D27.f[W   ] = &DD27[W   *size_Mat];
-         D27.f[N   ] = &DD27[N   *size_Mat];
-         D27.f[S   ] = &DD27[S   *size_Mat];
-         D27.f[T   ] = &DD27[T   *size_Mat];
-         D27.f[B   ] = &DD27[B   *size_Mat];
-         D27.f[NE  ] = &DD27[NE  *size_Mat];
-         D27.f[SW  ] = &DD27[SW  *size_Mat];
-         D27.f[SE  ] = &DD27[SE  *size_Mat];
-         D27.f[NW  ] = &DD27[NW  *size_Mat];
-         D27.f[TE  ] = &DD27[TE  *size_Mat];
-         D27.f[BW  ] = &DD27[BW  *size_Mat];
-         D27.f[BE  ] = &DD27[BE  *size_Mat];
-         D27.f[TW  ] = &DD27[TW  *size_Mat];
-         D27.f[TN  ] = &DD27[TN  *size_Mat];
-         D27.f[BS  ] = &DD27[BS  *size_Mat];
-         D27.f[BN  ] = &DD27[BN  *size_Mat];
-         D27.f[TS  ] = &DD27[TS  *size_Mat];
-         D27.f[REST] = &DD27[REST*size_Mat];
-         D27.f[TNE ] = &DD27[TNE *size_Mat];
-         D27.f[TSW ] = &DD27[TSW *size_Mat];
-         D27.f[TSE ] = &DD27[TSE *size_Mat];
-         D27.f[TNW ] = &DD27[TNW *size_Mat];
-         D27.f[BNE ] = &DD27[BNE *size_Mat];
-         D27.f[BSW ] = &DD27[BSW *size_Mat];
-         D27.f[BSE ] = &DD27[BSE *size_Mat];
-         D27.f[BNW ] = &DD27[BNW *size_Mat];
+         D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
+         D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
+         D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
+         D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
+         D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
+         D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
+         D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
+         D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
+         D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
+         D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
+         D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
+         D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
+         D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
+         D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
+         D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
+         D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
+         D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
+         D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
+         D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
+         D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
+         D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
+         D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
+         D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
+         D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
+         D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
+         D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
+         D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
       } 
       else
       {
-         D27.f[W   ] = &DD27[E   *size_Mat];
-         D27.f[E   ] = &DD27[W   *size_Mat];
-         D27.f[S   ] = &DD27[N   *size_Mat];
-         D27.f[N   ] = &DD27[S   *size_Mat];
-         D27.f[B   ] = &DD27[T   *size_Mat];
-         D27.f[T   ] = &DD27[B   *size_Mat];
-         D27.f[SW  ] = &DD27[NE  *size_Mat];
-         D27.f[NE  ] = &DD27[SW  *size_Mat];
-         D27.f[NW  ] = &DD27[SE  *size_Mat];
-         D27.f[SE  ] = &DD27[NW  *size_Mat];
-         D27.f[BW  ] = &DD27[TE  *size_Mat];
-         D27.f[TE  ] = &DD27[BW  *size_Mat];
-         D27.f[TW  ] = &DD27[BE  *size_Mat];
-         D27.f[BE  ] = &DD27[TW  *size_Mat];
-         D27.f[BS  ] = &DD27[TN  *size_Mat];
-         D27.f[TN  ] = &DD27[BS  *size_Mat];
-         D27.f[TS  ] = &DD27[BN  *size_Mat];
-         D27.f[BN  ] = &DD27[TS  *size_Mat];
-         D27.f[REST] = &DD27[REST*size_Mat];
-         D27.f[TNE ] = &DD27[BSW *size_Mat];
-         D27.f[TSW ] = &DD27[BNE *size_Mat];
-         D27.f[TSE ] = &DD27[BNW *size_Mat];
-         D27.f[TNW ] = &DD27[BSE *size_Mat];
-         D27.f[BNE ] = &DD27[TSW *size_Mat];
-         D27.f[BSW ] = &DD27[TNE *size_Mat];
-         D27.f[BSE ] = &DD27[TNW *size_Mat];
-         D27.f[BNW ] = &DD27[TSE *size_Mat];
+         D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
+         D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
+         D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
+         D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
+         D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
+         D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
+         D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
+         D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
+         D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
+         D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
+         D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
+         D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
+         D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
+         D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
+         D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
+         D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
+         D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
+         D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
+         D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
+         D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
+         D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
+         D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
+         D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
+         D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
+         D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
+         D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
+         D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
-      //(D.f[REST])[k]=c1o10;
+      //(D.f[DIR_000])[k]=c1o10;
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       real q;
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      q = q_dirE[k];   if (q>=c0o1 && q<=c1o1) (D27.f[W  ])[kw  ]= -feqW27_W  + c2o1 * c2o27  * TempD;
-      q = q_dirW[k];   if (q>=c0o1 && q<=c1o1) (D27.f[E  ])[ke  ]= -feqW27_E  + c2o1 * c2o27  * TempD;
-      q = q_dirN[k];   if (q>=c0o1 && q<=c1o1) (D27.f[S  ])[ks  ]= -feqW27_S  + c2o1 * c2o27  * TempD;
-      q = q_dirS[k];   if (q>=c0o1 && q<=c1o1) (D27.f[N  ])[kn  ]= -feqW27_N  + c2o1 * c2o27  * TempD;
-      q = q_dirT[k];   if (q>=c0o1 && q<=c1o1) (D27.f[B  ])[kb  ]= -feqW27_B  + c2o1 * c2o27  * TempD;
-      q = q_dirB[k];   if (q>=c0o1 && q<=c1o1) (D27.f[T  ])[kt  ]= -feqW27_T  + c2o1 * c2o27  * TempD;
-      q = q_dirNE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[SW ])[ksw ]= -feqW27_SW + c2o1 * c1o54  * TempD;
-      q = q_dirSW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[NE ])[kne ]= -feqW27_NE + c2o1 * c1o54  * TempD;
-      q = q_dirSE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[NW ])[knw ]= -feqW27_NW + c2o1 * c1o54  * TempD;
-      q = q_dirNW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[SE ])[kse ]= -feqW27_SE + c2o1 * c1o54  * TempD;
-      q = q_dirTE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[BW ])[kbw ]= -feqW27_BW + c2o1 * c1o54  * TempD;
-      q = q_dirBW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[TE ])[kte ]= -feqW27_TE + c2o1 * c1o54  * TempD;
-      q = q_dirBE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[TW ])[ktw ]= -feqW27_TW + c2o1 * c1o54  * TempD;
-      q = q_dirTW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[BE ])[kbe ]= -feqW27_BE + c2o1 * c1o54  * TempD;
-      q = q_dirTN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[BS ])[kbs ]= -feqW27_BS + c2o1 * c1o54  * TempD;
-      q = q_dirBS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[TN ])[ktn ]= -feqW27_TN + c2o1 * c1o54  * TempD;
-      q = q_dirBN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[TS ])[kts ]= -feqW27_TS + c2o1 * c1o54  * TempD;
-      q = q_dirTS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[BN ])[kbn ]= -feqW27_BN + c2o1 * c1o54  * TempD;
-      q = q_dirTNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[BSW])[kbsw]= -feqW27_BSW+ c2o1 * c1o216 * TempD;
-      q = q_dirBSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[TNE])[ktne]= -feqW27_TNE+ c2o1 * c1o216 * TempD;
-      q = q_dirBNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[TSW])[ktsw]= -feqW27_TSW+ c2o1 * c1o216 * TempD;
-      q = q_dirTSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[BNE])[kbne]= -feqW27_BNE+ c2o1 * c1o216 * TempD;
-      q = q_dirTSE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[BNW])[kbnw]= -feqW27_BNW+ c2o1 * c1o216 * TempD;
-      q = q_dirBNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[TSE])[ktse]= -feqW27_TSE+ c2o1 * c1o216 * TempD;
-      q = q_dirBSE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[TNW])[ktnw]= -feqW27_TNW+ c2o1 * c1o216 * TempD;
-      q = q_dirTNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[BSE])[kbse]= -feqW27_BSE+ c2o1 * c1o216 * TempD;
-      //q = q_dirE[k];   if (q>=zero && q<=one) (D27.f[W  ])[kw  ]=(two*feqW27_W  -(f27_E  *(q*omegaD-one)-omegaD*feq27_E  *(q-one))/(omegaD-one)+f27_W  *q)/(q+one);
-      //q = q_dirW[k];   if (q>=zero && q<=one) (D27.f[E  ])[ke  ]=(two*feqW27_E  -(f27_W  *(q*omegaD-one)-omegaD*feq27_W  *(q-one))/(omegaD-one)+f27_E  *q)/(q+one);
-      //q = q_dirN[k];   if (q>=zero && q<=one) (D27.f[S  ])[ks  ]=(two*feqW27_S  -(f27_N  *(q*omegaD-one)-omegaD*feq27_N  *(q-one))/(omegaD-one)+f27_S  *q)/(q+one);
-      //q = q_dirS[k];   if (q>=zero && q<=one) (D27.f[N  ])[kn  ]=(two*feqW27_N  -(f27_S  *(q*omegaD-one)-omegaD*feq27_S  *(q-one))/(omegaD-one)+f27_N  *q)/(q+one);
-      //q = q_dirT[k];   if (q>=zero && q<=one) (D27.f[B  ])[kb  ]=(two*feqW27_B  -(f27_T  *(q*omegaD-one)-omegaD*feq27_T  *(q-one))/(omegaD-one)+f27_B  *q)/(q+one);
-      //q = q_dirB[k];   if (q>=zero && q<=one) (D27.f[T  ])[kt  ]=(two*feqW27_T  -(f27_B  *(q*omegaD-one)-omegaD*feq27_B  *(q-one))/(omegaD-one)+f27_T  *q)/(q+one);
-      //q = q_dirNE[k];  if (q>=zero && q<=one) (D27.f[SW ])[ksw ]=(two*feqW27_SW -(f27_NE *(q*omegaD-one)-omegaD*feq27_NE *(q-one))/(omegaD-one)+f27_SW *q)/(q+one);
-      //q = q_dirSW[k];  if (q>=zero && q<=one) (D27.f[NE ])[kne ]=(two*feqW27_NE -(f27_SW *(q*omegaD-one)-omegaD*feq27_SW *(q-one))/(omegaD-one)+f27_NE *q)/(q+one);
-      //q = q_dirSE[k];  if (q>=zero && q<=one) (D27.f[NW ])[knw ]=(two*feqW27_NW -(f27_SE *(q*omegaD-one)-omegaD*feq27_SE *(q-one))/(omegaD-one)+f27_NW *q)/(q+one);
-      //q = q_dirNW[k];  if (q>=zero && q<=one) (D27.f[SE ])[kse ]=(two*feqW27_SE -(f27_NW *(q*omegaD-one)-omegaD*feq27_NW *(q-one))/(omegaD-one)+f27_SE *q)/(q+one);
-      //q = q_dirTE[k];  if (q>=zero && q<=one) (D27.f[BW ])[kbw ]=(two*feqW27_BW -(f27_TE *(q*omegaD-one)-omegaD*feq27_TE *(q-one))/(omegaD-one)+f27_BW *q)/(q+one);
-      //q = q_dirBW[k];  if (q>=zero && q<=one) (D27.f[TE ])[kte ]=(two*feqW27_TE -(f27_BW *(q*omegaD-one)-omegaD*feq27_BW *(q-one))/(omegaD-one)+f27_TE *q)/(q+one);
-      //q = q_dirBE[k];  if (q>=zero && q<=one) (D27.f[TW ])[ktw ]=(two*feqW27_TW -(f27_BE *(q*omegaD-one)-omegaD*feq27_BE *(q-one))/(omegaD-one)+f27_TW *q)/(q+one);
-      //q = q_dirTW[k];  if (q>=zero && q<=one) (D27.f[BE ])[kbe ]=(two*feqW27_BE -(f27_TW *(q*omegaD-one)-omegaD*feq27_TW *(q-one))/(omegaD-one)+f27_BE *q)/(q+one);
-      //q = q_dirTN[k];  if (q>=zero && q<=one) (D27.f[BS ])[kbs ]=(two*feqW27_BS -(f27_TN *(q*omegaD-one)-omegaD*feq27_TN *(q-one))/(omegaD-one)+f27_BS *q)/(q+one);
-      //q = q_dirBS[k];  if (q>=zero && q<=one) (D27.f[TN ])[ktn ]=(two*feqW27_TN -(f27_BS *(q*omegaD-one)-omegaD*feq27_BS *(q-one))/(omegaD-one)+f27_TN *q)/(q+one);
-      //q = q_dirBN[k];  if (q>=zero && q<=one) (D27.f[TS ])[kts ]=(two*feqW27_TS -(f27_BN *(q*omegaD-one)-omegaD*feq27_BN *(q-one))/(omegaD-one)+f27_TS *q)/(q+one);
-      //q = q_dirTS[k];  if (q>=zero && q<=one) (D27.f[BN ])[kbn ]=(two*feqW27_BN -(f27_TS *(q*omegaD-one)-omegaD*feq27_TS *(q-one))/(omegaD-one)+f27_BN *q)/(q+one);
-      //q = q_dirTNE[k]; if (q>=zero && q<=one) (D27.f[BSW])[kbsw]=(two*feqW27_BSW-(f27_TNE*(q*omegaD-one)-omegaD*feq27_TNE*(q-one))/(omegaD-one)+f27_BSW*q)/(q+one);
-      //q = q_dirBSW[k]; if (q>=zero && q<=one) (D27.f[TNE])[ktne]=(two*feqW27_TNE-(f27_BSW*(q*omegaD-one)-omegaD*feq27_BSW*(q-one))/(omegaD-one)+f27_TNE*q)/(q+one);
-      //q = q_dirBNE[k]; if (q>=zero && q<=one) (D27.f[TSW])[ktsw]=(two*feqW27_TSW-(f27_BNE*(q*omegaD-one)-omegaD*feq27_BNE*(q-one))/(omegaD-one)+f27_TSW*q)/(q+one);
-      //q = q_dirTSW[k]; if (q>=zero && q<=one) (D27.f[BNE])[kbne]=(two*feqW27_BNE-(f27_TSW*(q*omegaD-one)-omegaD*feq27_TSW*(q-one))/(omegaD-one)+f27_BNE*q)/(q+one);
-      //q = q_dirTSE[k]; if (q>=zero && q<=one) (D27.f[BNW])[kbnw]=(two*feqW27_BNW-(f27_TSE*(q*omegaD-one)-omegaD*feq27_TSE*(q-one))/(omegaD-one)+f27_BNW*q)/(q+one);
-      //q = q_dirBNW[k]; if (q>=zero && q<=one) (D27.f[TSE])[ktse]=(two*feqW27_TSE-(f27_BNW*(q*omegaD-one)-omegaD*feq27_BNW*(q-one))/(omegaD-one)+f27_TSE*q)/(q+one);
-      //q = q_dirBSE[k]; if (q>=zero && q<=one) (D27.f[TNW])[ktnw]=(two*feqW27_TNW-(f27_BSE*(q*omegaD-one)-omegaD*feq27_BSE*(q-one))/(omegaD-one)+f27_TNW*q)/(q+one);
-      //q = q_dirTNW[k]; if (q>=zero && q<=one) (D27.f[BSE])[kbse]=(two*feqW27_BSE-(f27_TNW*(q*omegaD-one)-omegaD*feq27_TNW*(q-one))/(omegaD-one)+f27_BSE*q)/(q+one);
+      q = q_dirE[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00  ])[kw  ]= -feqW27_W  + c2o1 * c2o27  * TempD;
+      q = q_dirW[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00  ])[ke  ]= -feqW27_E  + c2o1 * c2o27  * TempD;
+      q = q_dirN[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0  ])[ks  ]= -feqW27_S  + c2o1 * c2o27  * TempD;
+      q = q_dirS[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0  ])[kn  ]= -feqW27_N  + c2o1 * c2o27  * TempD;
+      q = q_dirT[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M  ])[kb  ]= -feqW27_B  + c2o1 * c2o27  * TempD;
+      q = q_dirB[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P  ])[kt  ]= -feqW27_T  + c2o1 * c2o27  * TempD;
+      q = q_dirNE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0 ])[ksw ]= -feqW27_SW + c2o1 * c1o54  * TempD;
+      q = q_dirSW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0 ])[kne ]= -feqW27_NE + c2o1 * c1o54  * TempD;
+      q = q_dirSE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0 ])[knw ]= -feqW27_NW + c2o1 * c1o54  * TempD;
+      q = q_dirNW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0 ])[kse ]= -feqW27_SE + c2o1 * c1o54  * TempD;
+      q = q_dirTE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M ])[kbw ]= -feqW27_BW + c2o1 * c1o54  * TempD;
+      q = q_dirBW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P ])[kte ]= -feqW27_TE + c2o1 * c1o54  * TempD;
+      q = q_dirBE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P ])[ktw ]= -feqW27_TW + c2o1 * c1o54  * TempD;
+      q = q_dirTW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M ])[kbe ]= -feqW27_BE + c2o1 * c1o54  * TempD;
+      q = q_dirTN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM ])[kbs ]= -feqW27_BS + c2o1 * c1o54  * TempD;
+      q = q_dirBS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP ])[ktn ]= -feqW27_TN + c2o1 * c1o54  * TempD;
+      q = q_dirBN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP ])[kts ]= -feqW27_TS + c2o1 * c1o54  * TempD;
+      q = q_dirTS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM ])[kbn ]= -feqW27_BN + c2o1 * c1o54  * TempD;
+      q = q_dirTNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMM])[kbsw]= -feqW27_BSW+ c2o1 * c1o216 * TempD;
+      q = q_dirBSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PPP])[ktne]= -feqW27_TNE+ c2o1 * c1o216 * TempD;
+      q = q_dirBNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMP])[ktsw]= -feqW27_TSW+ c2o1 * c1o216 * TempD;
+      q = q_dirTSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PPM])[kbne]= -feqW27_BNE+ c2o1 * c1o216 * TempD;
+      q = q_dirTSE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MPM])[kbnw]= -feqW27_BNW+ c2o1 * c1o216 * TempD;
+      q = q_dirBNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PMP])[ktse]= -feqW27_TSE+ c2o1 * c1o216 * TempD;
+      q = q_dirBSE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MPP])[ktnw]= -feqW27_TNW+ c2o1 * c1o216 * TempD;
+      q = q_dirTNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PMM])[kbse]= -feqW27_BSE+ c2o1 * c1o216 * TempD;
+      //q = q_dirE[k];   if (q>=zero && q<=one) (D27.f[DIR_M00  ])[kw  ]=(two*feqW27_W  -(f27_E  *(q*omegaD-one)-omegaD*feq27_E  *(q-one))/(omegaD-one)+f27_W  *q)/(q+one);
+      //q = q_dirW[k];   if (q>=zero && q<=one) (D27.f[DIR_P00  ])[ke  ]=(two*feqW27_E  -(f27_W  *(q*omegaD-one)-omegaD*feq27_W  *(q-one))/(omegaD-one)+f27_E  *q)/(q+one);
+      //q = q_dirN[k];   if (q>=zero && q<=one) (D27.f[DIR_0M0  ])[ks  ]=(two*feqW27_S  -(f27_N  *(q*omegaD-one)-omegaD*feq27_N  *(q-one))/(omegaD-one)+f27_S  *q)/(q+one);
+      //q = q_dirS[k];   if (q>=zero && q<=one) (D27.f[DIR_0P0  ])[kn  ]=(two*feqW27_N  -(f27_S  *(q*omegaD-one)-omegaD*feq27_S  *(q-one))/(omegaD-one)+f27_N  *q)/(q+one);
+      //q = q_dirT[k];   if (q>=zero && q<=one) (D27.f[DIR_00M  ])[kb  ]=(two*feqW27_B  -(f27_T  *(q*omegaD-one)-omegaD*feq27_T  *(q-one))/(omegaD-one)+f27_B  *q)/(q+one);
+      //q = q_dirB[k];   if (q>=zero && q<=one) (D27.f[DIR_00P  ])[kt  ]=(two*feqW27_T  -(f27_B  *(q*omegaD-one)-omegaD*feq27_B  *(q-one))/(omegaD-one)+f27_T  *q)/(q+one);
+      //q = q_dirNE[k];  if (q>=zero && q<=one) (D27.f[DIR_MM0 ])[ksw ]=(two*feqW27_SW -(f27_NE *(q*omegaD-one)-omegaD*feq27_NE *(q-one))/(omegaD-one)+f27_SW *q)/(q+one);
+      //q = q_dirSW[k];  if (q>=zero && q<=one) (D27.f[DIR_PP0 ])[kne ]=(two*feqW27_NE -(f27_SW *(q*omegaD-one)-omegaD*feq27_SW *(q-one))/(omegaD-one)+f27_NE *q)/(q+one);
+      //q = q_dirSE[k];  if (q>=zero && q<=one) (D27.f[DIR_MP0 ])[knw ]=(two*feqW27_NW -(f27_SE *(q*omegaD-one)-omegaD*feq27_SE *(q-one))/(omegaD-one)+f27_NW *q)/(q+one);
+      //q = q_dirNW[k];  if (q>=zero && q<=one) (D27.f[DIR_PM0 ])[kse ]=(two*feqW27_SE -(f27_NW *(q*omegaD-one)-omegaD*feq27_NW *(q-one))/(omegaD-one)+f27_SE *q)/(q+one);
+      //q = q_dirTE[k];  if (q>=zero && q<=one) (D27.f[DIR_M0M ])[kbw ]=(two*feqW27_BW -(f27_TE *(q*omegaD-one)-omegaD*feq27_TE *(q-one))/(omegaD-one)+f27_BW *q)/(q+one);
+      //q = q_dirBW[k];  if (q>=zero && q<=one) (D27.f[DIR_P0P ])[kte ]=(two*feqW27_TE -(f27_BW *(q*omegaD-one)-omegaD*feq27_BW *(q-one))/(omegaD-one)+f27_TE *q)/(q+one);
+      //q = q_dirBE[k];  if (q>=zero && q<=one) (D27.f[DIR_M0P ])[ktw ]=(two*feqW27_TW -(f27_BE *(q*omegaD-one)-omegaD*feq27_BE *(q-one))/(omegaD-one)+f27_TW *q)/(q+one);
+      //q = q_dirTW[k];  if (q>=zero && q<=one) (D27.f[DIR_P0M ])[kbe ]=(two*feqW27_BE -(f27_TW *(q*omegaD-one)-omegaD*feq27_TW *(q-one))/(omegaD-one)+f27_BE *q)/(q+one);
+      //q = q_dirTN[k];  if (q>=zero && q<=one) (D27.f[DIR_0MM ])[kbs ]=(two*feqW27_BS -(f27_TN *(q*omegaD-one)-omegaD*feq27_TN *(q-one))/(omegaD-one)+f27_BS *q)/(q+one);
+      //q = q_dirBS[k];  if (q>=zero && q<=one) (D27.f[DIR_0PP ])[ktn ]=(two*feqW27_TN -(f27_BS *(q*omegaD-one)-omegaD*feq27_BS *(q-one))/(omegaD-one)+f27_TN *q)/(q+one);
+      //q = q_dirBN[k];  if (q>=zero && q<=one) (D27.f[DIR_0MP ])[kts ]=(two*feqW27_TS -(f27_BN *(q*omegaD-one)-omegaD*feq27_BN *(q-one))/(omegaD-one)+f27_TS *q)/(q+one);
+      //q = q_dirTS[k];  if (q>=zero && q<=one) (D27.f[DIR_0PM ])[kbn ]=(two*feqW27_BN -(f27_TS *(q*omegaD-one)-omegaD*feq27_TS *(q-one))/(omegaD-one)+f27_BN *q)/(q+one);
+      //q = q_dirTNE[k]; if (q>=zero && q<=one) (D27.f[DIR_MMM])[kbsw]=(two*feqW27_BSW-(f27_TNE*(q*omegaD-one)-omegaD*feq27_TNE*(q-one))/(omegaD-one)+f27_BSW*q)/(q+one);
+      //q = q_dirBSW[k]; if (q>=zero && q<=one) (D27.f[DIR_PPP])[ktne]=(two*feqW27_TNE-(f27_BSW*(q*omegaD-one)-omegaD*feq27_BSW*(q-one))/(omegaD-one)+f27_TNE*q)/(q+one);
+      //q = q_dirBNE[k]; if (q>=zero && q<=one) (D27.f[DIR_MMP])[ktsw]=(two*feqW27_TSW-(f27_BNE*(q*omegaD-one)-omegaD*feq27_BNE*(q-one))/(omegaD-one)+f27_TSW*q)/(q+one);
+      //q = q_dirTSW[k]; if (q>=zero && q<=one) (D27.f[DIR_PPM])[kbne]=(two*feqW27_BNE-(f27_TSW*(q*omegaD-one)-omegaD*feq27_TSW*(q-one))/(omegaD-one)+f27_BNE*q)/(q+one);
+      //q = q_dirTSE[k]; if (q>=zero && q<=one) (D27.f[DIR_MPM])[kbnw]=(two*feqW27_BNW-(f27_TSE*(q*omegaD-one)-omegaD*feq27_TSE*(q-one))/(omegaD-one)+f27_BNW*q)/(q+one);
+      //q = q_dirBNW[k]; if (q>=zero && q<=one) (D27.f[DIR_PMP])[ktse]=(two*feqW27_TSE-(f27_BNW*(q*omegaD-one)-omegaD*feq27_BNW*(q-one))/(omegaD-one)+f27_TSE*q)/(q+one);
+      //q = q_dirBSE[k]; if (q>=zero && q<=one) (D27.f[DIR_MPP])[ktnw]=(two*feqW27_TNW-(f27_BSE*(q*omegaD-one)-omegaD*feq27_BSE*(q-one))/(omegaD-one)+f27_TNW*q)/(q+one);
+      //q = q_dirTNW[k]; if (q>=zero && q<=one) (D27.f[DIR_PMM])[kbse]=(two*feqW27_BSE-(f27_TNW*(q*omegaD-one)-omegaD*feq27_TNW*(q-one))/(omegaD-one)+f27_BSE*q)/(q+one);
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -5789,9 +5789,7 @@ extern "C" __global__ void QADVeloIncomp27(
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QADPressIncomp7(int inx,
-										   int iny,
-										   real* DD, 
+__global__ void QADPressIncomp7( real* DD, 
 										   real* DD7, 
 										   real* temp,
 										   real* velo,
@@ -5809,63 +5807,63 @@ extern "C" __global__ void QADPressIncomp7(int inx,
   /* Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[E   ] = &DD[E   *size_Mat];
-      D.f[W   ] = &DD[W   *size_Mat];
-      D.f[N   ] = &DD[N   *size_Mat];
-      D.f[S   ] = &DD[S   *size_Mat];
-      D.f[T   ] = &DD[T   *size_Mat];
-      D.f[B   ] = &DD[B   *size_Mat];
-      D.f[NE  ] = &DD[NE  *size_Mat];
-      D.f[SW  ] = &DD[SW  *size_Mat];
-      D.f[SE  ] = &DD[SE  *size_Mat];
-      D.f[NW  ] = &DD[NW  *size_Mat];
-      D.f[TE  ] = &DD[TE  *size_Mat];
-      D.f[BW  ] = &DD[BW  *size_Mat];
-      D.f[BE  ] = &DD[BE  *size_Mat];
-      D.f[TW  ] = &DD[TW  *size_Mat];
-      D.f[TN  ] = &DD[TN  *size_Mat];
-      D.f[BS  ] = &DD[BS  *size_Mat];
-      D.f[BN  ] = &DD[BN  *size_Mat];
-      D.f[TS  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[TNE *size_Mat];
-      D.f[TSW ] = &DD[TSW *size_Mat];
-      D.f[TSE ] = &DD[TSE *size_Mat];
-      D.f[TNW ] = &DD[TNW *size_Mat];
-      D.f[BNE ] = &DD[BNE *size_Mat];
-      D.f[BSW ] = &DD[BSW *size_Mat];
-      D.f[BSE ] = &DD[BSE *size_Mat];
-      D.f[BNW ] = &DD[BNW *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
    } 
    else
    {
-      D.f[W   ] = &DD[E   *size_Mat];
-      D.f[E   ] = &DD[W   *size_Mat];
-      D.f[S   ] = &DD[N   *size_Mat];
-      D.f[N   ] = &DD[S   *size_Mat];
-      D.f[B   ] = &DD[T   *size_Mat];
-      D.f[T   ] = &DD[B   *size_Mat];
-      D.f[SW  ] = &DD[NE  *size_Mat];
-      D.f[NE  ] = &DD[SW  *size_Mat];
-      D.f[NW  ] = &DD[SE  *size_Mat];
-      D.f[SE  ] = &DD[NW  *size_Mat];
-      D.f[BW  ] = &DD[TE  *size_Mat];
-      D.f[TE  ] = &DD[BW  *size_Mat];
-      D.f[TW  ] = &DD[BE  *size_Mat];
-      D.f[BE  ] = &DD[TW  *size_Mat];
-      D.f[BS  ] = &DD[TN  *size_Mat];
-      D.f[TN  ] = &DD[BS  *size_Mat];
-      D.f[TS  ] = &DD[BN  *size_Mat];
-      D.f[BN  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[BSW *size_Mat];
-      D.f[TSW ] = &DD[BNE *size_Mat];
-      D.f[TSE ] = &DD[BNW *size_Mat];
-      D.f[TNW ] = &DD[BSE *size_Mat];
-      D.f[BNE ] = &DD[TSW *size_Mat];
-      D.f[BSW ] = &DD[TNE *size_Mat];
-      D.f[BSE ] = &DD[TNW *size_Mat];
-      D.f[BNW ] = &DD[TSE *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
    }*/
 
    Distributions7 D7;
@@ -5907,12 +5905,12 @@ extern "C" __global__ void QADPressIncomp7(int inx,
       //////////////////////////////////////////////////////////////////////////////////
       real  *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB; 
 
-      q_dirE   = &QQ[E   * numberOfBCnodes];
-      q_dirW   = &QQ[W   * numberOfBCnodes];
-      q_dirN   = &QQ[N   * numberOfBCnodes];
-      q_dirS   = &QQ[S   * numberOfBCnodes];
-      q_dirT   = &QQ[T   * numberOfBCnodes];
-      q_dirB   = &QQ[B   * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
       //////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -5947,32 +5945,32 @@ extern "C" __global__ void QADPressIncomp7(int inx,
     /*  real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
          f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_W    = (D.f[E   ])[ke   ];
-      f_E    = (D.f[W   ])[kw   ];
-      f_S    = (D.f[N   ])[kn   ];
-      f_N    = (D.f[S   ])[ks   ];
-      f_B    = (D.f[T   ])[kt   ];
-      f_T    = (D.f[B   ])[kb   ];
-      f_SW   = (D.f[NE  ])[kne  ];
-      f_NE   = (D.f[SW  ])[ksw  ];
-      f_NW   = (D.f[SE  ])[kse  ];
-      f_SE   = (D.f[NW  ])[knw  ];
-      f_BW   = (D.f[TE  ])[kte  ];
-      f_TE   = (D.f[BW  ])[kbw  ];
-      f_TW   = (D.f[BE  ])[kbe  ];
-      f_BE   = (D.f[TW  ])[ktw  ];
-      f_BS   = (D.f[TN  ])[ktn  ];
-      f_TN   = (D.f[BS  ])[kbs  ];
-      f_TS   = (D.f[BN  ])[kbn  ];
-      f_BN   = (D.f[TS  ])[kts  ];
-      f_BSW  = (D.f[TNE ])[ktne ];
-      f_BNE  = (D.f[TSW ])[ktsw ];
-      f_BNW  = (D.f[TSE ])[ktse ];
-      f_BSE  = (D.f[TNW ])[ktnw ];
-      f_TSW  = (D.f[BNE ])[kbne ];
-      f_TNE  = (D.f[BSW ])[kbsw ];
-      f_TNW  = (D.f[BSE ])[kbse ];
-      f_TSE  = (D.f[BNW ])[kbnw ];*/
+      f_W    = (D.f[DIR_P00   ])[ke   ];
+      f_E    = (D.f[DIR_M00   ])[kw   ];
+      f_S    = (D.f[DIR_0P0   ])[kn   ];
+      f_N    = (D.f[DIR_0M0   ])[ks   ];
+      f_B    = (D.f[DIR_00P   ])[kt   ];
+      f_T    = (D.f[DIR_00M   ])[kb   ];
+      f_SW   = (D.f[DIR_PP0  ])[kne  ];
+      f_NE   = (D.f[DIR_MM0  ])[ksw  ];
+      f_NW   = (D.f[DIR_PM0  ])[kse  ];
+      f_SE   = (D.f[DIR_MP0  ])[knw  ];
+      f_BW   = (D.f[DIR_P0P  ])[kte  ];
+      f_TE   = (D.f[DIR_M0M  ])[kbw  ];
+      f_TW   = (D.f[DIR_P0M  ])[kbe  ];
+      f_BE   = (D.f[DIR_M0P  ])[ktw  ];
+      f_BS   = (D.f[DIR_0PP  ])[ktn  ];
+      f_TN   = (D.f[DIR_0MM  ])[kbs  ];
+      f_TS   = (D.f[DIR_0PM  ])[kbn  ];
+      f_BN   = (D.f[DIR_0MP  ])[kts  ];
+      f_BSW  = (D.f[DIR_PPP ])[ktne ];
+      f_BNE  = (D.f[DIR_MMP ])[ktsw ];
+      f_BNW  = (D.f[DIR_PMP ])[ktse ];
+      f_BSE  = (D.f[DIR_MPP ])[ktnw ];
+      f_TSW  = (D.f[DIR_PPM ])[kbne ];
+      f_TNE  = (D.f[DIR_MMM ])[kbsw ];
+      f_TNW  = (D.f[DIR_PMM ])[kbse ];
+      f_TSE  = (D.f[DIR_MPM ])[kbnw ];*/
       ////////////////////////////////////////////////////////////////////////////////
       //real vx1 = ((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_TSE-f_BNW)+(f_BSE-f_TNW) +(f_NE-f_SW)+(f_SE-f_NW)+(f_TE-f_BW)+(f_BE-f_TW)+(f_E-f_W));
       //real vx2 = ((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_BNW-f_TSE)+(f_TNW-f_BSE) +(f_NE-f_SW)+(f_NW-f_SE)+(f_TN-f_BS)+(f_BN-f_TS)+(f_N-f_S));
@@ -6229,7 +6227,7 @@ extern "C" __global__ void QADPressIncomp7(int inx,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QADPressIncomp27(
+__global__ void QADPressIncomp27(
 											   real* DD,
 											   real* DD27,
 											   real* temp,
@@ -6248,125 +6246,125 @@ extern "C" __global__ void QADPressIncomp27(
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[E   ] = &DD[E   *size_Mat];
-      D.f[W   ] = &DD[W   *size_Mat];
-      D.f[N   ] = &DD[N   *size_Mat];
-      D.f[S   ] = &DD[S   *size_Mat];
-      D.f[T   ] = &DD[T   *size_Mat];
-      D.f[B   ] = &DD[B   *size_Mat];
-      D.f[NE  ] = &DD[NE  *size_Mat];
-      D.f[SW  ] = &DD[SW  *size_Mat];
-      D.f[SE  ] = &DD[SE  *size_Mat];
-      D.f[NW  ] = &DD[NW  *size_Mat];
-      D.f[TE  ] = &DD[TE  *size_Mat];
-      D.f[BW  ] = &DD[BW  *size_Mat];
-      D.f[BE  ] = &DD[BE  *size_Mat];
-      D.f[TW  ] = &DD[TW  *size_Mat];
-      D.f[TN  ] = &DD[TN  *size_Mat];
-      D.f[BS  ] = &DD[BS  *size_Mat];
-      D.f[BN  ] = &DD[BN  *size_Mat];
-      D.f[TS  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[TNE *size_Mat];
-      D.f[TSW ] = &DD[TSW *size_Mat];
-      D.f[TSE ] = &DD[TSE *size_Mat];
-      D.f[TNW ] = &DD[TNW *size_Mat];
-      D.f[BNE ] = &DD[BNE *size_Mat];
-      D.f[BSW ] = &DD[BSW *size_Mat];
-      D.f[BSE ] = &DD[BSE *size_Mat];
-      D.f[BNW ] = &DD[BNW *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
    } 
    else
    {
-      D.f[W   ] = &DD[E   *size_Mat];
-      D.f[E   ] = &DD[W   *size_Mat];
-      D.f[S   ] = &DD[N   *size_Mat];
-      D.f[N   ] = &DD[S   *size_Mat];
-      D.f[B   ] = &DD[T   *size_Mat];
-      D.f[T   ] = &DD[B   *size_Mat];
-      D.f[SW  ] = &DD[NE  *size_Mat];
-      D.f[NE  ] = &DD[SW  *size_Mat];
-      D.f[NW  ] = &DD[SE  *size_Mat];
-      D.f[SE  ] = &DD[NW  *size_Mat];
-      D.f[BW  ] = &DD[TE  *size_Mat];
-      D.f[TE  ] = &DD[BW  *size_Mat];
-      D.f[TW  ] = &DD[BE  *size_Mat];
-      D.f[BE  ] = &DD[TW  *size_Mat];
-      D.f[BS  ] = &DD[TN  *size_Mat];
-      D.f[TN  ] = &DD[BS  *size_Mat];
-      D.f[TS  ] = &DD[BN  *size_Mat];
-      D.f[BN  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[BSW *size_Mat];
-      D.f[TSW ] = &DD[BNE *size_Mat];
-      D.f[TSE ] = &DD[BNW *size_Mat];
-      D.f[TNW ] = &DD[BSE *size_Mat];
-      D.f[BNE ] = &DD[TSW *size_Mat];
-      D.f[BSW ] = &DD[TNE *size_Mat];
-      D.f[BSE ] = &DD[TNW *size_Mat];
-      D.f[BNW ] = &DD[TSE *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
    }
 
    Distributions27 D27;
    if (isEvenTimestep==true)
    {
-      D27.f[E   ] = &DD27[E   *size_Mat];
-      D27.f[W   ] = &DD27[W   *size_Mat];
-      D27.f[N   ] = &DD27[N   *size_Mat];
-      D27.f[S   ] = &DD27[S   *size_Mat];
-      D27.f[T   ] = &DD27[T   *size_Mat];
-      D27.f[B   ] = &DD27[B   *size_Mat];
-      D27.f[NE  ] = &DD27[NE  *size_Mat];
-      D27.f[SW  ] = &DD27[SW  *size_Mat];
-      D27.f[SE  ] = &DD27[SE  *size_Mat];
-      D27.f[NW  ] = &DD27[NW  *size_Mat];
-      D27.f[TE  ] = &DD27[TE  *size_Mat];
-      D27.f[BW  ] = &DD27[BW  *size_Mat];
-      D27.f[BE  ] = &DD27[BE  *size_Mat];
-      D27.f[TW  ] = &DD27[TW  *size_Mat];
-      D27.f[TN  ] = &DD27[TN  *size_Mat];
-      D27.f[BS  ] = &DD27[BS  *size_Mat];
-      D27.f[BN  ] = &DD27[BN  *size_Mat];
-      D27.f[TS  ] = &DD27[TS  *size_Mat];
-      D27.f[REST] = &DD27[REST*size_Mat];
-      D27.f[TNE ] = &DD27[TNE *size_Mat];
-      D27.f[TSW ] = &DD27[TSW *size_Mat];
-      D27.f[TSE ] = &DD27[TSE *size_Mat];
-      D27.f[TNW ] = &DD27[TNW *size_Mat];
-      D27.f[BNE ] = &DD27[BNE *size_Mat];
-      D27.f[BSW ] = &DD27[BSW *size_Mat];
-      D27.f[BSE ] = &DD27[BSE *size_Mat];
-      D27.f[BNW ] = &DD27[BNW *size_Mat];
+      D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
+      D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
+      D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
+      D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
+      D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
+      D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
+      D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
+      D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
+      D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
+      D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
+      D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
+      D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
+      D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
+      D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
+      D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
+      D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
+      D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
+      D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
+      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
+      D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
+      D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
+      D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
+      D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
+      D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
+      D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
+      D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
+      D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
    } 
    else
    {
-      D27.f[W   ] = &DD27[E   *size_Mat];
-      D27.f[E   ] = &DD27[W   *size_Mat];
-      D27.f[S   ] = &DD27[N   *size_Mat];
-      D27.f[N   ] = &DD27[S   *size_Mat];
-      D27.f[B   ] = &DD27[T   *size_Mat];
-      D27.f[T   ] = &DD27[B   *size_Mat];
-      D27.f[SW  ] = &DD27[NE  *size_Mat];
-      D27.f[NE  ] = &DD27[SW  *size_Mat];
-      D27.f[NW  ] = &DD27[SE  *size_Mat];
-      D27.f[SE  ] = &DD27[NW  *size_Mat];
-      D27.f[BW  ] = &DD27[TE  *size_Mat];
-      D27.f[TE  ] = &DD27[BW  *size_Mat];
-      D27.f[TW  ] = &DD27[BE  *size_Mat];
-      D27.f[BE  ] = &DD27[TW  *size_Mat];
-      D27.f[BS  ] = &DD27[TN  *size_Mat];
-      D27.f[TN  ] = &DD27[BS  *size_Mat];
-      D27.f[TS  ] = &DD27[BN  *size_Mat];
-      D27.f[BN  ] = &DD27[TS  *size_Mat];
-      D27.f[REST] = &DD27[REST*size_Mat];
-      D27.f[TNE ] = &DD27[BSW *size_Mat];
-      D27.f[TSW ] = &DD27[BNE *size_Mat];
-      D27.f[TSE ] = &DD27[BNW *size_Mat];
-      D27.f[TNW ] = &DD27[BSE *size_Mat];
-      D27.f[BNE ] = &DD27[TSW *size_Mat];
-      D27.f[BSW ] = &DD27[TNE *size_Mat];
-      D27.f[BSE ] = &DD27[TNW *size_Mat];
-      D27.f[BNW ] = &DD27[TSE *size_Mat];
+      D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
+      D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
+      D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
+      D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
+      D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
+      D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
+      D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
+      D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
+      D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
+      D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
+      D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
+      D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
+      D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
+      D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
+      D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
+      D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
+      D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
+      D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
+      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
+      D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
+      D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
+      D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
+      D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
+      D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
+      D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
+      D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
+      D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -6387,32 +6385,32 @@ extern "C" __global__ void QADPressIncomp27(
          *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
          *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
          *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[E   * numberOfBCnodes];
-      q_dirW   = &QQ[W   * numberOfBCnodes];
-      q_dirN   = &QQ[N   * numberOfBCnodes];
-      q_dirS   = &QQ[S   * numberOfBCnodes];
-      q_dirT   = &QQ[T   * numberOfBCnodes];
-      q_dirB   = &QQ[B   * numberOfBCnodes];
-      q_dirNE  = &QQ[NE  * numberOfBCnodes];
-      q_dirSW  = &QQ[SW  * numberOfBCnodes];
-      q_dirSE  = &QQ[SE  * numberOfBCnodes];
-      q_dirNW  = &QQ[NW  * numberOfBCnodes];
-      q_dirTE  = &QQ[TE  * numberOfBCnodes];
-      q_dirBW  = &QQ[BW  * numberOfBCnodes];
-      q_dirBE  = &QQ[BE  * numberOfBCnodes];
-      q_dirTW  = &QQ[TW  * numberOfBCnodes];
-      q_dirTN  = &QQ[TN  * numberOfBCnodes];
-      q_dirBS  = &QQ[BS  * numberOfBCnodes];
-      q_dirBN  = &QQ[BN  * numberOfBCnodes];
-      q_dirTS  = &QQ[TS  * numberOfBCnodes];
-      q_dirTNE = &QQ[TNE * numberOfBCnodes];
-      q_dirTSW = &QQ[TSW * numberOfBCnodes];
-      q_dirTSE = &QQ[TSE * numberOfBCnodes];
-      q_dirTNW = &QQ[TNW * numberOfBCnodes];
-      q_dirBNE = &QQ[BNE * numberOfBCnodes];
-      q_dirBSW = &QQ[BSW * numberOfBCnodes];
-      q_dirBSE = &QQ[BSE * numberOfBCnodes];
-      q_dirBNW = &QQ[BNW * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
+      q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
+      q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
+      q_dirTNW = &QQ[DIR_MPP * numberOfBCnodes];
+      q_dirBNE = &QQ[DIR_PPM * numberOfBCnodes];
+      q_dirBSW = &QQ[DIR_MMM * numberOfBCnodes];
+      q_dirBSE = &QQ[DIR_PMM * numberOfBCnodes];
+      q_dirBNW = &QQ[DIR_MPM * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -6444,65 +6442,65 @@ extern "C" __global__ void QADPressIncomp27(
       unsigned int ktne = KQK;
       unsigned int kbsw = neighborZ[ksw];
       ////////////////////////////////////////////////////////////////////////////////
-      real f_W    = (D.f[E   ])[ke   ];
-      real f_E    = (D.f[W   ])[kw   ];
-      real f_S    = (D.f[N   ])[kn   ];
-      real f_N    = (D.f[S   ])[ks   ];
-      real f_B    = (D.f[T   ])[kt   ];
-      real f_T    = (D.f[B   ])[kb   ];
-      real f_SW   = (D.f[NE  ])[kne  ];
-      real f_NE   = (D.f[SW  ])[ksw  ];
-      real f_NW   = (D.f[SE  ])[kse  ];
-      real f_SE   = (D.f[NW  ])[knw  ];
-      real f_BW   = (D.f[TE  ])[kte  ];
-      real f_TE   = (D.f[BW  ])[kbw  ];
-      real f_TW   = (D.f[BE  ])[kbe  ];
-      real f_BE   = (D.f[TW  ])[ktw  ];
-      real f_BS   = (D.f[TN  ])[ktn  ];
-      real f_TN   = (D.f[BS  ])[kbs  ];
-      real f_TS   = (D.f[BN  ])[kbn  ];
-      real f_BN   = (D.f[TS  ])[kts  ];
-      //real f_ZERO = (D.f[REST])[kzero];
-      real f_BSW  = (D.f[TNE ])[ktne ];
-      real f_BNE  = (D.f[TSW ])[ktsw ];
-      real f_BNW  = (D.f[TSE ])[ktse ];
-      real f_BSE  = (D.f[TNW ])[ktnw ];
-      real f_TSW  = (D.f[BNE ])[kbne ];
-      real f_TNE  = (D.f[BSW ])[kbsw ];
-      real f_TNW  = (D.f[BSE ])[kbse ];
-      real f_TSE  = (D.f[BNW ])[kbnw ];
+      real f_W    = (D.f[DIR_P00   ])[ke   ];
+      real f_E    = (D.f[DIR_M00   ])[kw   ];
+      real f_S    = (D.f[DIR_0P0   ])[kn   ];
+      real f_N    = (D.f[DIR_0M0   ])[ks   ];
+      real f_B    = (D.f[DIR_00P   ])[kt   ];
+      real f_T    = (D.f[DIR_00M   ])[kb   ];
+      real f_SW   = (D.f[DIR_PP0  ])[kne  ];
+      real f_NE   = (D.f[DIR_MM0  ])[ksw  ];
+      real f_NW   = (D.f[DIR_PM0  ])[kse  ];
+      real f_SE   = (D.f[DIR_MP0  ])[knw  ];
+      real f_BW   = (D.f[DIR_P0P  ])[kte  ];
+      real f_TE   = (D.f[DIR_M0M  ])[kbw  ];
+      real f_TW   = (D.f[DIR_P0M  ])[kbe  ];
+      real f_BE   = (D.f[DIR_M0P  ])[ktw  ];
+      real f_BS   = (D.f[DIR_0PP  ])[ktn  ];
+      real f_TN   = (D.f[DIR_0MM  ])[kbs  ];
+      real f_TS   = (D.f[DIR_0PM  ])[kbn  ];
+      real f_BN   = (D.f[DIR_0MP  ])[kts  ];
+      //real f_ZERO = (D.f[DIR_000])[kzero];
+      real f_BSW  = (D.f[DIR_PPP ])[ktne ];
+      real f_BNE  = (D.f[DIR_MMP ])[ktsw ];
+      real f_BNW  = (D.f[DIR_PMP ])[ktse ];
+      real f_BSE  = (D.f[DIR_MPP ])[ktnw ];
+      real f_TSW  = (D.f[DIR_PPM ])[kbne ];
+      real f_TNE  = (D.f[DIR_MMM ])[kbsw ];
+      real f_TNW  = (D.f[DIR_PMM ])[kbse ];
+      real f_TSE  = (D.f[DIR_MPM ])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1      = ((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_TSE-f_BNW)+(f_BSE-f_TNW) +(f_NE-f_SW)+(f_SE-f_NW)+(f_TE-f_BW)+(f_BE-f_TW)+(f_E-f_W));
       real vx2      = ((f_TNE-f_BSW)+(f_BNE-f_TSW)+(f_BNW-f_TSE)+(f_TNW-f_BSE) +(f_NE-f_SW)+(f_NW-f_SE)+(f_TN-f_BS)+(f_BN-f_TS)+(f_N-f_S));
       real vx3      = ((f_TNE-f_BSW)+(f_TSW-f_BNE)+(f_TSE-f_BNW)+(f_TNW-f_BSE) +(f_TE-f_BW)+(f_TW-f_BE)+(f_TN-f_BS)+(f_TS-f_BN)+(f_T-f_B));
       ////////////////////////////////////////////////////////////////////////////////
-      //real f27_W    = (D27.f[E   ])[ke   ];
-      //real f27_E    = (D27.f[W   ])[kw   ];
-      //real f27_S    = (D27.f[N   ])[kn   ];
-      //real f27_N    = (D27.f[S   ])[ks   ];
-      //real f27_B    = (D27.f[T   ])[kt   ];
-      //real f27_T    = (D27.f[B   ])[kb   ];
-      //real f27_SW   = (D27.f[NE  ])[kne  ];
-      //real f27_NE   = (D27.f[SW  ])[ksw  ];
-      //real f27_NW   = (D27.f[SE  ])[kse  ];
-      //real f27_SE   = (D27.f[NW  ])[knw  ];
-      //real f27_BW   = (D27.f[TE  ])[kte  ];
-      //real f27_TE   = (D27.f[BW  ])[kbw  ];
-      //real f27_TW   = (D27.f[BE  ])[kbe  ];
-      //real f27_BE   = (D27.f[TW  ])[ktw  ];
-      //real f27_BS   = (D27.f[TN  ])[ktn  ];
-      //real f27_TN   = (D27.f[BS  ])[kbs  ];
-      //real f27_TS   = (D27.f[BN  ])[kbn  ];
-      //real f27_BN   = (D27.f[TS  ])[kts  ];
-      //real f27_ZERO = (D27.f[REST])[kzero];
-      //real f27_BSW  = (D27.f[TNE ])[ktne ];
-      //real f27_BNE  = (D27.f[TSW ])[ktsw ];
-      //real f27_BNW  = (D27.f[TSE ])[ktse ];
-      //real f27_BSE  = (D27.f[TNW ])[ktnw ];
-      //real f27_TSW  = (D27.f[BNE ])[kbne ];
-      //real f27_TNE  = (D27.f[BSW ])[kbsw ];
-      //real f27_TNW  = (D27.f[BSE ])[kbse ];
-      //real f27_TSE  = (D27.f[BNW ])[kbnw ];
+      //real f27_W    = (D27.f[DIR_P00   ])[ke   ];
+      //real f27_E    = (D27.f[DIR_M00   ])[kw   ];
+      //real f27_S    = (D27.f[DIR_0P0   ])[kn   ];
+      //real f27_N    = (D27.f[DIR_0M0   ])[ks   ];
+      //real f27_B    = (D27.f[DIR_00P   ])[kt   ];
+      //real f27_T    = (D27.f[DIR_00M   ])[kb   ];
+      //real f27_SW   = (D27.f[DIR_PP0  ])[kne  ];
+      //real f27_NE   = (D27.f[DIR_MM0  ])[ksw  ];
+      //real f27_NW   = (D27.f[DIR_PM0  ])[kse  ];
+      //real f27_SE   = (D27.f[DIR_MP0  ])[knw  ];
+      //real f27_BW   = (D27.f[DIR_P0P  ])[kte  ];
+      //real f27_TE   = (D27.f[DIR_M0M  ])[kbw  ];
+      //real f27_TW   = (D27.f[DIR_P0M  ])[kbe  ];
+      //real f27_BE   = (D27.f[DIR_M0P  ])[ktw  ];
+      //real f27_BS   = (D27.f[DIR_0PP  ])[ktn  ];
+      //real f27_TN   = (D27.f[DIR_0MM  ])[kbs  ];
+      //real f27_TS   = (D27.f[DIR_0PM  ])[kbn  ];
+      //real f27_BN   = (D27.f[DIR_0MP  ])[kts  ];
+      //real f27_ZERO = (D27.f[DIR_000])[kzero];
+      //real f27_BSW  = (D27.f[DIR_PPP ])[ktne ];
+      //real f27_BNE  = (D27.f[DIR_MMP ])[ktsw ];
+      //real f27_BNW  = (D27.f[DIR_PMP ])[ktse ];
+      //real f27_BSE  = (D27.f[DIR_MPP ])[ktnw ];
+      //real f27_TSW  = (D27.f[DIR_PPM ])[kbne ];
+      //real f27_TNE  = (D27.f[DIR_MMM ])[kbsw ];
+      //real f27_TNW  = (D27.f[DIR_PMM ])[kbse ];
+      //real f27_TSE  = (D27.f[DIR_MPM ])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
       ////////////////////////////////////////////////////////////////////////////////
@@ -6583,122 +6581,122 @@ extern "C" __global__ void QADPressIncomp27(
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D27.f[E   ] = &DD27[E   *size_Mat];
-         D27.f[W   ] = &DD27[W   *size_Mat];
-         D27.f[N   ] = &DD27[N   *size_Mat];
-         D27.f[S   ] = &DD27[S   *size_Mat];
-         D27.f[T   ] = &DD27[T   *size_Mat];
-         D27.f[B   ] = &DD27[B   *size_Mat];
-         D27.f[NE  ] = &DD27[NE  *size_Mat];
-         D27.f[SW  ] = &DD27[SW  *size_Mat];
-         D27.f[SE  ] = &DD27[SE  *size_Mat];
-         D27.f[NW  ] = &DD27[NW  *size_Mat];
-         D27.f[TE  ] = &DD27[TE  *size_Mat];
-         D27.f[BW  ] = &DD27[BW  *size_Mat];
-         D27.f[BE  ] = &DD27[BE  *size_Mat];
-         D27.f[TW  ] = &DD27[TW  *size_Mat];
-         D27.f[TN  ] = &DD27[TN  *size_Mat];
-         D27.f[BS  ] = &DD27[BS  *size_Mat];
-         D27.f[BN  ] = &DD27[BN  *size_Mat];
-         D27.f[TS  ] = &DD27[TS  *size_Mat];
-         D27.f[REST] = &DD27[REST*size_Mat];
-         D27.f[TNE ] = &DD27[TNE *size_Mat];
-         D27.f[TSW ] = &DD27[TSW *size_Mat];
-         D27.f[TSE ] = &DD27[TSE *size_Mat];
-         D27.f[TNW ] = &DD27[TNW *size_Mat];
-         D27.f[BNE ] = &DD27[BNE *size_Mat];
-         D27.f[BSW ] = &DD27[BSW *size_Mat];
-         D27.f[BSE ] = &DD27[BSE *size_Mat];
-         D27.f[BNW ] = &DD27[BNW *size_Mat];
+         D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
+         D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
+         D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
+         D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
+         D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
+         D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
+         D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
+         D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
+         D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
+         D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
+         D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
+         D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
+         D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
+         D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
+         D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
+         D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
+         D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
+         D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
+         D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
+         D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
+         D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
+         D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
+         D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
+         D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
+         D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
+         D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
+         D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
       } 
       else
       {
-         D27.f[W   ] = &DD27[E   *size_Mat];
-         D27.f[E   ] = &DD27[W   *size_Mat];
-         D27.f[S   ] = &DD27[N   *size_Mat];
-         D27.f[N   ] = &DD27[S   *size_Mat];
-         D27.f[B   ] = &DD27[T   *size_Mat];
-         D27.f[T   ] = &DD27[B   *size_Mat];
-         D27.f[SW  ] = &DD27[NE  *size_Mat];
-         D27.f[NE  ] = &DD27[SW  *size_Mat];
-         D27.f[NW  ] = &DD27[SE  *size_Mat];
-         D27.f[SE  ] = &DD27[NW  *size_Mat];
-         D27.f[BW  ] = &DD27[TE  *size_Mat];
-         D27.f[TE  ] = &DD27[BW  *size_Mat];
-         D27.f[TW  ] = &DD27[BE  *size_Mat];
-         D27.f[BE  ] = &DD27[TW  *size_Mat];
-         D27.f[BS  ] = &DD27[TN  *size_Mat];
-         D27.f[TN  ] = &DD27[BS  *size_Mat];
-         D27.f[TS  ] = &DD27[BN  *size_Mat];
-         D27.f[BN  ] = &DD27[TS  *size_Mat];
-         D27.f[REST] = &DD27[REST*size_Mat];
-         D27.f[TNE ] = &DD27[BSW *size_Mat];
-         D27.f[TSW ] = &DD27[BNE *size_Mat];
-         D27.f[TSE ] = &DD27[BNW *size_Mat];
-         D27.f[TNW ] = &DD27[BSE *size_Mat];
-         D27.f[BNE ] = &DD27[TSW *size_Mat];
-         D27.f[BSW ] = &DD27[TNE *size_Mat];
-         D27.f[BSE ] = &DD27[TNW *size_Mat];
-         D27.f[BNW ] = &DD27[TSE *size_Mat];
+         D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
+         D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
+         D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
+         D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
+         D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
+         D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
+         D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
+         D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
+         D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
+         D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
+         D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
+         D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
+         D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
+         D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
+         D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
+         D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
+         D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
+         D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
+         D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
+         D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
+         D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
+         D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
+         D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
+         D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
+         D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
+         D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
+         D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
-      //(D.f[REST])[k]=c1o10;
+      //(D.f[DIR_000])[k]=c1o10;
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       real q;
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      q = q_dirE[k];   if (q>=c0o1 && q<=c1o1) (D27.f[W  ])[kw  ]= -feqW27_W  + c2o1 * c2o27  * TempD;
-      q = q_dirW[k];   if (q>=c0o1 && q<=c1o1) (D27.f[E  ])[ke  ]= -feqW27_E  + c2o1 * c2o27  * TempD;
-      q = q_dirN[k];   if (q>=c0o1 && q<=c1o1) (D27.f[S  ])[ks  ]= -feqW27_S  + c2o1 * c2o27  * TempD;
-      q = q_dirS[k];   if (q>=c0o1 && q<=c1o1) (D27.f[N  ])[kn  ]= -feqW27_N  + c2o1 * c2o27  * TempD;
-      q = q_dirT[k];   if (q>=c0o1 && q<=c1o1) (D27.f[B  ])[kb  ]= -feqW27_B  + c2o1 * c2o27  * TempD;
-      q = q_dirB[k];   if (q>=c0o1 && q<=c1o1) (D27.f[T  ])[kt  ]= -feqW27_T  + c2o1 * c2o27  * TempD;
-      q = q_dirNE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[SW ])[ksw ]= -feqW27_SW + c2o1 * c1o54  * TempD;
-      q = q_dirSW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[NE ])[kne ]= -feqW27_NE + c2o1 * c1o54  * TempD;
-      q = q_dirSE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[NW ])[knw ]= -feqW27_NW + c2o1 * c1o54  * TempD;
-      q = q_dirNW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[SE ])[kse ]= -feqW27_SE + c2o1 * c1o54  * TempD;
-      q = q_dirTE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[BW ])[kbw ]= -feqW27_BW + c2o1 * c1o54  * TempD;
-      q = q_dirBW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[TE ])[kte ]= -feqW27_TE + c2o1 * c1o54  * TempD;
-      q = q_dirBE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[TW ])[ktw ]= -feqW27_TW + c2o1 * c1o54  * TempD;
-      q = q_dirTW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[BE ])[kbe ]= -feqW27_BE + c2o1 * c1o54  * TempD;
-      q = q_dirTN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[BS ])[kbs ]= -feqW27_BS + c2o1 * c1o54  * TempD;
-      q = q_dirBS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[TN ])[ktn ]= -feqW27_TN + c2o1 * c1o54  * TempD;
-      q = q_dirBN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[TS ])[kts ]= -feqW27_TS + c2o1 * c1o54  * TempD;
-      q = q_dirTS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[BN ])[kbn ]= -feqW27_BN + c2o1 * c1o54  * TempD;
-      q = q_dirTNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[BSW])[kbsw]= -feqW27_BSW+ c2o1 * c1o216 * TempD;
-      q = q_dirBSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[TNE])[ktne]= -feqW27_TNE+ c2o1 * c1o216 * TempD;
-      q = q_dirBNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[TSW])[ktsw]= -feqW27_TSW+ c2o1 * c1o216 * TempD;
-      q = q_dirTSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[BNE])[kbne]= -feqW27_BNE+ c2o1 * c1o216 * TempD;
-      q = q_dirTSE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[BNW])[kbnw]= -feqW27_BNW+ c2o1 * c1o216 * TempD;
-      q = q_dirBNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[TSE])[ktse]= -feqW27_TSE+ c2o1 * c1o216 * TempD;
-      q = q_dirBSE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[TNW])[ktnw]= -feqW27_TNW+ c2o1 * c1o216 * TempD;
-      q = q_dirTNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[BSE])[kbse]= -feqW27_BSE+ c2o1 * c1o216 * TempD;
-      //q = q_dirE[k];   if (q>=zero && q<=one) (D27.f[W  ])[kw  ]=(two*feqW27_W  -(f27_E  *(q*omegaD-one)-omegaD*feq27_E  *(q-one))/(omegaD-one)+f27_W  *q)/(q+one);
-      //q = q_dirW[k];   if (q>=zero && q<=one) (D27.f[E  ])[ke  ]=(two*feqW27_E  -(f27_W  *(q*omegaD-one)-omegaD*feq27_W  *(q-one))/(omegaD-one)+f27_E  *q)/(q+one);
-      //q = q_dirN[k];   if (q>=zero && q<=one) (D27.f[S  ])[ks  ]=(two*feqW27_S  -(f27_N  *(q*omegaD-one)-omegaD*feq27_N  *(q-one))/(omegaD-one)+f27_S  *q)/(q+one);
-      //q = q_dirS[k];   if (q>=zero && q<=one) (D27.f[N  ])[kn  ]=(two*feqW27_N  -(f27_S  *(q*omegaD-one)-omegaD*feq27_S  *(q-one))/(omegaD-one)+f27_N  *q)/(q+one);
-      //q = q_dirT[k];   if (q>=zero && q<=one) (D27.f[B  ])[kb  ]=(two*feqW27_B  -(f27_T  *(q*omegaD-one)-omegaD*feq27_T  *(q-one))/(omegaD-one)+f27_B  *q)/(q+one);
-      //q = q_dirB[k];   if (q>=zero && q<=one) (D27.f[T  ])[kt  ]=(two*feqW27_T  -(f27_B  *(q*omegaD-one)-omegaD*feq27_B  *(q-one))/(omegaD-one)+f27_T  *q)/(q+one);
-      //q = q_dirNE[k];  if (q>=zero && q<=one) (D27.f[SW ])[ksw ]=(two*feqW27_SW -(f27_NE *(q*omegaD-one)-omegaD*feq27_NE *(q-one))/(omegaD-one)+f27_SW *q)/(q+one);
-      //q = q_dirSW[k];  if (q>=zero && q<=one) (D27.f[NE ])[kne ]=(two*feqW27_NE -(f27_SW *(q*omegaD-one)-omegaD*feq27_SW *(q-one))/(omegaD-one)+f27_NE *q)/(q+one);
-      //q = q_dirSE[k];  if (q>=zero && q<=one) (D27.f[NW ])[knw ]=(two*feqW27_NW -(f27_SE *(q*omegaD-one)-omegaD*feq27_SE *(q-one))/(omegaD-one)+f27_NW *q)/(q+one);
-      //q = q_dirNW[k];  if (q>=zero && q<=one) (D27.f[SE ])[kse ]=(two*feqW27_SE -(f27_NW *(q*omegaD-one)-omegaD*feq27_NW *(q-one))/(omegaD-one)+f27_SE *q)/(q+one);
-      //q = q_dirTE[k];  if (q>=zero && q<=one) (D27.f[BW ])[kbw ]=(two*feqW27_BW -(f27_TE *(q*omegaD-one)-omegaD*feq27_TE *(q-one))/(omegaD-one)+f27_BW *q)/(q+one);
-      //q = q_dirBW[k];  if (q>=zero && q<=one) (D27.f[TE ])[kte ]=(two*feqW27_TE -(f27_BW *(q*omegaD-one)-omegaD*feq27_BW *(q-one))/(omegaD-one)+f27_TE *q)/(q+one);
-      //q = q_dirBE[k];  if (q>=zero && q<=one) (D27.f[TW ])[ktw ]=(two*feqW27_TW -(f27_BE *(q*omegaD-one)-omegaD*feq27_BE *(q-one))/(omegaD-one)+f27_TW *q)/(q+one);
-      //q = q_dirTW[k];  if (q>=zero && q<=one) (D27.f[BE ])[kbe ]=(two*feqW27_BE -(f27_TW *(q*omegaD-one)-omegaD*feq27_TW *(q-one))/(omegaD-one)+f27_BE *q)/(q+one);
-      //q = q_dirTN[k];  if (q>=zero && q<=one) (D27.f[BS ])[kbs ]=(two*feqW27_BS -(f27_TN *(q*omegaD-one)-omegaD*feq27_TN *(q-one))/(omegaD-one)+f27_BS *q)/(q+one);
-      //q = q_dirBS[k];  if (q>=zero && q<=one) (D27.f[TN ])[ktn ]=(two*feqW27_TN -(f27_BS *(q*omegaD-one)-omegaD*feq27_BS *(q-one))/(omegaD-one)+f27_TN *q)/(q+one);
-      //q = q_dirBN[k];  if (q>=zero && q<=one) (D27.f[TS ])[kts ]=(two*feqW27_TS -(f27_BN *(q*omegaD-one)-omegaD*feq27_BN *(q-one))/(omegaD-one)+f27_TS *q)/(q+one);
-      //q = q_dirTS[k];  if (q>=zero && q<=one) (D27.f[BN ])[kbn ]=(two*feqW27_BN -(f27_TS *(q*omegaD-one)-omegaD*feq27_TS *(q-one))/(omegaD-one)+f27_BN *q)/(q+one);
-      //q = q_dirTNE[k]; if (q>=zero && q<=one) (D27.f[BSW])[kbsw]=(two*feqW27_BSW-(f27_TNE*(q*omegaD-one)-omegaD*feq27_TNE*(q-one))/(omegaD-one)+f27_BSW*q)/(q+one);
-      //q = q_dirBSW[k]; if (q>=zero && q<=one) (D27.f[TNE])[ktne]=(two*feqW27_TNE-(f27_BSW*(q*omegaD-one)-omegaD*feq27_BSW*(q-one))/(omegaD-one)+f27_TNE*q)/(q+one);
-      //q = q_dirBNE[k]; if (q>=zero && q<=one) (D27.f[TSW])[ktsw]=(two*feqW27_TSW-(f27_BNE*(q*omegaD-one)-omegaD*feq27_BNE*(q-one))/(omegaD-one)+f27_TSW*q)/(q+one);
-      //q = q_dirTSW[k]; if (q>=zero && q<=one) (D27.f[BNE])[kbne]=(two*feqW27_BNE-(f27_TSW*(q*omegaD-one)-omegaD*feq27_TSW*(q-one))/(omegaD-one)+f27_BNE*q)/(q+one);
-      //q = q_dirTSE[k]; if (q>=zero && q<=one) (D27.f[BNW])[kbnw]=(two*feqW27_BNW-(f27_TSE*(q*omegaD-one)-omegaD*feq27_TSE*(q-one))/(omegaD-one)+f27_BNW*q)/(q+one);
-      //q = q_dirBNW[k]; if (q>=zero && q<=one) (D27.f[TSE])[ktse]=(two*feqW27_TSE-(f27_BNW*(q*omegaD-one)-omegaD*feq27_BNW*(q-one))/(omegaD-one)+f27_TSE*q)/(q+one);
-      //q = q_dirBSE[k]; if (q>=zero && q<=one) (D27.f[TNW])[ktnw]=(two*feqW27_TNW-(f27_BSE*(q*omegaD-one)-omegaD*feq27_BSE*(q-one))/(omegaD-one)+f27_TNW*q)/(q+one);
-      //q = q_dirTNW[k]; if (q>=zero && q<=one) (D27.f[BSE])[kbse]=(two*feqW27_BSE-(f27_TNW*(q*omegaD-one)-omegaD*feq27_TNW*(q-one))/(omegaD-one)+f27_BSE*q)/(q+one);
+      q = q_dirE[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M00  ])[kw  ]= -feqW27_W  + c2o1 * c2o27  * TempD;
+      q = q_dirW[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P00  ])[ke  ]= -feqW27_E  + c2o1 * c2o27  * TempD;
+      q = q_dirN[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0M0  ])[ks  ]= -feqW27_S  + c2o1 * c2o27  * TempD;
+      q = q_dirS[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0P0  ])[kn  ]= -feqW27_N  + c2o1 * c2o27  * TempD;
+      q = q_dirT[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00M  ])[kb  ]= -feqW27_B  + c2o1 * c2o27  * TempD;
+      q = q_dirB[k];   if (q>=c0o1 && q<=c1o1) (D27.f[DIR_00P  ])[kt  ]= -feqW27_T  + c2o1 * c2o27  * TempD;
+      q = q_dirNE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MM0 ])[ksw ]= -feqW27_SW + c2o1 * c1o54  * TempD;
+      q = q_dirSW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PP0 ])[kne ]= -feqW27_NE + c2o1 * c1o54  * TempD;
+      q = q_dirSE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MP0 ])[knw ]= -feqW27_NW + c2o1 * c1o54  * TempD;
+      q = q_dirNW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PM0 ])[kse ]= -feqW27_SE + c2o1 * c1o54  * TempD;
+      q = q_dirTE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0M ])[kbw ]= -feqW27_BW + c2o1 * c1o54  * TempD;
+      q = q_dirBW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0P ])[kte ]= -feqW27_TE + c2o1 * c1o54  * TempD;
+      q = q_dirBE[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_M0P ])[ktw ]= -feqW27_TW + c2o1 * c1o54  * TempD;
+      q = q_dirTW[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_P0M ])[kbe ]= -feqW27_BE + c2o1 * c1o54  * TempD;
+      q = q_dirTN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MM ])[kbs ]= -feqW27_BS + c2o1 * c1o54  * TempD;
+      q = q_dirBS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PP ])[ktn ]= -feqW27_TN + c2o1 * c1o54  * TempD;
+      q = q_dirBN[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0MP ])[kts ]= -feqW27_TS + c2o1 * c1o54  * TempD;
+      q = q_dirTS[k];  if (q>=c0o1 && q<=c1o1) (D27.f[DIR_0PM ])[kbn ]= -feqW27_BN + c2o1 * c1o54  * TempD;
+      q = q_dirTNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMM])[kbsw]= -feqW27_BSW+ c2o1 * c1o216 * TempD;
+      q = q_dirBSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PPP])[ktne]= -feqW27_TNE+ c2o1 * c1o216 * TempD;
+      q = q_dirBNE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MMP])[ktsw]= -feqW27_TSW+ c2o1 * c1o216 * TempD;
+      q = q_dirTSW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PPM])[kbne]= -feqW27_BNE+ c2o1 * c1o216 * TempD;
+      q = q_dirTSE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MPM])[kbnw]= -feqW27_BNW+ c2o1 * c1o216 * TempD;
+      q = q_dirBNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PMP])[ktse]= -feqW27_TSE+ c2o1 * c1o216 * TempD;
+      q = q_dirBSE[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_MPP])[ktnw]= -feqW27_TNW+ c2o1 * c1o216 * TempD;
+      q = q_dirTNW[k]; if (q>=c0o1 && q<=c1o1) (D27.f[DIR_PMM])[kbse]= -feqW27_BSE+ c2o1 * c1o216 * TempD;
+      //q = q_dirE[k];   if (q>=zero && q<=one) (D27.f[DIR_M00  ])[kw  ]=(two*feqW27_W  -(f27_E  *(q*omegaD-one)-omegaD*feq27_E  *(q-one))/(omegaD-one)+f27_W  *q)/(q+one);
+      //q = q_dirW[k];   if (q>=zero && q<=one) (D27.f[DIR_P00  ])[ke  ]=(two*feqW27_E  -(f27_W  *(q*omegaD-one)-omegaD*feq27_W  *(q-one))/(omegaD-one)+f27_E  *q)/(q+one);
+      //q = q_dirN[k];   if (q>=zero && q<=one) (D27.f[DIR_0M0  ])[ks  ]=(two*feqW27_S  -(f27_N  *(q*omegaD-one)-omegaD*feq27_N  *(q-one))/(omegaD-one)+f27_S  *q)/(q+one);
+      //q = q_dirS[k];   if (q>=zero && q<=one) (D27.f[DIR_0P0  ])[kn  ]=(two*feqW27_N  -(f27_S  *(q*omegaD-one)-omegaD*feq27_S  *(q-one))/(omegaD-one)+f27_N  *q)/(q+one);
+      //q = q_dirT[k];   if (q>=zero && q<=one) (D27.f[DIR_00M  ])[kb  ]=(two*feqW27_B  -(f27_T  *(q*omegaD-one)-omegaD*feq27_T  *(q-one))/(omegaD-one)+f27_B  *q)/(q+one);
+      //q = q_dirB[k];   if (q>=zero && q<=one) (D27.f[DIR_00P  ])[kt  ]=(two*feqW27_T  -(f27_B  *(q*omegaD-one)-omegaD*feq27_B  *(q-one))/(omegaD-one)+f27_T  *q)/(q+one);
+      //q = q_dirNE[k];  if (q>=zero && q<=one) (D27.f[DIR_MM0 ])[ksw ]=(two*feqW27_SW -(f27_NE *(q*omegaD-one)-omegaD*feq27_NE *(q-one))/(omegaD-one)+f27_SW *q)/(q+one);
+      //q = q_dirSW[k];  if (q>=zero && q<=one) (D27.f[DIR_PP0 ])[kne ]=(two*feqW27_NE -(f27_SW *(q*omegaD-one)-omegaD*feq27_SW *(q-one))/(omegaD-one)+f27_NE *q)/(q+one);
+      //q = q_dirSE[k];  if (q>=zero && q<=one) (D27.f[DIR_MP0 ])[knw ]=(two*feqW27_NW -(f27_SE *(q*omegaD-one)-omegaD*feq27_SE *(q-one))/(omegaD-one)+f27_NW *q)/(q+one);
+      //q = q_dirNW[k];  if (q>=zero && q<=one) (D27.f[DIR_PM0 ])[kse ]=(two*feqW27_SE -(f27_NW *(q*omegaD-one)-omegaD*feq27_NW *(q-one))/(omegaD-one)+f27_SE *q)/(q+one);
+      //q = q_dirTE[k];  if (q>=zero && q<=one) (D27.f[DIR_M0M ])[kbw ]=(two*feqW27_BW -(f27_TE *(q*omegaD-one)-omegaD*feq27_TE *(q-one))/(omegaD-one)+f27_BW *q)/(q+one);
+      //q = q_dirBW[k];  if (q>=zero && q<=one) (D27.f[DIR_P0P ])[kte ]=(two*feqW27_TE -(f27_BW *(q*omegaD-one)-omegaD*feq27_BW *(q-one))/(omegaD-one)+f27_TE *q)/(q+one);
+      //q = q_dirBE[k];  if (q>=zero && q<=one) (D27.f[DIR_M0P ])[ktw ]=(two*feqW27_TW -(f27_BE *(q*omegaD-one)-omegaD*feq27_BE *(q-one))/(omegaD-one)+f27_TW *q)/(q+one);
+      //q = q_dirTW[k];  if (q>=zero && q<=one) (D27.f[DIR_P0M ])[kbe ]=(two*feqW27_BE -(f27_TW *(q*omegaD-one)-omegaD*feq27_TW *(q-one))/(omegaD-one)+f27_BE *q)/(q+one);
+      //q = q_dirTN[k];  if (q>=zero && q<=one) (D27.f[DIR_0MM ])[kbs ]=(two*feqW27_BS -(f27_TN *(q*omegaD-one)-omegaD*feq27_TN *(q-one))/(omegaD-one)+f27_BS *q)/(q+one);
+      //q = q_dirBS[k];  if (q>=zero && q<=one) (D27.f[DIR_0PP ])[ktn ]=(two*feqW27_TN -(f27_BS *(q*omegaD-one)-omegaD*feq27_BS *(q-one))/(omegaD-one)+f27_TN *q)/(q+one);
+      //q = q_dirBN[k];  if (q>=zero && q<=one) (D27.f[DIR_0MP ])[kts ]=(two*feqW27_TS -(f27_BN *(q*omegaD-one)-omegaD*feq27_BN *(q-one))/(omegaD-one)+f27_TS *q)/(q+one);
+      //q = q_dirTS[k];  if (q>=zero && q<=one) (D27.f[DIR_0PM ])[kbn ]=(two*feqW27_BN -(f27_TS *(q*omegaD-one)-omegaD*feq27_TS *(q-one))/(omegaD-one)+f27_BN *q)/(q+one);
+      //q = q_dirTNE[k]; if (q>=zero && q<=one) (D27.f[DIR_MMM])[kbsw]=(two*feqW27_BSW-(f27_TNE*(q*omegaD-one)-omegaD*feq27_TNE*(q-one))/(omegaD-one)+f27_BSW*q)/(q+one);
+      //q = q_dirBSW[k]; if (q>=zero && q<=one) (D27.f[DIR_PPP])[ktne]=(two*feqW27_TNE-(f27_BSW*(q*omegaD-one)-omegaD*feq27_BSW*(q-one))/(omegaD-one)+f27_TNE*q)/(q+one);
+      //q = q_dirBNE[k]; if (q>=zero && q<=one) (D27.f[DIR_MMP])[ktsw]=(two*feqW27_TSW-(f27_BNE*(q*omegaD-one)-omegaD*feq27_BNE*(q-one))/(omegaD-one)+f27_TSW*q)/(q+one);
+      //q = q_dirTSW[k]; if (q>=zero && q<=one) (D27.f[DIR_PPM])[kbne]=(two*feqW27_BNE-(f27_TSW*(q*omegaD-one)-omegaD*feq27_TSW*(q-one))/(omegaD-one)+f27_BNE*q)/(q+one);
+      //q = q_dirTSE[k]; if (q>=zero && q<=one) (D27.f[DIR_MPM])[kbnw]=(two*feqW27_BNW-(f27_TSE*(q*omegaD-one)-omegaD*feq27_TSE*(q-one))/(omegaD-one)+f27_BNW*q)/(q+one);
+      //q = q_dirBNW[k]; if (q>=zero && q<=one) (D27.f[DIR_PMP])[ktse]=(two*feqW27_TSE-(f27_BNW*(q*omegaD-one)-omegaD*feq27_BNW*(q-one))/(omegaD-one)+f27_TSE*q)/(q+one);
+      //q = q_dirBSE[k]; if (q>=zero && q<=one) (D27.f[DIR_MPP])[ktnw]=(two*feqW27_TNW-(f27_BSE*(q*omegaD-one)-omegaD*feq27_BSE*(q-one))/(omegaD-one)+f27_TNW*q)/(q+one);
+      //q = q_dirTNW[k]; if (q>=zero && q<=one) (D27.f[DIR_PMM])[kbse]=(two*feqW27_BSE-(f27_TNW*(q*omegaD-one)-omegaD*feq27_TNW*(q-one))/(omegaD-one)+f27_BSE*q)/(q+one);
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -6728,7 +6726,7 @@ inline __device__ real calcDistributionBC_AD(real q, real weight, real v, real v
 
 // has to be excecuted before Fluid BCs
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void AD_SlipVelDeviceComp(
+__global__ void AD_SlipVelDeviceComp(
     real *normalX,
     real *normalY,
     real *normalZ,
@@ -6747,125 +6745,125 @@ extern "C" __global__ void AD_SlipVelDeviceComp(
     Distributions27 D;
     if (isEvenTimestep)
     {
-        D.f[E   ] = &distributions[E    * size_Mat];
-        D.f[W   ] = &distributions[W    * size_Mat];
-        D.f[N   ] = &distributions[N    * size_Mat];
-        D.f[S   ] = &distributions[S    * size_Mat];
-        D.f[T   ] = &distributions[T    * size_Mat];
-        D.f[B   ] = &distributions[B    * size_Mat];
-        D.f[NE  ] = &distributions[NE   * size_Mat];
-        D.f[SW  ] = &distributions[SW   * size_Mat];
-        D.f[SE  ] = &distributions[SE   * size_Mat];
-        D.f[NW  ] = &distributions[NW   * size_Mat];
-        D.f[TE  ] = &distributions[TE   * size_Mat];
-        D.f[BW  ] = &distributions[BW   * size_Mat];
-        D.f[BE  ] = &distributions[BE   * size_Mat];
-        D.f[TW  ] = &distributions[TW   * size_Mat];
-        D.f[TN  ] = &distributions[TN   * size_Mat];
-        D.f[BS  ] = &distributions[BS   * size_Mat];
-        D.f[BN  ] = &distributions[BN   * size_Mat];
-        D.f[TS  ] = &distributions[TS   * size_Mat];
-        D.f[REST] = &distributions[REST * size_Mat];
-        D.f[TNE ] = &distributions[TNE  * size_Mat];
-        D.f[TSW ] = &distributions[TSW  * size_Mat];
-        D.f[TSE ] = &distributions[TSE  * size_Mat];
-        D.f[TNW ] = &distributions[TNW  * size_Mat];
-        D.f[BNE ] = &distributions[BNE  * size_Mat];
-        D.f[BSW ] = &distributions[BSW  * size_Mat];
-        D.f[BSE ] = &distributions[BSE  * size_Mat];
-        D.f[BNW ] = &distributions[BNW  * size_Mat];
+        D.f[DIR_P00   ] = &distributions[DIR_P00    * size_Mat];
+        D.f[DIR_M00   ] = &distributions[DIR_M00    * size_Mat];
+        D.f[DIR_0P0   ] = &distributions[DIR_0P0    * size_Mat];
+        D.f[DIR_0M0   ] = &distributions[DIR_0M0    * size_Mat];
+        D.f[DIR_00P   ] = &distributions[DIR_00P    * size_Mat];
+        D.f[DIR_00M   ] = &distributions[DIR_00M    * size_Mat];
+        D.f[DIR_PP0  ] = &distributions[DIR_PP0   * size_Mat];
+        D.f[DIR_MM0  ] = &distributions[DIR_MM0   * size_Mat];
+        D.f[DIR_PM0  ] = &distributions[DIR_PM0   * size_Mat];
+        D.f[DIR_MP0  ] = &distributions[DIR_MP0   * size_Mat];
+        D.f[DIR_P0P  ] = &distributions[DIR_P0P   * size_Mat];
+        D.f[DIR_M0M  ] = &distributions[DIR_M0M   * size_Mat];
+        D.f[DIR_P0M  ] = &distributions[DIR_P0M   * size_Mat];
+        D.f[DIR_M0P  ] = &distributions[DIR_M0P   * size_Mat];
+        D.f[DIR_0PP  ] = &distributions[DIR_0PP   * size_Mat];
+        D.f[DIR_0MM  ] = &distributions[DIR_0MM   * size_Mat];
+        D.f[DIR_0PM  ] = &distributions[DIR_0PM   * size_Mat];
+        D.f[DIR_0MP  ] = &distributions[DIR_0MP   * size_Mat];
+        D.f[DIR_000] = &distributions[DIR_000 * size_Mat];
+        D.f[DIR_PPP ] = &distributions[DIR_PPP  * size_Mat];
+        D.f[DIR_MMP ] = &distributions[DIR_MMP  * size_Mat];
+        D.f[DIR_PMP ] = &distributions[DIR_PMP  * size_Mat];
+        D.f[DIR_MPP ] = &distributions[DIR_MPP  * size_Mat];
+        D.f[DIR_PPM ] = &distributions[DIR_PPM  * size_Mat];
+        D.f[DIR_MMM ] = &distributions[DIR_MMM  * size_Mat];
+        D.f[DIR_PMM ] = &distributions[DIR_PMM  * size_Mat];
+        D.f[DIR_MPM ] = &distributions[DIR_MPM  * size_Mat];
     }
     else
     {
-        D.f[W   ] = &distributions[E    * size_Mat];
-        D.f[E   ] = &distributions[W    * size_Mat];
-        D.f[S   ] = &distributions[N    * size_Mat];
-        D.f[N   ] = &distributions[S    * size_Mat];
-        D.f[B   ] = &distributions[T    * size_Mat];
-        D.f[T   ] = &distributions[B    * size_Mat];
-        D.f[SW  ] = &distributions[NE   * size_Mat];
-        D.f[NE  ] = &distributions[SW   * size_Mat];
-        D.f[NW  ] = &distributions[SE   * size_Mat];
-        D.f[SE  ] = &distributions[NW   * size_Mat];
-        D.f[BW  ] = &distributions[TE   * size_Mat];
-        D.f[TE  ] = &distributions[BW   * size_Mat];
-        D.f[TW  ] = &distributions[BE   * size_Mat];
-        D.f[BE  ] = &distributions[TW   * size_Mat];
-        D.f[BS  ] = &distributions[TN   * size_Mat];
-        D.f[TN  ] = &distributions[BS   * size_Mat];
-        D.f[TS  ] = &distributions[BN   * size_Mat];
-        D.f[BN  ] = &distributions[TS   * size_Mat];
-        D.f[REST] = &distributions[REST * size_Mat];
-        D.f[TNE ] = &distributions[BSW  * size_Mat];
-        D.f[TSW ] = &distributions[BNE  * size_Mat];
-        D.f[TSE ] = &distributions[BNW  * size_Mat];
-        D.f[TNW ] = &distributions[BSE  * size_Mat];
-        D.f[BNE ] = &distributions[TSW  * size_Mat];
-        D.f[BSW ] = &distributions[TNE  * size_Mat];
-        D.f[BSE ] = &distributions[TNW  * size_Mat];
-        D.f[BNW ] = &distributions[TSE  * size_Mat];
+        D.f[DIR_M00   ] = &distributions[DIR_P00    * size_Mat];
+        D.f[DIR_P00   ] = &distributions[DIR_M00    * size_Mat];
+        D.f[DIR_0M0   ] = &distributions[DIR_0P0    * size_Mat];
+        D.f[DIR_0P0   ] = &distributions[DIR_0M0    * size_Mat];
+        D.f[DIR_00M   ] = &distributions[DIR_00P    * size_Mat];
+        D.f[DIR_00P   ] = &distributions[DIR_00M    * size_Mat];
+        D.f[DIR_MM0  ] = &distributions[DIR_PP0   * size_Mat];
+        D.f[DIR_PP0  ] = &distributions[DIR_MM0   * size_Mat];
+        D.f[DIR_MP0  ] = &distributions[DIR_PM0   * size_Mat];
+        D.f[DIR_PM0  ] = &distributions[DIR_MP0   * size_Mat];
+        D.f[DIR_M0M  ] = &distributions[DIR_P0P   * size_Mat];
+        D.f[DIR_P0P  ] = &distributions[DIR_M0M   * size_Mat];
+        D.f[DIR_M0P  ] = &distributions[DIR_P0M   * size_Mat];
+        D.f[DIR_P0M  ] = &distributions[DIR_M0P   * size_Mat];
+        D.f[DIR_0MM  ] = &distributions[DIR_0PP   * size_Mat];
+        D.f[DIR_0PP  ] = &distributions[DIR_0MM   * size_Mat];
+        D.f[DIR_0MP  ] = &distributions[DIR_0PM   * size_Mat];
+        D.f[DIR_0PM  ] = &distributions[DIR_0MP   * size_Mat];
+        D.f[DIR_000] = &distributions[DIR_000 * size_Mat];
+        D.f[DIR_PPP ] = &distributions[DIR_MMM  * size_Mat];
+        D.f[DIR_MMP ] = &distributions[DIR_PPM  * size_Mat];
+        D.f[DIR_PMP ] = &distributions[DIR_MPM  * size_Mat];
+        D.f[DIR_MPP ] = &distributions[DIR_PMM  * size_Mat];
+        D.f[DIR_PPM ] = &distributions[DIR_MMP  * size_Mat];
+        D.f[DIR_MMM ] = &distributions[DIR_PPP  * size_Mat];
+        D.f[DIR_PMM ] = &distributions[DIR_MPP  * size_Mat];
+        D.f[DIR_MPM ] = &distributions[DIR_PMP  * size_Mat];
     }
     ////////////////////////////////////////////////////////////////////////////////
     Distributions27 DAD;
     if (isEvenTimestep)
     {
-        DAD.f[E   ] = &distributionsAD[E    * size_Mat];
-        DAD.f[W   ] = &distributionsAD[W    * size_Mat];
-        DAD.f[N   ] = &distributionsAD[N    * size_Mat];
-        DAD.f[S   ] = &distributionsAD[S    * size_Mat];
-        DAD.f[T   ] = &distributionsAD[T    * size_Mat];
-        DAD.f[B   ] = &distributionsAD[B    * size_Mat];
-        DAD.f[NE  ] = &distributionsAD[NE   * size_Mat];
-        DAD.f[SW  ] = &distributionsAD[SW   * size_Mat];
-        DAD.f[SE  ] = &distributionsAD[SE   * size_Mat];
-        DAD.f[NW  ] = &distributionsAD[NW   * size_Mat];
-        DAD.f[TE  ] = &distributionsAD[TE   * size_Mat];
-        DAD.f[BW  ] = &distributionsAD[BW   * size_Mat];
-        DAD.f[BE  ] = &distributionsAD[BE   * size_Mat];
-        DAD.f[TW  ] = &distributionsAD[TW   * size_Mat];
-        DAD.f[TN  ] = &distributionsAD[TN   * size_Mat];
-        DAD.f[BS  ] = &distributionsAD[BS   * size_Mat];
-        DAD.f[BN  ] = &distributionsAD[BN   * size_Mat];
-        DAD.f[TS  ] = &distributionsAD[TS   * size_Mat];
-        DAD.f[REST] = &distributionsAD[REST * size_Mat];
-        DAD.f[TNE ] = &distributionsAD[TNE  * size_Mat];
-        DAD.f[TSW ] = &distributionsAD[TSW  * size_Mat];
-        DAD.f[TSE ] = &distributionsAD[TSE  * size_Mat];
-        DAD.f[TNW ] = &distributionsAD[TNW  * size_Mat];
-        DAD.f[BNE ] = &distributionsAD[BNE  * size_Mat];
-        DAD.f[BSW ] = &distributionsAD[BSW  * size_Mat];
-        DAD.f[BSE ] = &distributionsAD[BSE  * size_Mat];
-        DAD.f[BNW ] = &distributionsAD[BNW  * size_Mat];
+        DAD.f[DIR_P00   ] = &distributionsAD[DIR_P00    * size_Mat];
+        DAD.f[DIR_M00   ] = &distributionsAD[DIR_M00    * size_Mat];
+        DAD.f[DIR_0P0   ] = &distributionsAD[DIR_0P0    * size_Mat];
+        DAD.f[DIR_0M0   ] = &distributionsAD[DIR_0M0    * size_Mat];
+        DAD.f[DIR_00P   ] = &distributionsAD[DIR_00P    * size_Mat];
+        DAD.f[DIR_00M   ] = &distributionsAD[DIR_00M    * size_Mat];
+        DAD.f[DIR_PP0  ] = &distributionsAD[DIR_PP0   * size_Mat];
+        DAD.f[DIR_MM0  ] = &distributionsAD[DIR_MM0   * size_Mat];
+        DAD.f[DIR_PM0  ] = &distributionsAD[DIR_PM0   * size_Mat];
+        DAD.f[DIR_MP0  ] = &distributionsAD[DIR_MP0   * size_Mat];
+        DAD.f[DIR_P0P  ] = &distributionsAD[DIR_P0P   * size_Mat];
+        DAD.f[DIR_M0M  ] = &distributionsAD[DIR_M0M   * size_Mat];
+        DAD.f[DIR_P0M  ] = &distributionsAD[DIR_P0M   * size_Mat];
+        DAD.f[DIR_M0P  ] = &distributionsAD[DIR_M0P   * size_Mat];
+        DAD.f[DIR_0PP  ] = &distributionsAD[DIR_0PP   * size_Mat];
+        DAD.f[DIR_0MM  ] = &distributionsAD[DIR_0MM   * size_Mat];
+        DAD.f[DIR_0PM  ] = &distributionsAD[DIR_0PM   * size_Mat];
+        DAD.f[DIR_0MP  ] = &distributionsAD[DIR_0MP   * size_Mat];
+        DAD.f[DIR_000] = &distributionsAD[DIR_000 * size_Mat];
+        DAD.f[DIR_PPP ] = &distributionsAD[DIR_PPP  * size_Mat];
+        DAD.f[DIR_MMP ] = &distributionsAD[DIR_MMP  * size_Mat];
+        DAD.f[DIR_PMP ] = &distributionsAD[DIR_PMP  * size_Mat];
+        DAD.f[DIR_MPP ] = &distributionsAD[DIR_MPP  * size_Mat];
+        DAD.f[DIR_PPM ] = &distributionsAD[DIR_PPM  * size_Mat];
+        DAD.f[DIR_MMM ] = &distributionsAD[DIR_MMM  * size_Mat];
+        DAD.f[DIR_PMM ] = &distributionsAD[DIR_PMM  * size_Mat];
+        DAD.f[DIR_MPM ] = &distributionsAD[DIR_MPM  * size_Mat];
     }
     else
     {
-        DAD.f[W   ] = &distributionsAD[E    * size_Mat];
-        DAD.f[E   ] = &distributionsAD[W    * size_Mat];
-        DAD.f[S   ] = &distributionsAD[N    * size_Mat];
-        DAD.f[N   ] = &distributionsAD[S    * size_Mat];
-        DAD.f[B   ] = &distributionsAD[T    * size_Mat];
-        DAD.f[T   ] = &distributionsAD[B    * size_Mat];
-        DAD.f[SW  ] = &distributionsAD[NE   * size_Mat];
-        DAD.f[NE  ] = &distributionsAD[SW   * size_Mat];
-        DAD.f[NW  ] = &distributionsAD[SE   * size_Mat];
-        DAD.f[SE  ] = &distributionsAD[NW   * size_Mat];
-        DAD.f[BW  ] = &distributionsAD[TE   * size_Mat];
-        DAD.f[TE  ] = &distributionsAD[BW   * size_Mat];
-        DAD.f[TW  ] = &distributionsAD[BE   * size_Mat];
-        DAD.f[BE  ] = &distributionsAD[TW   * size_Mat];
-        DAD.f[BS  ] = &distributionsAD[TN   * size_Mat];
-        DAD.f[TN  ] = &distributionsAD[BS   * size_Mat];
-        DAD.f[TS  ] = &distributionsAD[BN   * size_Mat];
-        DAD.f[BN  ] = &distributionsAD[TS   * size_Mat];
-        DAD.f[REST] = &distributionsAD[REST * size_Mat];
-        DAD.f[TNE ] = &distributionsAD[BSW  * size_Mat];
-        DAD.f[TSW ] = &distributionsAD[BNE  * size_Mat];
-        DAD.f[TSE ] = &distributionsAD[BNW  * size_Mat];
-        DAD.f[TNW ] = &distributionsAD[BSE  * size_Mat];
-        DAD.f[BNE ] = &distributionsAD[TSW  * size_Mat];
-        DAD.f[BSW ] = &distributionsAD[TNE  * size_Mat];
-        DAD.f[BSE ] = &distributionsAD[TNW  * size_Mat];
-        DAD.f[BNW ] = &distributionsAD[TSE  * size_Mat];
+        DAD.f[DIR_M00   ] = &distributionsAD[DIR_P00    * size_Mat];
+        DAD.f[DIR_P00   ] = &distributionsAD[DIR_M00    * size_Mat];
+        DAD.f[DIR_0M0   ] = &distributionsAD[DIR_0P0    * size_Mat];
+        DAD.f[DIR_0P0   ] = &distributionsAD[DIR_0M0    * size_Mat];
+        DAD.f[DIR_00M   ] = &distributionsAD[DIR_00P    * size_Mat];
+        DAD.f[DIR_00P   ] = &distributionsAD[DIR_00M    * size_Mat];
+        DAD.f[DIR_MM0  ] = &distributionsAD[DIR_PP0   * size_Mat];
+        DAD.f[DIR_PP0  ] = &distributionsAD[DIR_MM0   * size_Mat];
+        DAD.f[DIR_MP0  ] = &distributionsAD[DIR_PM0   * size_Mat];
+        DAD.f[DIR_PM0  ] = &distributionsAD[DIR_MP0   * size_Mat];
+        DAD.f[DIR_M0M  ] = &distributionsAD[DIR_P0P   * size_Mat];
+        DAD.f[DIR_P0P  ] = &distributionsAD[DIR_M0M   * size_Mat];
+        DAD.f[DIR_M0P  ] = &distributionsAD[DIR_P0M   * size_Mat];
+        DAD.f[DIR_P0M  ] = &distributionsAD[DIR_M0P   * size_Mat];
+        DAD.f[DIR_0MM  ] = &distributionsAD[DIR_0PP   * size_Mat];
+        DAD.f[DIR_0PP  ] = &distributionsAD[DIR_0MM   * size_Mat];
+        DAD.f[DIR_0MP  ] = &distributionsAD[DIR_0PM   * size_Mat];
+        DAD.f[DIR_0PM  ] = &distributionsAD[DIR_0MP   * size_Mat];
+        DAD.f[DIR_000] = &distributionsAD[DIR_000 * size_Mat];
+        DAD.f[DIR_PPP ] = &distributionsAD[DIR_MMM  * size_Mat];
+        DAD.f[DIR_MMP ] = &distributionsAD[DIR_PPM  * size_Mat];
+        DAD.f[DIR_PMP ] = &distributionsAD[DIR_MPM  * size_Mat];
+        DAD.f[DIR_MPP ] = &distributionsAD[DIR_PMM  * size_Mat];
+        DAD.f[DIR_PPM ] = &distributionsAD[DIR_MMP  * size_Mat];
+        DAD.f[DIR_MMM ] = &distributionsAD[DIR_PPP  * size_Mat];
+        DAD.f[DIR_PMM ] = &distributionsAD[DIR_MPP  * size_Mat];
+        DAD.f[DIR_MPM ] = &distributionsAD[DIR_PMP  * size_Mat];
     }
     ////////////////////////////////////////////////////////////////////////////////
     const unsigned  x = threadIdx.x;  // Globaler x-Index
@@ -6890,32 +6888,32 @@ extern "C" __global__ void AD_SlipVelDeviceComp(
             * q_dirBE, * q_dirTW, * q_dirTN, * q_dirBS, * q_dirBN, * q_dirTS,
             * q_dirTNE, * q_dirTSW, * q_dirTSE, * q_dirTNW, * q_dirBNE, * q_dirBSW,
             * q_dirBSE, * q_dirBNW;
-        q_dirE   = &Qarrays[E   * numberOfBCnodes];
-        q_dirW   = &Qarrays[W   * numberOfBCnodes];
-        q_dirN   = &Qarrays[N   * numberOfBCnodes];
-        q_dirS   = &Qarrays[S   * numberOfBCnodes];
-        q_dirT   = &Qarrays[T   * numberOfBCnodes];
-        q_dirB   = &Qarrays[B   * numberOfBCnodes];
-        q_dirNE  = &Qarrays[NE  * numberOfBCnodes];
-        q_dirSW  = &Qarrays[SW  * numberOfBCnodes];
-        q_dirSE  = &Qarrays[SE  * numberOfBCnodes];
-        q_dirNW  = &Qarrays[NW  * numberOfBCnodes];
-        q_dirTE  = &Qarrays[TE  * numberOfBCnodes];
-        q_dirBW  = &Qarrays[BW  * numberOfBCnodes];
-        q_dirBE  = &Qarrays[BE  * numberOfBCnodes];
-        q_dirTW  = &Qarrays[TW  * numberOfBCnodes];
-        q_dirTN  = &Qarrays[TN  * numberOfBCnodes];
-        q_dirBS  = &Qarrays[BS  * numberOfBCnodes];
-        q_dirBN  = &Qarrays[BN  * numberOfBCnodes];
-        q_dirTS  = &Qarrays[TS  * numberOfBCnodes];
-        q_dirTNE = &Qarrays[TNE * numberOfBCnodes];
-        q_dirTSW = &Qarrays[TSW * numberOfBCnodes];
-        q_dirTSE = &Qarrays[TSE * numberOfBCnodes];
-        q_dirTNW = &Qarrays[TNW * numberOfBCnodes];
-        q_dirBNE = &Qarrays[BNE * numberOfBCnodes];
-        q_dirBSW = &Qarrays[BSW * numberOfBCnodes];
-        q_dirBSE = &Qarrays[BSE * numberOfBCnodes];
-        q_dirBNW = &Qarrays[BNW * numberOfBCnodes];
+        q_dirE   = &Qarrays[DIR_P00   * numberOfBCnodes];
+        q_dirW   = &Qarrays[DIR_M00   * numberOfBCnodes];
+        q_dirN   = &Qarrays[DIR_0P0   * numberOfBCnodes];
+        q_dirS   = &Qarrays[DIR_0M0   * numberOfBCnodes];
+        q_dirT   = &Qarrays[DIR_00P   * numberOfBCnodes];
+        q_dirB   = &Qarrays[DIR_00M   * numberOfBCnodes];
+        q_dirNE  = &Qarrays[DIR_PP0  * numberOfBCnodes];
+        q_dirSW  = &Qarrays[DIR_MM0  * numberOfBCnodes];
+        q_dirSE  = &Qarrays[DIR_PM0  * numberOfBCnodes];
+        q_dirNW  = &Qarrays[DIR_MP0  * numberOfBCnodes];
+        q_dirTE  = &Qarrays[DIR_P0P  * numberOfBCnodes];
+        q_dirBW  = &Qarrays[DIR_M0M  * numberOfBCnodes];
+        q_dirBE  = &Qarrays[DIR_P0M  * numberOfBCnodes];
+        q_dirTW  = &Qarrays[DIR_M0P  * numberOfBCnodes];
+        q_dirTN  = &Qarrays[DIR_0PP  * numberOfBCnodes];
+        q_dirBS  = &Qarrays[DIR_0MM  * numberOfBCnodes];
+        q_dirBN  = &Qarrays[DIR_0PM  * numberOfBCnodes];
+        q_dirTS  = &Qarrays[DIR_0MP  * numberOfBCnodes];
+        q_dirTNE = &Qarrays[DIR_PPP * numberOfBCnodes];
+        q_dirTSW = &Qarrays[DIR_MMP * numberOfBCnodes];
+        q_dirTSE = &Qarrays[DIR_PMP * numberOfBCnodes];
+        q_dirTNW = &Qarrays[DIR_MPP * numberOfBCnodes];
+        q_dirBNE = &Qarrays[DIR_PPM * numberOfBCnodes];
+        q_dirBSW = &Qarrays[DIR_MMM * numberOfBCnodes];
+        q_dirBSE = &Qarrays[DIR_PMM * numberOfBCnodes];
+        q_dirBNW = &Qarrays[DIR_MPM * numberOfBCnodes];
         ////////////////////////////////////////////////////////////////////////////////
         //index
         unsigned int KQK   = QindexArray[k];
@@ -6950,37 +6948,37 @@ extern "C" __global__ void AD_SlipVelDeviceComp(
         real f_E, f_W, f_N, f_S, f_T, f_B, f_NE, f_SW, f_SE, f_NW, f_TE, f_BW, f_BE,
             f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-        f_W   = (D.f[E])[ke];
-        f_E   = (D.f[W])[kw];
-        f_S   = (D.f[N])[kn];
-        f_N   = (D.f[S])[ks];
-        f_B   = (D.f[T])[kt];
-        f_T   = (D.f[B])[kb];
-        f_SW  = (D.f[NE])[kne];
-        f_NE  = (D.f[SW])[ksw];
-        f_NW  = (D.f[SE])[kse];
-        f_SE  = (D.f[NW])[knw];
-        f_BW  = (D.f[TE])[kte];
-        f_TE  = (D.f[BW])[kbw];
-        f_TW  = (D.f[BE])[kbe];
-        f_BE  = (D.f[TW])[ktw];
-        f_BS  = (D.f[TN])[ktn];
-        f_TN  = (D.f[BS])[kbs];
-        f_TS  = (D.f[BN])[kbn];
-        f_BN  = (D.f[TS])[kts];
-        f_BSW = (D.f[TNE])[ktne];
-        f_BNE = (D.f[TSW])[ktsw];
-        f_BNW = (D.f[TSE])[ktse];
-        f_BSE = (D.f[TNW])[ktnw];
-        f_TSW = (D.f[BNE])[kbne];
-        f_TNE = (D.f[BSW])[kbsw];
-        f_TNW = (D.f[BSE])[kbse];
-        f_TSE = (D.f[BNW])[kbnw];
+        f_W   = (D.f[DIR_P00])[ke];
+        f_E   = (D.f[DIR_M00])[kw];
+        f_S   = (D.f[DIR_0P0])[kn];
+        f_N   = (D.f[DIR_0M0])[ks];
+        f_B   = (D.f[DIR_00P])[kt];
+        f_T   = (D.f[DIR_00M])[kb];
+        f_SW  = (D.f[DIR_PP0])[kne];
+        f_NE  = (D.f[DIR_MM0])[ksw];
+        f_NW  = (D.f[DIR_PM0])[kse];
+        f_SE  = (D.f[DIR_MP0])[knw];
+        f_BW  = (D.f[DIR_P0P])[kte];
+        f_TE  = (D.f[DIR_M0M])[kbw];
+        f_TW  = (D.f[DIR_P0M])[kbe];
+        f_BE  = (D.f[DIR_M0P])[ktw];
+        f_BS  = (D.f[DIR_0PP])[ktn];
+        f_TN  = (D.f[DIR_0MM])[kbs];
+        f_TS  = (D.f[DIR_0PM])[kbn];
+        f_BN  = (D.f[DIR_0MP])[kts];
+        f_BSW = (D.f[DIR_PPP])[ktne];
+        f_BNE = (D.f[DIR_MMP])[ktsw];
+        f_BNW = (D.f[DIR_PMP])[ktse];
+        f_BSE = (D.f[DIR_MPP])[ktnw];
+        f_TSW = (D.f[DIR_PPM])[kbne];
+        f_TNE = (D.f[DIR_MMM])[kbsw];
+        f_TNW = (D.f[DIR_PMM])[kbse];
+        f_TSE = (D.f[DIR_MPM])[kbnw];
         ////////////////////////////////////////////////////////////////////////////////
         real vx1, vx2, vx3, drho, q;
         drho = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
             f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW +
-            f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[REST])[kzero]);
+            f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[DIR_000])[kzero]);
 
         vx1 = (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
             ((f_BE - f_TW) + (f_TE - f_BW)) + ((f_SE - f_NW) + (f_NE - f_SW)) +
@@ -6998,98 +6996,98 @@ extern "C" __global__ void AD_SlipVelDeviceComp(
         real cu_sq = c3o2 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3) * (c1o1 + drho);
 
         ////////////////////////////////////////////////////////////////////////////////
-        f_W   = (DAD.f[E])[ke];
-        f_E   = (DAD.f[W])[kw];
-        f_S   = (DAD.f[N])[kn];
-        f_N   = (DAD.f[S])[ks];
-        f_B   = (DAD.f[T])[kt];
-        f_T   = (DAD.f[B])[kb];
-        f_SW  = (DAD.f[NE])[kne];
-        f_NE  = (DAD.f[SW])[ksw];
-        f_NW  = (DAD.f[SE])[kse];
-        f_SE  = (DAD.f[NW])[knw];
-        f_BW  = (DAD.f[TE])[kte];
-        f_TE  = (DAD.f[BW])[kbw];
-        f_TW  = (DAD.f[BE])[kbe];
-        f_BE  = (DAD.f[TW])[ktw];
-        f_BS  = (DAD.f[TN])[ktn];
-        f_TN  = (DAD.f[BS])[kbs];
-        f_TS  = (DAD.f[BN])[kbn];
-        f_BN  = (DAD.f[TS])[kts];
-        f_BSW = (DAD.f[TNE])[ktne];
-        f_BNE = (DAD.f[TSW])[ktsw];
-        f_BNW = (DAD.f[TSE])[ktse];
-        f_BSE = (DAD.f[TNW])[ktnw];
-        f_TSW = (DAD.f[BNE])[kbne];
-        f_TNE = (DAD.f[BSW])[kbsw];
-        f_TNW = (DAD.f[BSE])[kbse];
-        f_TSE = (DAD.f[BNW])[kbnw];
+        f_W   = (DAD.f[DIR_P00])[ke];
+        f_E   = (DAD.f[DIR_M00])[kw];
+        f_S   = (DAD.f[DIR_0P0])[kn];
+        f_N   = (DAD.f[DIR_0M0])[ks];
+        f_B   = (DAD.f[DIR_00P])[kt];
+        f_T   = (DAD.f[DIR_00M])[kb];
+        f_SW  = (DAD.f[DIR_PP0])[kne];
+        f_NE  = (DAD.f[DIR_MM0])[ksw];
+        f_NW  = (DAD.f[DIR_PM0])[kse];
+        f_SE  = (DAD.f[DIR_MP0])[knw];
+        f_BW  = (DAD.f[DIR_P0P])[kte];
+        f_TE  = (DAD.f[DIR_M0M])[kbw];
+        f_TW  = (DAD.f[DIR_P0M])[kbe];
+        f_BE  = (DAD.f[DIR_M0P])[ktw];
+        f_BS  = (DAD.f[DIR_0PP])[ktn];
+        f_TN  = (DAD.f[DIR_0MM])[kbs];
+        f_TS  = (DAD.f[DIR_0PM])[kbn];
+        f_BN  = (DAD.f[DIR_0MP])[kts];
+        f_BSW = (DAD.f[DIR_PPP])[ktne];
+        f_BNE = (DAD.f[DIR_MMP])[ktsw];
+        f_BNW = (DAD.f[DIR_PMP])[ktse];
+        f_BSE = (DAD.f[DIR_MPP])[ktnw];
+        f_TSW = (DAD.f[DIR_PPM])[kbne];
+        f_TNE = (DAD.f[DIR_MMM])[kbsw];
+        f_TNW = (DAD.f[DIR_PMM])[kbse];
+        f_TSE = (DAD.f[DIR_MPM])[kbnw];
         //////////////////////////////////////////////////////////////////////////
         if (!isEvenTimestep)
         {
-            DAD.f[E   ] = &distributionsAD[E    * size_Mat];
-            DAD.f[W   ] = &distributionsAD[W    * size_Mat];
-            DAD.f[N   ] = &distributionsAD[N    * size_Mat];
-            DAD.f[S   ] = &distributionsAD[S    * size_Mat];
-            DAD.f[T   ] = &distributionsAD[T    * size_Mat];
-            DAD.f[B   ] = &distributionsAD[B    * size_Mat];
-            DAD.f[NE  ] = &distributionsAD[NE   * size_Mat];
-            DAD.f[SW  ] = &distributionsAD[SW   * size_Mat];
-            DAD.f[SE  ] = &distributionsAD[SE   * size_Mat];
-            DAD.f[NW  ] = &distributionsAD[NW   * size_Mat];
-            DAD.f[TE  ] = &distributionsAD[TE   * size_Mat];
-            DAD.f[BW  ] = &distributionsAD[BW   * size_Mat];
-            DAD.f[BE  ] = &distributionsAD[BE   * size_Mat];
-            DAD.f[TW  ] = &distributionsAD[TW   * size_Mat];
-            DAD.f[TN  ] = &distributionsAD[TN   * size_Mat];
-            DAD.f[BS  ] = &distributionsAD[BS   * size_Mat];
-            DAD.f[BN  ] = &distributionsAD[BN   * size_Mat];
-            DAD.f[TS  ] = &distributionsAD[TS   * size_Mat];
-            DAD.f[REST] = &distributionsAD[REST * size_Mat];
-            DAD.f[TNE ] = &distributionsAD[TNE  * size_Mat];
-            DAD.f[TSW ] = &distributionsAD[TSW  * size_Mat];
-            DAD.f[TSE ] = &distributionsAD[TSE  * size_Mat];
-            DAD.f[TNW ] = &distributionsAD[TNW  * size_Mat];
-            DAD.f[BNE ] = &distributionsAD[BNE  * size_Mat];
-            DAD.f[BSW ] = &distributionsAD[BSW  * size_Mat];
-            DAD.f[BSE ] = &distributionsAD[BSE  * size_Mat];
-            DAD.f[BNW ] = &distributionsAD[BNW  * size_Mat];
+            DAD.f[DIR_P00   ] = &distributionsAD[DIR_P00    * size_Mat];
+            DAD.f[DIR_M00   ] = &distributionsAD[DIR_M00    * size_Mat];
+            DAD.f[DIR_0P0   ] = &distributionsAD[DIR_0P0    * size_Mat];
+            DAD.f[DIR_0M0   ] = &distributionsAD[DIR_0M0    * size_Mat];
+            DAD.f[DIR_00P   ] = &distributionsAD[DIR_00P    * size_Mat];
+            DAD.f[DIR_00M   ] = &distributionsAD[DIR_00M    * size_Mat];
+            DAD.f[DIR_PP0  ] = &distributionsAD[DIR_PP0   * size_Mat];
+            DAD.f[DIR_MM0  ] = &distributionsAD[DIR_MM0   * size_Mat];
+            DAD.f[DIR_PM0  ] = &distributionsAD[DIR_PM0   * size_Mat];
+            DAD.f[DIR_MP0  ] = &distributionsAD[DIR_MP0   * size_Mat];
+            DAD.f[DIR_P0P  ] = &distributionsAD[DIR_P0P   * size_Mat];
+            DAD.f[DIR_M0M  ] = &distributionsAD[DIR_M0M   * size_Mat];
+            DAD.f[DIR_P0M  ] = &distributionsAD[DIR_P0M   * size_Mat];
+            DAD.f[DIR_M0P  ] = &distributionsAD[DIR_M0P   * size_Mat];
+            DAD.f[DIR_0PP  ] = &distributionsAD[DIR_0PP   * size_Mat];
+            DAD.f[DIR_0MM  ] = &distributionsAD[DIR_0MM   * size_Mat];
+            DAD.f[DIR_0PM  ] = &distributionsAD[DIR_0PM   * size_Mat];
+            DAD.f[DIR_0MP  ] = &distributionsAD[DIR_0MP   * size_Mat];
+            DAD.f[DIR_000] = &distributionsAD[DIR_000 * size_Mat];
+            DAD.f[DIR_PPP ] = &distributionsAD[DIR_PPP  * size_Mat];
+            DAD.f[DIR_MMP ] = &distributionsAD[DIR_MMP  * size_Mat];
+            DAD.f[DIR_PMP ] = &distributionsAD[DIR_PMP  * size_Mat];
+            DAD.f[DIR_MPP ] = &distributionsAD[DIR_MPP  * size_Mat];
+            DAD.f[DIR_PPM ] = &distributionsAD[DIR_PPM  * size_Mat];
+            DAD.f[DIR_MMM ] = &distributionsAD[DIR_MMM  * size_Mat];
+            DAD.f[DIR_PMM ] = &distributionsAD[DIR_PMM  * size_Mat];
+            DAD.f[DIR_MPM ] = &distributionsAD[DIR_MPM  * size_Mat];
         }
         else
         {
-            DAD.f[W   ] = &distributionsAD[E    * size_Mat];
-            DAD.f[E   ] = &distributionsAD[W    * size_Mat];
-            DAD.f[S   ] = &distributionsAD[N    * size_Mat];
-            DAD.f[N   ] = &distributionsAD[S    * size_Mat];
-            DAD.f[B   ] = &distributionsAD[T    * size_Mat];
-            DAD.f[T   ] = &distributionsAD[B    * size_Mat];
-            DAD.f[SW  ] = &distributionsAD[NE   * size_Mat];
-            DAD.f[NE  ] = &distributionsAD[SW   * size_Mat];
-            DAD.f[NW  ] = &distributionsAD[SE   * size_Mat];
-            DAD.f[SE  ] = &distributionsAD[NW   * size_Mat];
-            DAD.f[BW  ] = &distributionsAD[TE   * size_Mat];
-            DAD.f[TE  ] = &distributionsAD[BW   * size_Mat];
-            DAD.f[TW  ] = &distributionsAD[BE   * size_Mat];
-            DAD.f[BE  ] = &distributionsAD[TW   * size_Mat];
-            DAD.f[BS  ] = &distributionsAD[TN   * size_Mat];
-            DAD.f[TN  ] = &distributionsAD[BS   * size_Mat];
-            DAD.f[TS  ] = &distributionsAD[BN   * size_Mat];
-            DAD.f[BN  ] = &distributionsAD[TS   * size_Mat];
-            DAD.f[REST] = &distributionsAD[REST * size_Mat];
-            DAD.f[TNE ] = &distributionsAD[BSW  * size_Mat];
-            DAD.f[TSW ] = &distributionsAD[BNE  * size_Mat];
-            DAD.f[TSE ] = &distributionsAD[BNW  * size_Mat];
-            DAD.f[TNW ] = &distributionsAD[BSE  * size_Mat];
-            DAD.f[BNE ] = &distributionsAD[TSW  * size_Mat];
-            DAD.f[BSW ] = &distributionsAD[TNE  * size_Mat];
-            DAD.f[BSE ] = &distributionsAD[TNW  * size_Mat];
-            DAD.f[BNW ] = &distributionsAD[TSE  * size_Mat];
+            DAD.f[DIR_M00   ] = &distributionsAD[DIR_P00    * size_Mat];
+            DAD.f[DIR_P00   ] = &distributionsAD[DIR_M00    * size_Mat];
+            DAD.f[DIR_0M0   ] = &distributionsAD[DIR_0P0    * size_Mat];
+            DAD.f[DIR_0P0   ] = &distributionsAD[DIR_0M0    * size_Mat];
+            DAD.f[DIR_00M   ] = &distributionsAD[DIR_00P    * size_Mat];
+            DAD.f[DIR_00P   ] = &distributionsAD[DIR_00M    * size_Mat];
+            DAD.f[DIR_MM0  ] = &distributionsAD[DIR_PP0   * size_Mat];
+            DAD.f[DIR_PP0  ] = &distributionsAD[DIR_MM0   * size_Mat];
+            DAD.f[DIR_MP0  ] = &distributionsAD[DIR_PM0   * size_Mat];
+            DAD.f[DIR_PM0  ] = &distributionsAD[DIR_MP0   * size_Mat];
+            DAD.f[DIR_M0M  ] = &distributionsAD[DIR_P0P   * size_Mat];
+            DAD.f[DIR_P0P  ] = &distributionsAD[DIR_M0M   * size_Mat];
+            DAD.f[DIR_M0P  ] = &distributionsAD[DIR_P0M   * size_Mat];
+            DAD.f[DIR_P0M  ] = &distributionsAD[DIR_M0P   * size_Mat];
+            DAD.f[DIR_0MM  ] = &distributionsAD[DIR_0PP   * size_Mat];
+            DAD.f[DIR_0PP  ] = &distributionsAD[DIR_0MM   * size_Mat];
+            DAD.f[DIR_0MP  ] = &distributionsAD[DIR_0PM   * size_Mat];
+            DAD.f[DIR_0PM  ] = &distributionsAD[DIR_0MP   * size_Mat];
+            DAD.f[DIR_000] = &distributionsAD[DIR_000 * size_Mat];
+            DAD.f[DIR_PPP ] = &distributionsAD[DIR_MMM  * size_Mat];
+            DAD.f[DIR_MMP ] = &distributionsAD[DIR_PPM  * size_Mat];
+            DAD.f[DIR_PMP ] = &distributionsAD[DIR_MPM  * size_Mat];
+            DAD.f[DIR_MPP ] = &distributionsAD[DIR_PMM  * size_Mat];
+            DAD.f[DIR_PPM ] = &distributionsAD[DIR_MMP  * size_Mat];
+            DAD.f[DIR_MMM ] = &distributionsAD[DIR_PPP  * size_Mat];
+            DAD.f[DIR_PMM ] = &distributionsAD[DIR_MPP  * size_Mat];
+            DAD.f[DIR_MPM ] = &distributionsAD[DIR_PMP  * size_Mat];
         }
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
         real concentration =
             f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
             f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW +
-            f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[REST])[kzero]);
+            f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[DIR_000])[kzero]);
 
         real jx1 =
             (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
@@ -7117,32 +7115,32 @@ extern "C" __global__ void AD_SlipVelDeviceComp(
         real jTan3 = jx3 - NormJ * NormZ;
 
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-        q = q_dirE[k];   if (q >= c0o1 && q <= c1o1) { (DAD.f[W  ])[kw  ] = calcDistributionBC_AD(q, c2o27,   vx1,         cu_sq, f_E,   f_W,   omegaDiffusivity,        jTan1,       concentration); }
-        q = q_dirW[k];   if (q >= c0o1 && q <= c1o1) { (DAD.f[E  ])[ke  ] = calcDistributionBC_AD(q, c2o27,  -vx1,         cu_sq, f_W,   f_E,   omegaDiffusivity,       -jTan1,       concentration); }
-        q = q_dirN[k];   if (q >= c0o1 && q <= c1o1) { (DAD.f[S  ])[ks  ] = calcDistributionBC_AD(q, c2o27,   vx2,         cu_sq, f_N,   f_S,   omegaDiffusivity,        jTan2,       concentration); }
-        q = q_dirS[k];   if (q >= c0o1 && q <= c1o1) { (DAD.f[N  ])[kn  ] = calcDistributionBC_AD(q, c2o27,  -vx2,         cu_sq, f_S,   f_N,   omegaDiffusivity,       -jTan2,       concentration); }
-        q = q_dirT[k];   if (q >= c0o1 && q <= c1o1) { (DAD.f[B  ])[kb  ] = calcDistributionBC_AD(q, c2o27,   vx3,         cu_sq, f_T,   f_B,   omegaDiffusivity,        jTan3,       concentration); }
-        q = q_dirB[k];   if (q >= c0o1 && q <= c1o1) { (DAD.f[T  ])[kt  ] = calcDistributionBC_AD(q, c2o27,  -vx3,         cu_sq, f_B,   f_T,   omegaDiffusivity,       -jTan3,       concentration); }
-        q = q_dirNE[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[SW ])[ksw ] = calcDistributionBC_AD(q, c1o54,   vx1+vx2,     cu_sq, f_NE,  f_SW,  omegaDiffusivity,  jTan1+jTan2,       concentration); }
-        q = q_dirSW[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[NE ])[kne ] = calcDistributionBC_AD(q, c1o54,  -vx1-vx2,     cu_sq, f_SW,  f_NE,  omegaDiffusivity, -jTan1-jTan2,       concentration); }
-        q = q_dirSE[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[NW ])[knw ] = calcDistributionBC_AD(q, c1o54,   vx1-vx2,     cu_sq, f_SE,  f_NW,  omegaDiffusivity,  jTan1-jTan2,       concentration); }
-        q = q_dirNW[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[SE ])[kse ] = calcDistributionBC_AD(q, c1o54,  -vx1+vx2,     cu_sq, f_NW,  f_SE,  omegaDiffusivity, -jTan1+jTan2,       concentration); }
-        q = q_dirTE[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[BW ])[kbw ] = calcDistributionBC_AD(q, c1o54,   vx1    +vx3, cu_sq, f_TE,  f_BW,  omegaDiffusivity,  jTan1      +jTan3, concentration); }
-        q = q_dirBW[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[TE ])[kte ] = calcDistributionBC_AD(q, c1o54,  -vx1    -vx3, cu_sq, f_BW,  f_TE,  omegaDiffusivity, -jTan1      -jTan3, concentration); }
-        q = q_dirBE[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[TW ])[ktw ] = calcDistributionBC_AD(q, c1o54,   vx1    -vx3, cu_sq, f_BE,  f_TW,  omegaDiffusivity,  jTan1      -jTan3, concentration); }
-        q = q_dirTW[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[BE ])[kbe ] = calcDistributionBC_AD(q, c1o54,  -vx1    +vx3, cu_sq, f_TW,  f_BE,  omegaDiffusivity, -jTan1      +jTan3, concentration); }
-        q = q_dirTN[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[BS ])[kbs ] = calcDistributionBC_AD(q, c1o54,       vx2+vx3, cu_sq, f_TN,  f_BS,  omegaDiffusivity,        jTan2+jTan3, concentration); }
-        q = q_dirBS[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[TN ])[ktn ] = calcDistributionBC_AD(q, c1o54,      -vx2-vx3, cu_sq, f_BS,  f_TN,  omegaDiffusivity,       -jTan2-jTan3, concentration); }
-        q = q_dirBN[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[TS ])[kts ] = calcDistributionBC_AD(q, c1o54,       vx2-vx3, cu_sq, f_BN,  f_TS,  omegaDiffusivity,        jTan2-jTan3, concentration); }
-        q = q_dirTS[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[BN ])[kbn ] = calcDistributionBC_AD(q, c1o54,      -vx2+vx3, cu_sq, f_TS,  f_BN,  omegaDiffusivity,       -jTan2+jTan3, concentration); }
-        q = q_dirTNE[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[BSW])[kbsw] = calcDistributionBC_AD(q, c1o216,  vx1+vx2+vx3, cu_sq, f_TNE, f_BSW, omegaDiffusivity,  jTan1+jTan2+jTan3, concentration); }
-        q = q_dirBSW[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[TNE])[ktne] = calcDistributionBC_AD(q, c1o216, -vx1-vx2-vx3, cu_sq, f_BSW, f_TNE, omegaDiffusivity, -jTan1-jTan2-jTan3, concentration); }
-        q = q_dirBNE[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[TSW])[ktsw] = calcDistributionBC_AD(q, c1o216,  vx1+vx2-vx3, cu_sq, f_BNE, f_TSW, omegaDiffusivity,  jTan1+jTan2-jTan3, concentration); }
-        q = q_dirTSW[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[BNE])[kbne] = calcDistributionBC_AD(q, c1o216, -vx1-vx2+vx3, cu_sq, f_TSW, f_BNE, omegaDiffusivity, -jTan1-jTan2+jTan3, concentration); }
-        q = q_dirTSE[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[BNW])[kbnw] = calcDistributionBC_AD(q, c1o216,  vx1-vx2+vx3, cu_sq, f_TSE, f_BNW, omegaDiffusivity,  jTan1-jTan2+jTan3, concentration); }
-        q = q_dirBNW[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[TSE])[ktse] = calcDistributionBC_AD(q, c1o216, -vx1+vx2-vx3, cu_sq, f_BNW, f_TSE, omegaDiffusivity, -jTan1+jTan2-jTan3, concentration); }
-        q = q_dirBSE[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[TNW])[ktnw] = calcDistributionBC_AD(q, c1o216,  vx1-vx2-vx3, cu_sq, f_BSE, f_TNW, omegaDiffusivity,  jTan1-jTan2-jTan3, concentration); }
-        q = q_dirTNW[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[BSE])[kbse] = calcDistributionBC_AD(q, c1o216, -vx1+vx2+vx3, cu_sq, f_TNW, f_BSE, omegaDiffusivity, -jTan1+jTan2+jTan3, concentration); }
+        q = q_dirE[k];   if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_M00  ])[kw  ] = calcDistributionBC_AD(q, c2o27,   vx1,         cu_sq, f_E,   f_W,   omegaDiffusivity,        jTan1,       concentration); }
+        q = q_dirW[k];   if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_P00  ])[ke  ] = calcDistributionBC_AD(q, c2o27,  -vx1,         cu_sq, f_W,   f_E,   omegaDiffusivity,       -jTan1,       concentration); }
+        q = q_dirN[k];   if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_0M0  ])[ks  ] = calcDistributionBC_AD(q, c2o27,   vx2,         cu_sq, f_N,   f_S,   omegaDiffusivity,        jTan2,       concentration); }
+        q = q_dirS[k];   if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_0P0  ])[kn  ] = calcDistributionBC_AD(q, c2o27,  -vx2,         cu_sq, f_S,   f_N,   omegaDiffusivity,       -jTan2,       concentration); }
+        q = q_dirT[k];   if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_00M  ])[kb  ] = calcDistributionBC_AD(q, c2o27,   vx3,         cu_sq, f_T,   f_B,   omegaDiffusivity,        jTan3,       concentration); }
+        q = q_dirB[k];   if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_00P  ])[kt  ] = calcDistributionBC_AD(q, c2o27,  -vx3,         cu_sq, f_B,   f_T,   omegaDiffusivity,       -jTan3,       concentration); }
+        q = q_dirNE[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_MM0 ])[ksw ] = calcDistributionBC_AD(q, c1o54,   vx1+vx2,     cu_sq, f_NE,  f_SW,  omegaDiffusivity,  jTan1+jTan2,       concentration); }
+        q = q_dirSW[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_PP0 ])[kne ] = calcDistributionBC_AD(q, c1o54,  -vx1-vx2,     cu_sq, f_SW,  f_NE,  omegaDiffusivity, -jTan1-jTan2,       concentration); }
+        q = q_dirSE[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_MP0 ])[knw ] = calcDistributionBC_AD(q, c1o54,   vx1-vx2,     cu_sq, f_SE,  f_NW,  omegaDiffusivity,  jTan1-jTan2,       concentration); }
+        q = q_dirNW[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_PM0 ])[kse ] = calcDistributionBC_AD(q, c1o54,  -vx1+vx2,     cu_sq, f_NW,  f_SE,  omegaDiffusivity, -jTan1+jTan2,       concentration); }
+        q = q_dirTE[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_M0M ])[kbw ] = calcDistributionBC_AD(q, c1o54,   vx1    +vx3, cu_sq, f_TE,  f_BW,  omegaDiffusivity,  jTan1      +jTan3, concentration); }
+        q = q_dirBW[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_P0P ])[kte ] = calcDistributionBC_AD(q, c1o54,  -vx1    -vx3, cu_sq, f_BW,  f_TE,  omegaDiffusivity, -jTan1      -jTan3, concentration); }
+        q = q_dirBE[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_M0P ])[ktw ] = calcDistributionBC_AD(q, c1o54,   vx1    -vx3, cu_sq, f_BE,  f_TW,  omegaDiffusivity,  jTan1      -jTan3, concentration); }
+        q = q_dirTW[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_P0M ])[kbe ] = calcDistributionBC_AD(q, c1o54,  -vx1    +vx3, cu_sq, f_TW,  f_BE,  omegaDiffusivity, -jTan1      +jTan3, concentration); }
+        q = q_dirTN[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_0MM ])[kbs ] = calcDistributionBC_AD(q, c1o54,       vx2+vx3, cu_sq, f_TN,  f_BS,  omegaDiffusivity,        jTan2+jTan3, concentration); }
+        q = q_dirBS[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_0PP ])[ktn ] = calcDistributionBC_AD(q, c1o54,      -vx2-vx3, cu_sq, f_BS,  f_TN,  omegaDiffusivity,       -jTan2-jTan3, concentration); }
+        q = q_dirBN[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_0MP ])[kts ] = calcDistributionBC_AD(q, c1o54,       vx2-vx3, cu_sq, f_BN,  f_TS,  omegaDiffusivity,        jTan2-jTan3, concentration); }
+        q = q_dirTS[k];  if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_0PM ])[kbn ] = calcDistributionBC_AD(q, c1o54,      -vx2+vx3, cu_sq, f_TS,  f_BN,  omegaDiffusivity,       -jTan2+jTan3, concentration); }
+        q = q_dirTNE[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_MMM])[kbsw] = calcDistributionBC_AD(q, c1o216,  vx1+vx2+vx3, cu_sq, f_TNE, f_BSW, omegaDiffusivity,  jTan1+jTan2+jTan3, concentration); }
+        q = q_dirBSW[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_PPP])[ktne] = calcDistributionBC_AD(q, c1o216, -vx1-vx2-vx3, cu_sq, f_BSW, f_TNE, omegaDiffusivity, -jTan1-jTan2-jTan3, concentration); }
+        q = q_dirBNE[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_MMP])[ktsw] = calcDistributionBC_AD(q, c1o216,  vx1+vx2-vx3, cu_sq, f_BNE, f_TSW, omegaDiffusivity,  jTan1+jTan2-jTan3, concentration); }
+        q = q_dirTSW[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_PPM])[kbne] = calcDistributionBC_AD(q, c1o216, -vx1-vx2+vx3, cu_sq, f_TSW, f_BNE, omegaDiffusivity, -jTan1-jTan2+jTan3, concentration); }
+        q = q_dirTSE[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_MPM])[kbnw] = calcDistributionBC_AD(q, c1o216,  vx1-vx2+vx3, cu_sq, f_TSE, f_BNW, omegaDiffusivity,  jTan1-jTan2+jTan3, concentration); }
+        q = q_dirBNW[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_PMP])[ktse] = calcDistributionBC_AD(q, c1o216, -vx1+vx2-vx3, cu_sq, f_BNW, f_TSE, omegaDiffusivity, -jTan1+jTan2-jTan3, concentration); }
+        q = q_dirBSE[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_MPP])[ktnw] = calcDistributionBC_AD(q, c1o216,  vx1-vx2-vx3, cu_sq, f_BSE, f_TNW, omegaDiffusivity,  jTan1-jTan2-jTan3, concentration); }
+        q = q_dirTNW[k]; if (q >= c0o1 && q <= c1o1) { (DAD.f[DIR_PMM])[kbse] = calcDistributionBC_AD(q, c1o216, -vx1+vx2+vx3, cu_sq, f_TNW, f_BSE, omegaDiffusivity, -jTan1+jTan2+jTan3, concentration); }
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     }
 }
diff --git a/src/gpu/VirtualFluids_GPU/GPU/CP27.cu b/src/gpu/VirtualFluids_GPU/GPU/CP27.cu
index 9a02f5544b9f7ddb2228e87ca9a35cbd7c332a09..1ef111330c0d4293c14d66893847689ad8fac77f 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/CP27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/CP27.cu
@@ -7,7 +7,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void CalcCP27(real* DD, 
+__global__ void CalcCP27(real* DD, 
 									int* cpIndex, 
 									int nonCp, 
 									double *cpPress,
@@ -20,63 +20,63 @@ extern "C" __global__ void CalcCP27(real* DD,
 	Distributions27 D;
 	if (isEvenTimestep==true)
 	{
-		D.f[E   ] = &DD[E   *size_Mat];
-		D.f[W   ] = &DD[W   *size_Mat];
-		D.f[N   ] = &DD[N   *size_Mat];
-		D.f[S   ] = &DD[S   *size_Mat];
-		D.f[T   ] = &DD[T   *size_Mat];
-		D.f[B   ] = &DD[B   *size_Mat];
-		D.f[NE  ] = &DD[NE  *size_Mat];
-		D.f[SW  ] = &DD[SW  *size_Mat];
-		D.f[SE  ] = &DD[SE  *size_Mat];
-		D.f[NW  ] = &DD[NW  *size_Mat];
-		D.f[TE  ] = &DD[TE  *size_Mat];
-		D.f[BW  ] = &DD[BW  *size_Mat];
-		D.f[BE  ] = &DD[BE  *size_Mat];
-		D.f[TW  ] = &DD[TW  *size_Mat];
-		D.f[TN  ] = &DD[TN  *size_Mat];
-		D.f[BS  ] = &DD[BS  *size_Mat];
-		D.f[BN  ] = &DD[BN  *size_Mat];
-		D.f[TS  ] = &DD[TS  *size_Mat];
-		D.f[REST] = &DD[REST*size_Mat];
-		D.f[TNE ] = &DD[TNE *size_Mat];
-		D.f[TSW ] = &DD[TSW *size_Mat];
-		D.f[TSE ] = &DD[TSE *size_Mat];
-		D.f[TNW ] = &DD[TNW *size_Mat];
-		D.f[BNE ] = &DD[BNE *size_Mat];
-		D.f[BSW ] = &DD[BSW *size_Mat];
-		D.f[BSE ] = &DD[BSE *size_Mat];
-		D.f[BNW ] = &DD[BNW *size_Mat];
+		D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+		D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+		D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+		D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+		D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+		D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+		D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+		D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+		D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+		D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+		D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+		D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+		D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+		D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+		D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+		D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+		D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+		D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+		D.f[DIR_000] = &DD[DIR_000*size_Mat];
+		D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+		D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+		D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+		D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+		D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+		D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+		D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+		D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
 	} 
 	else
 	{
-		D.f[W   ] = &DD[E   *size_Mat];
-		D.f[E   ] = &DD[W   *size_Mat];
-		D.f[S   ] = &DD[N   *size_Mat];
-		D.f[N   ] = &DD[S   *size_Mat];
-		D.f[B   ] = &DD[T   *size_Mat];
-		D.f[T   ] = &DD[B   *size_Mat];
-		D.f[SW  ] = &DD[NE  *size_Mat];
-		D.f[NE  ] = &DD[SW  *size_Mat];
-		D.f[NW  ] = &DD[SE  *size_Mat];
-		D.f[SE  ] = &DD[NW  *size_Mat];
-		D.f[BW  ] = &DD[TE  *size_Mat];
-		D.f[TE  ] = &DD[BW  *size_Mat];
-		D.f[TW  ] = &DD[BE  *size_Mat];
-		D.f[BE  ] = &DD[TW  *size_Mat];
-		D.f[BS  ] = &DD[TN  *size_Mat];
-		D.f[TN  ] = &DD[BS  *size_Mat];
-		D.f[TS  ] = &DD[BN  *size_Mat];
-		D.f[BN  ] = &DD[TS  *size_Mat];
-		D.f[REST] = &DD[REST*size_Mat];
-		D.f[TNE ] = &DD[BSW *size_Mat];
-		D.f[TSW ] = &DD[BNE *size_Mat];
-		D.f[TSE ] = &DD[BNW *size_Mat];
-		D.f[TNW ] = &DD[BSE *size_Mat];
-		D.f[BNE ] = &DD[TSW *size_Mat];
-		D.f[BSW ] = &DD[TNE *size_Mat];
-		D.f[BSE ] = &DD[TNW *size_Mat];
-		D.f[BNW ] = &DD[TSE *size_Mat];
+		D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+		D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+		D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+		D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+		D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+		D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+		D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+		D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+		D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+		D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+		D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+		D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+		D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+		D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+		D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+		D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+		D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+		D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+		D.f[DIR_000] = &DD[DIR_000*size_Mat];
+		D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+		D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+		D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+		D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+		D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+		D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+		D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+		D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
 	}
 	////////////////////////////////////////////////////////////////////////////////
 	const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -124,20 +124,20 @@ extern "C" __global__ void CalcCP27(real* DD,
 		////////////////////////////////////////////////////////////////////////////////
 		double PressCP;
 
-		PressCP  =   (D.f[E   ])[ke  ]+ (D.f[W   ])[kw  ]+ 
-                     (D.f[N   ])[kn  ]+ (D.f[S   ])[ks  ]+
-                     (D.f[T   ])[kt  ]+ (D.f[B   ])[kb  ]+
-                     (D.f[NE  ])[kne ]+ (D.f[SW  ])[ksw ]+
-                     (D.f[SE  ])[kse ]+ (D.f[NW  ])[knw ]+
-                     (D.f[TE  ])[kte ]+ (D.f[BW  ])[kbw ]+
-                     (D.f[BE  ])[kbe ]+ (D.f[TW  ])[ktw ]+
-                     (D.f[TN  ])[ktn ]+ (D.f[BS  ])[kbs ]+
-                     (D.f[BN  ])[kbn ]+ (D.f[TS  ])[kts ]+
-                     (D.f[REST])[kzero]+ 
-                     (D.f[TNE ])[ktne]+ (D.f[TSW ])[ktsw]+ 
-                     (D.f[TSE ])[ktse]+ (D.f[TNW ])[ktnw]+ 
-                     (D.f[BNE ])[kbne]+ (D.f[BSW ])[kbsw]+ 
-                     (D.f[BSE ])[kbse]+ (D.f[BNW ])[kbnw];
+		PressCP  =   (D.f[DIR_P00   ])[ke  ]+ (D.f[DIR_M00   ])[kw  ]+ 
+                     (D.f[DIR_0P0   ])[kn  ]+ (D.f[DIR_0M0   ])[ks  ]+
+                     (D.f[DIR_00P   ])[kt  ]+ (D.f[DIR_00M   ])[kb  ]+
+                     (D.f[DIR_PP0  ])[kne ]+ (D.f[DIR_MM0  ])[ksw ]+
+                     (D.f[DIR_PM0  ])[kse ]+ (D.f[DIR_MP0  ])[knw ]+
+                     (D.f[DIR_P0P  ])[kte ]+ (D.f[DIR_M0M  ])[kbw ]+
+                     (D.f[DIR_P0M  ])[kbe ]+ (D.f[DIR_M0P  ])[ktw ]+
+                     (D.f[DIR_0PP  ])[ktn ]+ (D.f[DIR_0MM  ])[kbs ]+
+                     (D.f[DIR_0PM  ])[kbn ]+ (D.f[DIR_0MP  ])[kts ]+
+                     (D.f[DIR_000])[kzero]+ 
+                     (D.f[DIR_PPP ])[ktne]+ (D.f[DIR_MMP ])[ktsw]+ 
+                     (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]+ 
+                     (D.f[DIR_PPM ])[kbne]+ (D.f[DIR_MMM ])[kbsw]+ 
+                     (D.f[DIR_PMM ])[kbse]+ (D.f[DIR_MPM ])[kbnw];
 		////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 		cpPress[k] = PressCP;
 		////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/GPU/Calc2ndMoments27.cu b/src/gpu/VirtualFluids_GPU/GPU/Calc2ndMoments27.cu
index e3755ff2796ff180f0a3ae139f1c450cfcc4296a..ce8fe68cd6a2e8f09f150cb0ccdec502a6278b50 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/Calc2ndMoments27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/Calc2ndMoments27.cu
@@ -7,7 +7,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LBCalc2ndMomentsIncompSP27(  real* kxyFromfcNEQ,
+__global__ void LBCalc2ndMomentsIncompSP27(  real* kxyFromfcNEQ,
 														real* kyzFromfcNEQ,
 														real* kxzFromfcNEQ,
 														real* kxxMyyFromfcNEQ,
@@ -23,63 +23,63 @@ extern "C" __global__ void LBCalc2ndMomentsIncompSP27(  real* kxyFromfcNEQ,
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[E   ] = &DD[E   *size_Mat];
-      D.f[W   ] = &DD[W   *size_Mat];
-      D.f[N   ] = &DD[N   *size_Mat];
-      D.f[S   ] = &DD[S   *size_Mat];
-      D.f[T   ] = &DD[T   *size_Mat];
-      D.f[B   ] = &DD[B   *size_Mat];
-      D.f[NE  ] = &DD[NE  *size_Mat];
-      D.f[SW  ] = &DD[SW  *size_Mat];
-      D.f[SE  ] = &DD[SE  *size_Mat];
-      D.f[NW  ] = &DD[NW  *size_Mat];
-      D.f[TE  ] = &DD[TE  *size_Mat];
-      D.f[BW  ] = &DD[BW  *size_Mat];
-      D.f[BE  ] = &DD[BE  *size_Mat];
-      D.f[TW  ] = &DD[TW  *size_Mat];
-      D.f[TN  ] = &DD[TN  *size_Mat];
-      D.f[BS  ] = &DD[BS  *size_Mat];
-      D.f[BN  ] = &DD[BN  *size_Mat];
-      D.f[TS  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[TNE *size_Mat];
-      D.f[TSW ] = &DD[TSW *size_Mat];
-      D.f[TSE ] = &DD[TSE *size_Mat];
-      D.f[TNW ] = &DD[TNW *size_Mat];
-      D.f[BNE ] = &DD[BNE *size_Mat];
-      D.f[BSW ] = &DD[BSW *size_Mat];
-      D.f[BSE ] = &DD[BSE *size_Mat];
-      D.f[BNW ] = &DD[BNW *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
    } 
    else
    {
-      D.f[W   ] = &DD[E   *size_Mat];
-      D.f[E   ] = &DD[W   *size_Mat];
-      D.f[S   ] = &DD[N   *size_Mat];
-      D.f[N   ] = &DD[S   *size_Mat];
-      D.f[B   ] = &DD[T   *size_Mat];
-      D.f[T   ] = &DD[B   *size_Mat];
-      D.f[SW  ] = &DD[NE  *size_Mat];
-      D.f[NE  ] = &DD[SW  *size_Mat];
-      D.f[NW  ] = &DD[SE  *size_Mat];
-      D.f[SE  ] = &DD[NW  *size_Mat];
-      D.f[BW  ] = &DD[TE  *size_Mat];
-      D.f[TE  ] = &DD[BW  *size_Mat];
-      D.f[TW  ] = &DD[BE  *size_Mat];
-      D.f[BE  ] = &DD[TW  *size_Mat];
-      D.f[BS  ] = &DD[TN  *size_Mat];
-      D.f[TN  ] = &DD[BS  *size_Mat];
-      D.f[TS  ] = &DD[BN  *size_Mat];
-      D.f[BN  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[BSW *size_Mat];
-      D.f[TSW ] = &DD[BNE *size_Mat];
-      D.f[TSE ] = &DD[BNW *size_Mat];
-      D.f[TNW ] = &DD[BSE *size_Mat];
-      D.f[BNE ] = &DD[TSW *size_Mat];
-      D.f[BSW ] = &DD[TNE *size_Mat];
-      D.f[BSE ] = &DD[TNW *size_Mat];
-      D.f[BNW ] = &DD[TSE *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -125,33 +125,33 @@ extern "C" __global__ void LBCalc2ndMomentsIncompSP27(  real* kxyFromfcNEQ,
       unsigned int kbsw = neighborZ[ksw];
       //////////////////////////////////////////////////////////////////////////
       real        f_E,f_W,f_N,f_S,f_T,f_B,f_NE,f_SW,f_SE,f_NW,f_TE,f_BW,f_BE,f_TW,f_TN,f_BS,f_BN,f_TS,/*f_ZERO,*/f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
-	  f_E    = (D.f[E   ])[ke   ];
-	  f_W    = (D.f[W   ])[kw   ];
-	  f_N    = (D.f[N   ])[kn   ];
-	  f_S    = (D.f[S   ])[ks   ];
-	  f_T    = (D.f[T   ])[kt   ];
-	  f_B    = (D.f[B   ])[kb   ];
-	  f_NE   = (D.f[NE  ])[kne  ];
-	  f_SW   = (D.f[SW  ])[ksw  ];
-	  f_SE   = (D.f[SE  ])[kse  ];
-	  f_NW   = (D.f[NW  ])[knw  ];
-	  f_TE   = (D.f[TE  ])[kte  ];
-	  f_BW   = (D.f[BW  ])[kbw  ];
-	  f_BE   = (D.f[BE  ])[kbe  ];
-	  f_TW   = (D.f[TW  ])[ktw  ];
-	  f_TN   = (D.f[TN  ])[ktn  ];
-	  f_BS   = (D.f[BS  ])[kbs  ];
-	  f_BN   = (D.f[BN  ])[kbn  ];
-	  f_TS   = (D.f[TS  ])[kts  ];
-	  //f_ZERO = (D.f[REST])[kzero];
-	  f_TNE  = (D.f[TNE ])[ktne ];
-	  f_TSW  = (D.f[TSW ])[ktsw ];
-	  f_TSE  = (D.f[TSE ])[ktse ];
-	  f_TNW  = (D.f[TNW ])[ktnw ];
-	  f_BNE  = (D.f[BNE ])[kbne ];
-	  f_BSW  = (D.f[BSW ])[kbsw ];
-	  f_BSE  = (D.f[BSE ])[kbse ];
-	  f_BNW  = (D.f[BNW ])[kbnw ];
+	  f_E    = (D.f[DIR_P00   ])[ke   ];
+	  f_W    = (D.f[DIR_M00   ])[kw   ];
+	  f_N    = (D.f[DIR_0P0   ])[kn   ];
+	  f_S    = (D.f[DIR_0M0   ])[ks   ];
+	  f_T    = (D.f[DIR_00P   ])[kt   ];
+	  f_B    = (D.f[DIR_00M   ])[kb   ];
+	  f_NE   = (D.f[DIR_PP0  ])[kne  ];
+	  f_SW   = (D.f[DIR_MM0  ])[ksw  ];
+	  f_SE   = (D.f[DIR_PM0  ])[kse  ];
+	  f_NW   = (D.f[DIR_MP0  ])[knw  ];
+	  f_TE   = (D.f[DIR_P0P  ])[kte  ];
+	  f_BW   = (D.f[DIR_M0M  ])[kbw  ];
+	  f_BE   = (D.f[DIR_P0M  ])[kbe  ];
+	  f_TW   = (D.f[DIR_M0P  ])[ktw  ];
+	  f_TN   = (D.f[DIR_0PP  ])[ktn  ];
+	  f_BS   = (D.f[DIR_0MM  ])[kbs  ];
+	  f_BN   = (D.f[DIR_0PM  ])[kbn  ];
+	  f_TS   = (D.f[DIR_0MP  ])[kts  ];
+	  //f_ZERO = (D.f[DIR_000])[kzero];
+	  f_TNE  = (D.f[DIR_PPP ])[ktne ];
+	  f_TSW  = (D.f[DIR_MMP ])[ktsw ];
+	  f_TSE  = (D.f[DIR_PMP ])[ktse ];
+	  f_TNW  = (D.f[DIR_MPP ])[ktnw ];
+	  f_BNE  = (D.f[DIR_PPM ])[kbne ];
+	  f_BSW  = (D.f[DIR_MMM ])[kbsw ];
+	  f_BSE  = (D.f[DIR_PMM ])[kbse ];
+	  f_BNW  = (D.f[DIR_MPM ])[kbnw ];
       //////////////////////////////////////////////////////////////////////////
 	  real vx1, vx2, vx3;
       kxyFromfcNEQ[k]       = c0o1;
@@ -168,8 +168,8 @@ extern "C" __global__ void LBCalc2ndMomentsIncompSP27(  real* kxyFromfcNEQ,
 		  kxyFromfcNEQ[k]    = -c3o1 *(f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE-(vx1*vx2));
 		  kyzFromfcNEQ[k]    = -c3o1 *(f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW-(vx2*vx3));
 		  kxzFromfcNEQ[k]    = -c3o1 *(f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE-(vx1*vx3));
-		  kxxMyyFromfcNEQ[k] = -c3o2 * (f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE-(vx1*vx1-vx2*vx2));		//all E+W minus all N+S (no combinations of xy left)
-		  kxxMzzFromfcNEQ[k] = -c3o2 * (f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE-(vx1*vx1-vx3*vx3));		//all E+W minus all T+B (no combinations of xz left)
+		  kxxMyyFromfcNEQ[k] = -c3o2 * (f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE-(vx1*vx1-vx2*vx2));		//all DIR_P00+DIR_M00 minus all DIR_0P0+DIR_0M0 (no combinations of xy left)
+		  kxxMzzFromfcNEQ[k] = -c3o2 * (f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE-(vx1*vx1-vx3*vx3));		//all DIR_P00+DIR_M00 minus all DIR_00P+DIR_00M (no combinations of xz left)
       }
    }
 }
@@ -206,7 +206,7 @@ extern "C" __global__ void LBCalc2ndMomentsIncompSP27(  real* kxyFromfcNEQ,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LBCalc2ndMomentsCompSP27(real* kxyFromfcNEQ,
+__global__ void LBCalc2ndMomentsCompSP27(real* kxyFromfcNEQ,
 													real* kyzFromfcNEQ,
 													real* kxzFromfcNEQ,
 													real* kxxMyyFromfcNEQ,
@@ -222,63 +222,63 @@ extern "C" __global__ void LBCalc2ndMomentsCompSP27(real* kxyFromfcNEQ,
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[E   ] = &DD[E   *size_Mat];
-      D.f[W   ] = &DD[W   *size_Mat];
-      D.f[N   ] = &DD[N   *size_Mat];
-      D.f[S   ] = &DD[S   *size_Mat];
-      D.f[T   ] = &DD[T   *size_Mat];
-      D.f[B   ] = &DD[B   *size_Mat];
-      D.f[NE  ] = &DD[NE  *size_Mat];
-      D.f[SW  ] = &DD[SW  *size_Mat];
-      D.f[SE  ] = &DD[SE  *size_Mat];
-      D.f[NW  ] = &DD[NW  *size_Mat];
-      D.f[TE  ] = &DD[TE  *size_Mat];
-      D.f[BW  ] = &DD[BW  *size_Mat];
-      D.f[BE  ] = &DD[BE  *size_Mat];
-      D.f[TW  ] = &DD[TW  *size_Mat];
-      D.f[TN  ] = &DD[TN  *size_Mat];
-      D.f[BS  ] = &DD[BS  *size_Mat];
-      D.f[BN  ] = &DD[BN  *size_Mat];
-      D.f[TS  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[TNE *size_Mat];
-      D.f[TSW ] = &DD[TSW *size_Mat];
-      D.f[TSE ] = &DD[TSE *size_Mat];
-      D.f[TNW ] = &DD[TNW *size_Mat];
-      D.f[BNE ] = &DD[BNE *size_Mat];
-      D.f[BSW ] = &DD[BSW *size_Mat];
-      D.f[BSE ] = &DD[BSE *size_Mat];
-      D.f[BNW ] = &DD[BNW *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
    } 
    else
    {
-      D.f[W   ] = &DD[E   *size_Mat];
-      D.f[E   ] = &DD[W   *size_Mat];
-      D.f[S   ] = &DD[N   *size_Mat];
-      D.f[N   ] = &DD[S   *size_Mat];
-      D.f[B   ] = &DD[T   *size_Mat];
-      D.f[T   ] = &DD[B   *size_Mat];
-      D.f[SW  ] = &DD[NE  *size_Mat];
-      D.f[NE  ] = &DD[SW  *size_Mat];
-      D.f[NW  ] = &DD[SE  *size_Mat];
-      D.f[SE  ] = &DD[NW  *size_Mat];
-      D.f[BW  ] = &DD[TE  *size_Mat];
-      D.f[TE  ] = &DD[BW  *size_Mat];
-      D.f[TW  ] = &DD[BE  *size_Mat];
-      D.f[BE  ] = &DD[TW  *size_Mat];
-      D.f[BS  ] = &DD[TN  *size_Mat];
-      D.f[TN  ] = &DD[BS  *size_Mat];
-      D.f[TS  ] = &DD[BN  *size_Mat];
-      D.f[BN  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[BSW *size_Mat];
-      D.f[TSW ] = &DD[BNE *size_Mat];
-      D.f[TSE ] = &DD[BNW *size_Mat];
-      D.f[TNW ] = &DD[BSE *size_Mat];
-      D.f[BNE ] = &DD[TSW *size_Mat];
-      D.f[BSW ] = &DD[TNE *size_Mat];
-      D.f[BSE ] = &DD[TNW *size_Mat];
-      D.f[BNW ] = &DD[TSE *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -325,33 +325,33 @@ extern "C" __global__ void LBCalc2ndMomentsCompSP27(real* kxyFromfcNEQ,
       //////////////////////////////////////////////////////////////////////////
       real f_ZERO;
       real        f_E,f_W,f_N,f_S,f_T,f_B,f_NE,f_SW,f_SE,f_NW,f_TE,f_BW,f_BE,f_TW,f_TN,f_BS,f_BN,f_TS,f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
-	  f_E    = (D.f[E   ])[ke   ];
-	  f_W    = (D.f[W   ])[kw   ];
-	  f_N    = (D.f[N   ])[kn   ];
-	  f_S    = (D.f[S   ])[ks   ];
-	  f_T    = (D.f[T   ])[kt   ];
-	  f_B    = (D.f[B   ])[kb   ];
-	  f_NE   = (D.f[NE  ])[kne  ];
-	  f_SW   = (D.f[SW  ])[ksw  ];
-	  f_SE   = (D.f[SE  ])[kse  ];
-	  f_NW   = (D.f[NW  ])[knw  ];
-	  f_TE   = (D.f[TE  ])[kte  ];
-	  f_BW   = (D.f[BW  ])[kbw  ];
-	  f_BE   = (D.f[BE  ])[kbe  ];
-	  f_TW   = (D.f[TW  ])[ktw  ];
-	  f_TN   = (D.f[TN  ])[ktn  ];
-	  f_BS   = (D.f[BS  ])[kbs  ];
-	  f_BN   = (D.f[BN  ])[kbn  ];
-	  f_TS   = (D.f[TS  ])[kts  ];
-	  f_ZERO = (D.f[REST])[kzero];
-	  f_TNE  = (D.f[TNE ])[ktne ];
-	  f_TSW  = (D.f[TSW ])[ktsw ];
-	  f_TSE  = (D.f[TSE ])[ktse ];
-	  f_TNW  = (D.f[TNW ])[ktnw ];
-	  f_BNE  = (D.f[BNE ])[kbne ];
-	  f_BSW  = (D.f[BSW ])[kbsw ];
-	  f_BSE  = (D.f[BSE ])[kbse ];
-	  f_BNW  = (D.f[BNW ])[kbnw ];
+	  f_E    = (D.f[DIR_P00   ])[ke   ];
+	  f_W    = (D.f[DIR_M00   ])[kw   ];
+	  f_N    = (D.f[DIR_0P0   ])[kn   ];
+	  f_S    = (D.f[DIR_0M0   ])[ks   ];
+	  f_T    = (D.f[DIR_00P   ])[kt   ];
+	  f_B    = (D.f[DIR_00M   ])[kb   ];
+	  f_NE   = (D.f[DIR_PP0  ])[kne  ];
+	  f_SW   = (D.f[DIR_MM0  ])[ksw  ];
+	  f_SE   = (D.f[DIR_PM0  ])[kse  ];
+	  f_NW   = (D.f[DIR_MP0  ])[knw  ];
+	  f_TE   = (D.f[DIR_P0P  ])[kte  ];
+	  f_BW   = (D.f[DIR_M0M  ])[kbw  ];
+	  f_BE   = (D.f[DIR_P0M  ])[kbe  ];
+	  f_TW   = (D.f[DIR_M0P  ])[ktw  ];
+	  f_TN   = (D.f[DIR_0PP  ])[ktn  ];
+	  f_BS   = (D.f[DIR_0MM  ])[kbs  ];
+	  f_BN   = (D.f[DIR_0PM  ])[kbn  ];
+	  f_TS   = (D.f[DIR_0MP  ])[kts  ];
+	  f_ZERO = (D.f[DIR_000])[kzero];
+	  f_TNE  = (D.f[DIR_PPP ])[ktne ];
+	  f_TSW  = (D.f[DIR_MMP ])[ktsw ];
+	  f_TSE  = (D.f[DIR_PMP ])[ktse ];
+	  f_TNW  = (D.f[DIR_MPP ])[ktnw ];
+	  f_BNE  = (D.f[DIR_PPM ])[kbne ];
+	  f_BSW  = (D.f[DIR_MMM ])[kbsw ];
+	  f_BSE  = (D.f[DIR_PMM ])[kbse ];
+	  f_BNW  = (D.f[DIR_MPM ])[kbnw ];
       //////////////////////////////////////////////////////////////////////////
 	  real drho;
 	  real vx1, vx2, vx3, rho;
@@ -373,8 +373,8 @@ extern "C" __global__ void LBCalc2ndMomentsCompSP27(real* kxyFromfcNEQ,
 		  kxyFromfcNEQ[k]    = -c3o1 *(f_SW+f_BSW+f_TSW-f_NW-f_BNW-f_TNW-f_SE-f_BSE-f_TSE+f_NE+f_BNE+f_TNE-(vx1*vx2));
 		  kyzFromfcNEQ[k]    = -c3o1 *(f_BS+f_BSE+f_BSW-f_TS-f_TSE-f_TSW-f_BN-f_BNE-f_BNW+f_TN+f_TNE+f_TNW-(vx2*vx3));
 		  kxzFromfcNEQ[k]    = -c3o1 *(f_BW+f_BSW+f_BNW-f_TW-f_TSW-f_TNW-f_BE-f_BSE-f_BNE+f_TE+f_TSE+f_TNE-(vx1*vx3));
-		  kxxMyyFromfcNEQ[k] = -c3o2 * (f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE-(vx1*vx1-vx2*vx2));		//all E+W minus all N+S (no combinations of xy left)
-		  kxxMzzFromfcNEQ[k] = -c3o2 * (f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE-(vx1*vx1-vx3*vx3));		//all E+W minus all T+B (no combinations of xz left)
+		  kxxMyyFromfcNEQ[k] = -c3o2 * (f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE-(vx1*vx1-vx2*vx2));		//all DIR_P00+DIR_M00 minus all DIR_0P0+DIR_0M0 (no combinations of xy left)
+		  kxxMzzFromfcNEQ[k] = -c3o2 * (f_SW+f_W+f_NW-f_BS-f_TS-f_B-f_T-f_BN-f_TN+f_SE+f_E+f_NE-(vx1*vx1-vx3*vx3));		//all DIR_P00+DIR_M00 minus all DIR_00P+DIR_00M (no combinations of xz left)
       }
    }
 }
@@ -411,7 +411,7 @@ extern "C" __global__ void LBCalc2ndMomentsCompSP27(real* kxyFromfcNEQ,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LBCalc3rdMomentsIncompSP27(  real* CUMbbb,
+__global__ void LBCalc3rdMomentsIncompSP27(  real* CUMbbb,
 														real* CUMabc,
 														real* CUMbac,
 														real* CUMbca,
@@ -448,63 +448,63 @@ extern "C" __global__ void LBCalc3rdMomentsIncompSP27(  real* CUMbbb,
 			Distributions27 D;
 			if (EvenOrOdd==true)
 			{
-				D.f[E   ] = &DDStart[E   *size_Mat];
-				D.f[W   ] = &DDStart[W   *size_Mat];
-				D.f[N   ] = &DDStart[N   *size_Mat];
-				D.f[S   ] = &DDStart[S   *size_Mat];
-				D.f[T   ] = &DDStart[T   *size_Mat];
-				D.f[B   ] = &DDStart[B   *size_Mat];
-				D.f[NE  ] = &DDStart[NE  *size_Mat];
-				D.f[SW  ] = &DDStart[SW  *size_Mat];
-				D.f[SE  ] = &DDStart[SE  *size_Mat];
-				D.f[NW  ] = &DDStart[NW  *size_Mat];
-				D.f[TE  ] = &DDStart[TE  *size_Mat];
-				D.f[BW  ] = &DDStart[BW  *size_Mat];
-				D.f[BE  ] = &DDStart[BE  *size_Mat];
-				D.f[TW  ] = &DDStart[TW  *size_Mat];
-				D.f[TN  ] = &DDStart[TN  *size_Mat];
-				D.f[BS  ] = &DDStart[BS  *size_Mat];
-				D.f[BN  ] = &DDStart[BN  *size_Mat];
-				D.f[TS  ] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[TNE ] = &DDStart[TNE *size_Mat];
-				D.f[TSW ] = &DDStart[TSW *size_Mat];
-				D.f[TSE ] = &DDStart[TSE *size_Mat];
-				D.f[TNW ] = &DDStart[TNW *size_Mat];
-				D.f[BNE ] = &DDStart[BNE *size_Mat];
-				D.f[BSW ] = &DDStart[BSW *size_Mat];
-				D.f[BSE ] = &DDStart[BSE *size_Mat];
-				D.f[BNW ] = &DDStart[BNW *size_Mat];
+				D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
 			}
 			else
 			{
-				D.f[W   ] = &DDStart[E   *size_Mat];
-				D.f[E   ] = &DDStart[W   *size_Mat];
-				D.f[S   ] = &DDStart[N   *size_Mat];
-				D.f[N   ] = &DDStart[S   *size_Mat];
-				D.f[B   ] = &DDStart[T   *size_Mat];
-				D.f[T   ] = &DDStart[B   *size_Mat];
-				D.f[SW  ] = &DDStart[NE  *size_Mat];
-				D.f[NE  ] = &DDStart[SW  *size_Mat];
-				D.f[NW  ] = &DDStart[SE  *size_Mat];
-				D.f[SE  ] = &DDStart[NW  *size_Mat];
-				D.f[BW  ] = &DDStart[TE  *size_Mat];
-				D.f[TE  ] = &DDStart[BW  *size_Mat];
-				D.f[TW  ] = &DDStart[BE  *size_Mat];
-				D.f[BE  ] = &DDStart[TW  *size_Mat];
-				D.f[BS  ] = &DDStart[TN  *size_Mat];
-				D.f[TN  ] = &DDStart[BS  *size_Mat];
-				D.f[TS  ] = &DDStart[BN  *size_Mat];
-				D.f[BN  ] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[BSW ] = &DDStart[TNE *size_Mat];
-				D.f[BNE ] = &DDStart[TSW *size_Mat];
-				D.f[BNW ] = &DDStart[TSE *size_Mat];
-				D.f[BSE ] = &DDStart[TNW *size_Mat];
-				D.f[TSW ] = &DDStart[BNE *size_Mat];
-				D.f[TNE ] = &DDStart[BSW *size_Mat];
-				D.f[TNW ] = &DDStart[BSE *size_Mat];
-				D.f[TSE ] = &DDStart[BNW *size_Mat];
+				D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -517,33 +517,33 @@ extern "C" __global__ void LBCalc3rdMomentsIncompSP27(  real* CUMbbb,
 			unsigned int kbs  = neighborZ[ks];
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[E   ])[k  ];
-			real mfabb = (D.f[W   ])[kw ];
-			real mfbcb = (D.f[N   ])[k  ];
-			real mfbab = (D.f[S   ])[ks ];
-			real mfbbc = (D.f[T   ])[k  ];
-			real mfbba = (D.f[B   ])[kb ];
-			real mfccb = (D.f[NE  ])[k  ];
-			real mfaab = (D.f[SW  ])[ksw];
-			real mfcab = (D.f[SE  ])[ks ];
-			real mfacb = (D.f[NW  ])[kw ];
-			real mfcbc = (D.f[TE  ])[k  ];
-			real mfaba = (D.f[BW  ])[kbw];
-			real mfcba = (D.f[BE  ])[kb ];
-			real mfabc = (D.f[TW  ])[kw ];
-			real mfbcc = (D.f[TN  ])[k  ];
-			real mfbaa = (D.f[BS  ])[kbs];
-			real mfbca = (D.f[BN  ])[kb ];
-			real mfbac = (D.f[TS  ])[ks ];
-			real mfbbb = (D.f[REST])[k  ];
-			real mfccc = (D.f[TNE ])[k  ];
-			real mfaac = (D.f[TSW ])[ksw];
-			real mfcac = (D.f[TSE ])[ks ];
-			real mfacc = (D.f[TNW ])[kw ];
-			real mfcca = (D.f[BNE ])[kb ];
-			real mfaaa = (D.f[BSW ])[kbsw];
-			real mfcaa = (D.f[BSE ])[kbs];
-			real mfaca = (D.f[BNW ])[kbw];
+			real mfcbb = (D.f[DIR_P00   ])[k  ];
+			real mfabb = (D.f[DIR_M00   ])[kw ];
+			real mfbcb = (D.f[DIR_0P0   ])[k  ];
+			real mfbab = (D.f[DIR_0M0   ])[ks ];
+			real mfbbc = (D.f[DIR_00P   ])[k  ];
+			real mfbba = (D.f[DIR_00M   ])[kb ];
+			real mfccb = (D.f[DIR_PP0  ])[k  ];
+			real mfaab = (D.f[DIR_MM0  ])[ksw];
+			real mfcab = (D.f[DIR_PM0  ])[ks ];
+			real mfacb = (D.f[DIR_MP0  ])[kw ];
+			real mfcbc = (D.f[DIR_P0P  ])[k  ];
+			real mfaba = (D.f[DIR_M0M  ])[kbw];
+			real mfcba = (D.f[DIR_P0M  ])[kb ];
+			real mfabc = (D.f[DIR_M0P  ])[kw ];
+			real mfbcc = (D.f[DIR_0PP  ])[k  ];
+			real mfbaa = (D.f[DIR_0MM  ])[kbs];
+			real mfbca = (D.f[DIR_0PM  ])[kb ];
+			real mfbac = (D.f[DIR_0MP  ])[ks ];
+			real mfbbb = (D.f[DIR_000])[k  ];
+			real mfccc = (D.f[DIR_PPP ])[k  ];
+			real mfaac = (D.f[DIR_MMP ])[ksw];
+			real mfcac = (D.f[DIR_PMP ])[ks ];
+			real mfacc = (D.f[DIR_MPP ])[kw ];
+			real mfcca = (D.f[DIR_PPM ])[kb ];
+			real mfaaa = (D.f[DIR_MMM ])[kbsw];
+			real mfcaa = (D.f[DIR_PMM ])[kbs];
+			real mfaca = (D.f[DIR_MPM ])[kbw];
 			////////////////////////////////////////////////////////////////////////////////////
 			real vvx    =((((mfccc-mfaaa) + (mfcac-mfaca)) + ((mfcaa-mfacc) + (mfcca-mfaac))) + 
 						     (((mfcba-mfabc) + (mfcbc-mfaba)) + ((mfcab-mfacb) + (mfccb-mfaab))) +
@@ -845,7 +845,7 @@ extern "C" __global__ void LBCalc3rdMomentsIncompSP27(  real* CUMbbb,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LBCalc3rdMomentsCompSP27(real* CUMbbb,
+__global__ void LBCalc3rdMomentsCompSP27(real* CUMbbb,
 													real* CUMabc,
 													real* CUMbac,
 													real* CUMbca,
@@ -882,63 +882,63 @@ extern "C" __global__ void LBCalc3rdMomentsCompSP27(real* CUMbbb,
 			Distributions27 D;
 			if (EvenOrOdd==true)
 			{
-				D.f[E   ] = &DDStart[E   *size_Mat];
-				D.f[W   ] = &DDStart[W   *size_Mat];
-				D.f[N   ] = &DDStart[N   *size_Mat];
-				D.f[S   ] = &DDStart[S   *size_Mat];
-				D.f[T   ] = &DDStart[T   *size_Mat];
-				D.f[B   ] = &DDStart[B   *size_Mat];
-				D.f[NE  ] = &DDStart[NE  *size_Mat];
-				D.f[SW  ] = &DDStart[SW  *size_Mat];
-				D.f[SE  ] = &DDStart[SE  *size_Mat];
-				D.f[NW  ] = &DDStart[NW  *size_Mat];
-				D.f[TE  ] = &DDStart[TE  *size_Mat];
-				D.f[BW  ] = &DDStart[BW  *size_Mat];
-				D.f[BE  ] = &DDStart[BE  *size_Mat];
-				D.f[TW  ] = &DDStart[TW  *size_Mat];
-				D.f[TN  ] = &DDStart[TN  *size_Mat];
-				D.f[BS  ] = &DDStart[BS  *size_Mat];
-				D.f[BN  ] = &DDStart[BN  *size_Mat];
-				D.f[TS  ] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[TNE ] = &DDStart[TNE *size_Mat];
-				D.f[TSW ] = &DDStart[TSW *size_Mat];
-				D.f[TSE ] = &DDStart[TSE *size_Mat];
-				D.f[TNW ] = &DDStart[TNW *size_Mat];
-				D.f[BNE ] = &DDStart[BNE *size_Mat];
-				D.f[BSW ] = &DDStart[BSW *size_Mat];
-				D.f[BSE ] = &DDStart[BSE *size_Mat];
-				D.f[BNW ] = &DDStart[BNW *size_Mat];
+				D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
 			}
 			else
 			{
-				D.f[W   ] = &DDStart[E   *size_Mat];
-				D.f[E   ] = &DDStart[W   *size_Mat];
-				D.f[S   ] = &DDStart[N   *size_Mat];
-				D.f[N   ] = &DDStart[S   *size_Mat];
-				D.f[B   ] = &DDStart[T   *size_Mat];
-				D.f[T   ] = &DDStart[B   *size_Mat];
-				D.f[SW  ] = &DDStart[NE  *size_Mat];
-				D.f[NE  ] = &DDStart[SW  *size_Mat];
-				D.f[NW  ] = &DDStart[SE  *size_Mat];
-				D.f[SE  ] = &DDStart[NW  *size_Mat];
-				D.f[BW  ] = &DDStart[TE  *size_Mat];
-				D.f[TE  ] = &DDStart[BW  *size_Mat];
-				D.f[TW  ] = &DDStart[BE  *size_Mat];
-				D.f[BE  ] = &DDStart[TW  *size_Mat];
-				D.f[BS  ] = &DDStart[TN  *size_Mat];
-				D.f[TN  ] = &DDStart[BS  *size_Mat];
-				D.f[TS  ] = &DDStart[BN  *size_Mat];
-				D.f[BN  ] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[BSW ] = &DDStart[TNE *size_Mat];
-				D.f[BNE ] = &DDStart[TSW *size_Mat];
-				D.f[BNW ] = &DDStart[TSE *size_Mat];
-				D.f[BSE ] = &DDStart[TNW *size_Mat];
-				D.f[TSW ] = &DDStart[BNE *size_Mat];
-				D.f[TNE ] = &DDStart[BSW *size_Mat];
-				D.f[TNW ] = &DDStart[BSE *size_Mat];
-				D.f[TSE ] = &DDStart[BNW *size_Mat];
+				D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -951,33 +951,33 @@ extern "C" __global__ void LBCalc3rdMomentsCompSP27(real* CUMbbb,
 			unsigned int kbs  = neighborZ[ks];
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[E   ])[k  ];
-			real mfabb = (D.f[W   ])[kw ];
-			real mfbcb = (D.f[N   ])[k  ];
-			real mfbab = (D.f[S   ])[ks ];
-			real mfbbc = (D.f[T   ])[k  ];
-			real mfbba = (D.f[B   ])[kb ];
-			real mfccb = (D.f[NE  ])[k  ];
-			real mfaab = (D.f[SW  ])[ksw];
-			real mfcab = (D.f[SE  ])[ks ];
-			real mfacb = (D.f[NW  ])[kw ];
-			real mfcbc = (D.f[TE  ])[k  ];
-			real mfaba = (D.f[BW  ])[kbw];
-			real mfcba = (D.f[BE  ])[kb ];
-			real mfabc = (D.f[TW  ])[kw ];
-			real mfbcc = (D.f[TN  ])[k  ];
-			real mfbaa = (D.f[BS  ])[kbs];
-			real mfbca = (D.f[BN  ])[kb ];
-			real mfbac = (D.f[TS  ])[ks ];
-			real mfbbb = (D.f[REST])[k  ];
-			real mfccc = (D.f[TNE ])[k  ];
-			real mfaac = (D.f[TSW ])[ksw];
-			real mfcac = (D.f[TSE ])[ks ];
-			real mfacc = (D.f[TNW ])[kw ];
-			real mfcca = (D.f[BNE ])[kb ];
-			real mfaaa = (D.f[BSW ])[kbsw];
-			real mfcaa = (D.f[BSE ])[kbs];
-			real mfaca = (D.f[BNW ])[kbw];
+			real mfcbb = (D.f[DIR_P00   ])[k  ];
+			real mfabb = (D.f[DIR_M00   ])[kw ];
+			real mfbcb = (D.f[DIR_0P0   ])[k  ];
+			real mfbab = (D.f[DIR_0M0   ])[ks ];
+			real mfbbc = (D.f[DIR_00P   ])[k  ];
+			real mfbba = (D.f[DIR_00M   ])[kb ];
+			real mfccb = (D.f[DIR_PP0  ])[k  ];
+			real mfaab = (D.f[DIR_MM0  ])[ksw];
+			real mfcab = (D.f[DIR_PM0  ])[ks ];
+			real mfacb = (D.f[DIR_MP0  ])[kw ];
+			real mfcbc = (D.f[DIR_P0P  ])[k  ];
+			real mfaba = (D.f[DIR_M0M  ])[kbw];
+			real mfcba = (D.f[DIR_P0M  ])[kb ];
+			real mfabc = (D.f[DIR_M0P  ])[kw ];
+			real mfbcc = (D.f[DIR_0PP  ])[k  ];
+			real mfbaa = (D.f[DIR_0MM  ])[kbs];
+			real mfbca = (D.f[DIR_0PM  ])[kb ];
+			real mfbac = (D.f[DIR_0MP  ])[ks ];
+			real mfbbb = (D.f[DIR_000])[k  ];
+			real mfccc = (D.f[DIR_PPP ])[k  ];
+			real mfaac = (D.f[DIR_MMP ])[ksw];
+			real mfcac = (D.f[DIR_PMP ])[ks ];
+			real mfacc = (D.f[DIR_MPP ])[kw ];
+			real mfcca = (D.f[DIR_PPM ])[kb ];
+			real mfaaa = (D.f[DIR_MMM ])[kbsw];
+			real mfcaa = (D.f[DIR_PMM ])[kbs];
+			real mfaca = (D.f[DIR_MPM ])[kbw];
 			////////////////////////////////////////////////////////////////////////////////////
 			real drho = ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
 							(((mfbac+mfbca) + (mfbaa+mfbcc)) + ((mfabc+mfcba) + (mfaba+mfcbc)) + ((mfacb+mfcab) + (mfaab+mfccb))) +
@@ -1283,7 +1283,7 @@ extern "C" __global__ void LBCalc3rdMomentsCompSP27(real* CUMbbb,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LBCalcHigherMomentsIncompSP27(   real* CUMcbb,
+__global__ void LBCalcHigherMomentsIncompSP27(   real* CUMcbb,
 															real* CUMbcb,
 															real* CUMbbc,
 															real* CUMcca,
@@ -1323,63 +1323,63 @@ extern "C" __global__ void LBCalcHigherMomentsIncompSP27(   real* CUMcbb,
 			Distributions27 D;
 			if (EvenOrOdd==true)
 			{
-				D.f[E   ] = &DDStart[E   *size_Mat];
-				D.f[W   ] = &DDStart[W   *size_Mat];
-				D.f[N   ] = &DDStart[N   *size_Mat];
-				D.f[S   ] = &DDStart[S   *size_Mat];
-				D.f[T   ] = &DDStart[T   *size_Mat];
-				D.f[B   ] = &DDStart[B   *size_Mat];
-				D.f[NE  ] = &DDStart[NE  *size_Mat];
-				D.f[SW  ] = &DDStart[SW  *size_Mat];
-				D.f[SE  ] = &DDStart[SE  *size_Mat];
-				D.f[NW  ] = &DDStart[NW  *size_Mat];
-				D.f[TE  ] = &DDStart[TE  *size_Mat];
-				D.f[BW  ] = &DDStart[BW  *size_Mat];
-				D.f[BE  ] = &DDStart[BE  *size_Mat];
-				D.f[TW  ] = &DDStart[TW  *size_Mat];
-				D.f[TN  ] = &DDStart[TN  *size_Mat];
-				D.f[BS  ] = &DDStart[BS  *size_Mat];
-				D.f[BN  ] = &DDStart[BN  *size_Mat];
-				D.f[TS  ] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[TNE ] = &DDStart[TNE *size_Mat];
-				D.f[TSW ] = &DDStart[TSW *size_Mat];
-				D.f[TSE ] = &DDStart[TSE *size_Mat];
-				D.f[TNW ] = &DDStart[TNW *size_Mat];
-				D.f[BNE ] = &DDStart[BNE *size_Mat];
-				D.f[BSW ] = &DDStart[BSW *size_Mat];
-				D.f[BSE ] = &DDStart[BSE *size_Mat];
-				D.f[BNW ] = &DDStart[BNW *size_Mat];
+				D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
 			}
 			else
 			{
-				D.f[W   ] = &DDStart[E   *size_Mat];
-				D.f[E   ] = &DDStart[W   *size_Mat];
-				D.f[S   ] = &DDStart[N   *size_Mat];
-				D.f[N   ] = &DDStart[S   *size_Mat];
-				D.f[B   ] = &DDStart[T   *size_Mat];
-				D.f[T   ] = &DDStart[B   *size_Mat];
-				D.f[SW  ] = &DDStart[NE  *size_Mat];
-				D.f[NE  ] = &DDStart[SW  *size_Mat];
-				D.f[NW  ] = &DDStart[SE  *size_Mat];
-				D.f[SE  ] = &DDStart[NW  *size_Mat];
-				D.f[BW  ] = &DDStart[TE  *size_Mat];
-				D.f[TE  ] = &DDStart[BW  *size_Mat];
-				D.f[TW  ] = &DDStart[BE  *size_Mat];
-				D.f[BE  ] = &DDStart[TW  *size_Mat];
-				D.f[BS  ] = &DDStart[TN  *size_Mat];
-				D.f[TN  ] = &DDStart[BS  *size_Mat];
-				D.f[TS  ] = &DDStart[BN  *size_Mat];
-				D.f[BN  ] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[BSW ] = &DDStart[TNE *size_Mat];
-				D.f[BNE ] = &DDStart[TSW *size_Mat];
-				D.f[BNW ] = &DDStart[TSE *size_Mat];
-				D.f[BSE ] = &DDStart[TNW *size_Mat];
-				D.f[TSW ] = &DDStart[BNE *size_Mat];
-				D.f[TNE ] = &DDStart[BSW *size_Mat];
-				D.f[TNW ] = &DDStart[BSE *size_Mat];
-				D.f[TSE ] = &DDStart[BNW *size_Mat];
+				D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -1392,33 +1392,33 @@ extern "C" __global__ void LBCalcHigherMomentsIncompSP27(   real* CUMcbb,
 			unsigned int kbs  = neighborZ[ks];
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[E   ])[k  ];
-			real mfabb = (D.f[W   ])[kw ];
-			real mfbcb = (D.f[N   ])[k  ];
-			real mfbab = (D.f[S   ])[ks ];
-			real mfbbc = (D.f[T   ])[k  ];
-			real mfbba = (D.f[B   ])[kb ];
-			real mfccb = (D.f[NE  ])[k  ];
-			real mfaab = (D.f[SW  ])[ksw];
-			real mfcab = (D.f[SE  ])[ks ];
-			real mfacb = (D.f[NW  ])[kw ];
-			real mfcbc = (D.f[TE  ])[k  ];
-			real mfaba = (D.f[BW  ])[kbw];
-			real mfcba = (D.f[BE  ])[kb ];
-			real mfabc = (D.f[TW  ])[kw ];
-			real mfbcc = (D.f[TN  ])[k  ];
-			real mfbaa = (D.f[BS  ])[kbs];
-			real mfbca = (D.f[BN  ])[kb ];
-			real mfbac = (D.f[TS  ])[ks ];
-			real mfbbb = (D.f[REST])[k  ];
-			real mfccc = (D.f[TNE ])[k  ];
-			real mfaac = (D.f[TSW ])[ksw];
-			real mfcac = (D.f[TSE ])[ks ];
-			real mfacc = (D.f[TNW ])[kw ];
-			real mfcca = (D.f[BNE ])[kb ];
-			real mfaaa = (D.f[BSW ])[kbsw];
-			real mfcaa = (D.f[BSE ])[kbs];
-			real mfaca = (D.f[BNW ])[kbw];
+			real mfcbb = (D.f[DIR_P00   ])[k  ];
+			real mfabb = (D.f[DIR_M00   ])[kw ];
+			real mfbcb = (D.f[DIR_0P0   ])[k  ];
+			real mfbab = (D.f[DIR_0M0   ])[ks ];
+			real mfbbc = (D.f[DIR_00P   ])[k  ];
+			real mfbba = (D.f[DIR_00M   ])[kb ];
+			real mfccb = (D.f[DIR_PP0  ])[k  ];
+			real mfaab = (D.f[DIR_MM0  ])[ksw];
+			real mfcab = (D.f[DIR_PM0  ])[ks ];
+			real mfacb = (D.f[DIR_MP0  ])[kw ];
+			real mfcbc = (D.f[DIR_P0P  ])[k  ];
+			real mfaba = (D.f[DIR_M0M  ])[kbw];
+			real mfcba = (D.f[DIR_P0M  ])[kb ];
+			real mfabc = (D.f[DIR_M0P  ])[kw ];
+			real mfbcc = (D.f[DIR_0PP  ])[k  ];
+			real mfbaa = (D.f[DIR_0MM  ])[kbs];
+			real mfbca = (D.f[DIR_0PM  ])[kb ];
+			real mfbac = (D.f[DIR_0MP  ])[ks ];
+			real mfbbb = (D.f[DIR_000])[k  ];
+			real mfccc = (D.f[DIR_PPP ])[k  ];
+			real mfaac = (D.f[DIR_MMP ])[ksw];
+			real mfcac = (D.f[DIR_PMP ])[ks ];
+			real mfacc = (D.f[DIR_MPP ])[kw ];
+			real mfcca = (D.f[DIR_PPM ])[kb ];
+			real mfaaa = (D.f[DIR_MMM ])[kbsw];
+			real mfcaa = (D.f[DIR_PMM ])[kbs];
+			real mfaca = (D.f[DIR_MPM ])[kbw];
 			////////////////////////////////////////////////////////////////////////////////////
 			real vvx    =((((mfccc-mfaaa) + (mfcac-mfaca)) + ((mfcaa-mfacc) + (mfcca-mfaac))) + 
 						     (((mfcba-mfabc) + (mfcbc-mfaba)) + ((mfcab-mfacb) + (mfccb-mfaab))) +
@@ -1737,7 +1737,7 @@ extern "C" __global__ void LBCalcHigherMomentsIncompSP27(   real* CUMcbb,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LBCalcHigherMomentsCompSP27( real* CUMcbb,
+__global__ void LBCalcHigherMomentsCompSP27( real* CUMcbb,
 														real* CUMbcb,
 														real* CUMbbc,
 														real* CUMcca,
@@ -1777,63 +1777,63 @@ extern "C" __global__ void LBCalcHigherMomentsCompSP27( real* CUMcbb,
 			Distributions27 D;
 			if (EvenOrOdd==true)
 			{
-				D.f[E   ] = &DDStart[E   *size_Mat];
-				D.f[W   ] = &DDStart[W   *size_Mat];
-				D.f[N   ] = &DDStart[N   *size_Mat];
-				D.f[S   ] = &DDStart[S   *size_Mat];
-				D.f[T   ] = &DDStart[T   *size_Mat];
-				D.f[B   ] = &DDStart[B   *size_Mat];
-				D.f[NE  ] = &DDStart[NE  *size_Mat];
-				D.f[SW  ] = &DDStart[SW  *size_Mat];
-				D.f[SE  ] = &DDStart[SE  *size_Mat];
-				D.f[NW  ] = &DDStart[NW  *size_Mat];
-				D.f[TE  ] = &DDStart[TE  *size_Mat];
-				D.f[BW  ] = &DDStart[BW  *size_Mat];
-				D.f[BE  ] = &DDStart[BE  *size_Mat];
-				D.f[TW  ] = &DDStart[TW  *size_Mat];
-				D.f[TN  ] = &DDStart[TN  *size_Mat];
-				D.f[BS  ] = &DDStart[BS  *size_Mat];
-				D.f[BN  ] = &DDStart[BN  *size_Mat];
-				D.f[TS  ] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[TNE ] = &DDStart[TNE *size_Mat];
-				D.f[TSW ] = &DDStart[TSW *size_Mat];
-				D.f[TSE ] = &DDStart[TSE *size_Mat];
-				D.f[TNW ] = &DDStart[TNW *size_Mat];
-				D.f[BNE ] = &DDStart[BNE *size_Mat];
-				D.f[BSW ] = &DDStart[BSW *size_Mat];
-				D.f[BSE ] = &DDStart[BSE *size_Mat];
-				D.f[BNW ] = &DDStart[BNW *size_Mat];
+				D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
 			}
 			else
 			{
-				D.f[W   ] = &DDStart[E   *size_Mat];
-				D.f[E   ] = &DDStart[W   *size_Mat];
-				D.f[S   ] = &DDStart[N   *size_Mat];
-				D.f[N   ] = &DDStart[S   *size_Mat];
-				D.f[B   ] = &DDStart[T   *size_Mat];
-				D.f[T   ] = &DDStart[B   *size_Mat];
-				D.f[SW  ] = &DDStart[NE  *size_Mat];
-				D.f[NE  ] = &DDStart[SW  *size_Mat];
-				D.f[NW  ] = &DDStart[SE  *size_Mat];
-				D.f[SE  ] = &DDStart[NW  *size_Mat];
-				D.f[BW  ] = &DDStart[TE  *size_Mat];
-				D.f[TE  ] = &DDStart[BW  *size_Mat];
-				D.f[TW  ] = &DDStart[BE  *size_Mat];
-				D.f[BE  ] = &DDStart[TW  *size_Mat];
-				D.f[BS  ] = &DDStart[TN  *size_Mat];
-				D.f[TN  ] = &DDStart[BS  *size_Mat];
-				D.f[TS  ] = &DDStart[BN  *size_Mat];
-				D.f[BN  ] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[BSW ] = &DDStart[TNE *size_Mat];
-				D.f[BNE ] = &DDStart[TSW *size_Mat];
-				D.f[BNW ] = &DDStart[TSE *size_Mat];
-				D.f[BSE ] = &DDStart[TNW *size_Mat];
-				D.f[TSW ] = &DDStart[BNE *size_Mat];
-				D.f[TNE ] = &DDStart[BSW *size_Mat];
-				D.f[TNW ] = &DDStart[BSE *size_Mat];
-				D.f[TSE ] = &DDStart[BNW *size_Mat];
+				D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -1846,33 +1846,33 @@ extern "C" __global__ void LBCalcHigherMomentsCompSP27( real* CUMcbb,
 			unsigned int kbs  = neighborZ[ks];
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[E   ])[k  ];
-			real mfabb = (D.f[W   ])[kw ];
-			real mfbcb = (D.f[N   ])[k  ];
-			real mfbab = (D.f[S   ])[ks ];
-			real mfbbc = (D.f[T   ])[k  ];
-			real mfbba = (D.f[B   ])[kb ];
-			real mfccb = (D.f[NE  ])[k  ];
-			real mfaab = (D.f[SW  ])[ksw];
-			real mfcab = (D.f[SE  ])[ks ];
-			real mfacb = (D.f[NW  ])[kw ];
-			real mfcbc = (D.f[TE  ])[k  ];
-			real mfaba = (D.f[BW  ])[kbw];
-			real mfcba = (D.f[BE  ])[kb ];
-			real mfabc = (D.f[TW  ])[kw ];
-			real mfbcc = (D.f[TN  ])[k  ];
-			real mfbaa = (D.f[BS  ])[kbs];
-			real mfbca = (D.f[BN  ])[kb ];
-			real mfbac = (D.f[TS  ])[ks ];
-			real mfbbb = (D.f[REST])[k  ];
-			real mfccc = (D.f[TNE ])[k  ];
-			real mfaac = (D.f[TSW ])[ksw];
-			real mfcac = (D.f[TSE ])[ks ];
-			real mfacc = (D.f[TNW ])[kw ];
-			real mfcca = (D.f[BNE ])[kb ];
-			real mfaaa = (D.f[BSW ])[kbsw];
-			real mfcaa = (D.f[BSE ])[kbs];
-			real mfaca = (D.f[BNW ])[kbw];
+			real mfcbb = (D.f[DIR_P00   ])[k  ];
+			real mfabb = (D.f[DIR_M00   ])[kw ];
+			real mfbcb = (D.f[DIR_0P0   ])[k  ];
+			real mfbab = (D.f[DIR_0M0   ])[ks ];
+			real mfbbc = (D.f[DIR_00P   ])[k  ];
+			real mfbba = (D.f[DIR_00M   ])[kb ];
+			real mfccb = (D.f[DIR_PP0  ])[k  ];
+			real mfaab = (D.f[DIR_MM0  ])[ksw];
+			real mfcab = (D.f[DIR_PM0  ])[ks ];
+			real mfacb = (D.f[DIR_MP0  ])[kw ];
+			real mfcbc = (D.f[DIR_P0P  ])[k  ];
+			real mfaba = (D.f[DIR_M0M  ])[kbw];
+			real mfcba = (D.f[DIR_P0M  ])[kb ];
+			real mfabc = (D.f[DIR_M0P  ])[kw ];
+			real mfbcc = (D.f[DIR_0PP  ])[k  ];
+			real mfbaa = (D.f[DIR_0MM  ])[kbs];
+			real mfbca = (D.f[DIR_0PM  ])[kb ];
+			real mfbac = (D.f[DIR_0MP  ])[ks ];
+			real mfbbb = (D.f[DIR_000])[k  ];
+			real mfccc = (D.f[DIR_PPP ])[k  ];
+			real mfaac = (D.f[DIR_MMP ])[ksw];
+			real mfcac = (D.f[DIR_PMP ])[ks ];
+			real mfacc = (D.f[DIR_MPP ])[kw ];
+			real mfcca = (D.f[DIR_PPM ])[kb ];
+			real mfaaa = (D.f[DIR_MMM ])[kbsw];
+			real mfcaa = (D.f[DIR_PMM ])[kbs];
+			real mfaca = (D.f[DIR_MPM ])[kbw];
 			////////////////////////////////////////////////////////////////////////////////////
 			real drho = ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
 							(((mfbac+mfbca) + (mfbaa+mfbcc)) + ((mfabc+mfcba) + (mfaba+mfcbc)) + ((mfacb+mfcab) + (mfaab+mfccb))) +
@@ -2131,12 +2131,13 @@ extern "C" __global__ void LBCalcHigherMomentsCompSP27( real* CUMcbb,
 
 			real OxxPyyPzz = c1o1;
 			real omega = c1o1 / (c3o1*0.001 + c1o2);
-			real B = (c4o1 * omega * OxxPyyPzz * (c9o1 * omega - c16o1) - c4o1 * omega * omega - c2o1 * OxxPyyPzz * OxxPyyPzz * (c2o1 + c9o1 * omega * (omega - c2o1))) /
+			// Scalar factor (formerly 'B') used in the CUMbcc/CUMcbc/CUMccb corrections below; deliberately NOT named DIR_00M, which is the direction index used to address D.f[...] in this kernel.
+			real factorB = (c4o1 * omega * OxxPyyPzz * (c9o1 * omega - c16o1) - c4o1 * omega * omega - c2o1 * OxxPyyPzz * OxxPyyPzz * (c2o1 + c9o1 * omega * (omega - c2o1))) /
 				(c3o1 * (omega - OxxPyyPzz) * (OxxPyyPzz * (c2o1 + c3o1 * omega) - c8o1 * omega));
 
-			CUMbcc[k] = mfbcc - ((mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb + c2o1 * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac)*(c1o1 + rho*c6o1*B / (c2o1 + c3o1 * B))) / rho;
-			CUMcbc[k] = mfcbc - ((mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb + c2o1 * (mfabb * mfcab + mfbba * mfbac)) + c1o3 * (mfcba + mfabc)*(c1o1 + rho*c6o1*B / (c2o1 + c3o1 * B))) / rho;
-			CUMccb[k] = mfccb - ((mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb + c2o1 * (mfbab * mfbca + mfabb * mfcba)) + c1o3 * (mfacb + mfcab)*(c1o1 + rho*c6o1*B / (c2o1 + c3o1 * B))) / rho;
+			CUMbcc[k] = mfbcc - ((mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb + c2o1 * (mfbab * mfacb + mfbba * mfabc)) + c1o3 * (mfbca + mfbac)*(c1o1 + rho*c6o1*factorB / (c2o1 + c3o1 * factorB))) / rho;
+			CUMcbc[k] = mfcbc - ((mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb + c2o1 * (mfabb * mfcab + mfbba * mfbac)) + c1o3 * (mfcba + mfabc)*(c1o1 + rho*c6o1*factorB / (c2o1 + c3o1 * factorB))) / rho;
+			CUMccb[k] = mfccb - ((mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb + c2o1 * (mfbab * mfbca + mfabb * mfcba)) + c1o3 * (mfacb + mfcab)*(c1o1 + rho*c6o1*factorB / (c2o1 + c3o1 * factorB))) / rho;
 
 			////////////////////////////////////////////////////////////////////////////////////
 			// Cumulants
diff --git a/src/gpu/VirtualFluids_GPU/GPU/CalcConc27.cu b/src/gpu/VirtualFluids_GPU/GPU/CalcConc27.cu
index c71f58bdfd69397338959162ee167527ec9d4380..d246f39a030b6df0b249aee17f37b7d5258ff00d 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/CalcConc27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/CalcConc27.cu
@@ -39,7 +39,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void CalcConc27(
+__global__ void CalcConc27(
 	real* concentration,
 	uint* typeOfGridNode,
 	uint* neighborX,
@@ -76,63 +76,63 @@ extern "C" __global__ void CalcConc27(
       Distributions27 distAD;
       if (isEvenTimestep)
       {
-         distAD.f[E   ] = &distributionsAD[E   *size_Mat];
-         distAD.f[W   ] = &distributionsAD[W   *size_Mat];
-         distAD.f[N   ] = &distributionsAD[N   *size_Mat];
-         distAD.f[S   ] = &distributionsAD[S   *size_Mat];
-         distAD.f[T   ] = &distributionsAD[T   *size_Mat];
-         distAD.f[B   ] = &distributionsAD[B   *size_Mat];
-         distAD.f[NE  ] = &distributionsAD[NE  *size_Mat];
-         distAD.f[SW  ] = &distributionsAD[SW  *size_Mat];
-         distAD.f[SE  ] = &distributionsAD[SE  *size_Mat];
-         distAD.f[NW  ] = &distributionsAD[NW  *size_Mat];
-         distAD.f[TE  ] = &distributionsAD[TE  *size_Mat];
-         distAD.f[BW  ] = &distributionsAD[BW  *size_Mat];
-         distAD.f[BE  ] = &distributionsAD[BE  *size_Mat];
-         distAD.f[TW  ] = &distributionsAD[TW  *size_Mat];
-         distAD.f[TN  ] = &distributionsAD[TN  *size_Mat];
-         distAD.f[BS  ] = &distributionsAD[BS  *size_Mat];
-         distAD.f[BN  ] = &distributionsAD[BN  *size_Mat];
-         distAD.f[TS  ] = &distributionsAD[TS  *size_Mat];
-         distAD.f[REST] = &distributionsAD[REST*size_Mat];
-         distAD.f[TNE ] = &distributionsAD[TNE *size_Mat];
-         distAD.f[TSW ] = &distributionsAD[TSW *size_Mat];
-         distAD.f[TSE ] = &distributionsAD[TSE *size_Mat];
-         distAD.f[TNW ] = &distributionsAD[TNW *size_Mat];
-         distAD.f[BNE ] = &distributionsAD[BNE *size_Mat];
-         distAD.f[BSW ] = &distributionsAD[BSW *size_Mat];
-         distAD.f[BSE ] = &distributionsAD[BSE *size_Mat];
-         distAD.f[BNW ] = &distributionsAD[BNW *size_Mat];
+         distAD.f[DIR_P00   ] = &distributionsAD[DIR_P00   *size_Mat];
+         distAD.f[DIR_M00   ] = &distributionsAD[DIR_M00   *size_Mat];
+         distAD.f[DIR_0P0   ] = &distributionsAD[DIR_0P0   *size_Mat];
+         distAD.f[DIR_0M0   ] = &distributionsAD[DIR_0M0   *size_Mat];
+         distAD.f[DIR_00P   ] = &distributionsAD[DIR_00P   *size_Mat];
+         distAD.f[DIR_00M   ] = &distributionsAD[DIR_00M   *size_Mat];
+         distAD.f[DIR_PP0  ] = &distributionsAD[DIR_PP0  *size_Mat];
+         distAD.f[DIR_MM0  ] = &distributionsAD[DIR_MM0  *size_Mat];
+         distAD.f[DIR_PM0  ] = &distributionsAD[DIR_PM0  *size_Mat];
+         distAD.f[DIR_MP0  ] = &distributionsAD[DIR_MP0  *size_Mat];
+         distAD.f[DIR_P0P  ] = &distributionsAD[DIR_P0P  *size_Mat];
+         distAD.f[DIR_M0M  ] = &distributionsAD[DIR_M0M  *size_Mat];
+         distAD.f[DIR_P0M  ] = &distributionsAD[DIR_P0M  *size_Mat];
+         distAD.f[DIR_M0P  ] = &distributionsAD[DIR_M0P  *size_Mat];
+         distAD.f[DIR_0PP  ] = &distributionsAD[DIR_0PP  *size_Mat];
+         distAD.f[DIR_0MM  ] = &distributionsAD[DIR_0MM  *size_Mat];
+         distAD.f[DIR_0PM  ] = &distributionsAD[DIR_0PM  *size_Mat];
+         distAD.f[DIR_0MP  ] = &distributionsAD[DIR_0MP  *size_Mat];
+         distAD.f[DIR_000] = &distributionsAD[DIR_000*size_Mat];
+         distAD.f[DIR_PPP ] = &distributionsAD[DIR_PPP *size_Mat];
+         distAD.f[DIR_MMP ] = &distributionsAD[DIR_MMP *size_Mat];
+         distAD.f[DIR_PMP ] = &distributionsAD[DIR_PMP *size_Mat];
+         distAD.f[DIR_MPP ] = &distributionsAD[DIR_MPP *size_Mat];
+         distAD.f[DIR_PPM ] = &distributionsAD[DIR_PPM *size_Mat];
+         distAD.f[DIR_MMM ] = &distributionsAD[DIR_MMM *size_Mat];
+         distAD.f[DIR_PMM ] = &distributionsAD[DIR_PMM *size_Mat];
+         distAD.f[DIR_MPM ] = &distributionsAD[DIR_MPM *size_Mat];
       }
       else
       {
-         distAD.f[W   ] = &distributionsAD[E   *size_Mat];
-         distAD.f[E   ] = &distributionsAD[W   *size_Mat];
-         distAD.f[S   ] = &distributionsAD[N   *size_Mat];
-         distAD.f[N   ] = &distributionsAD[S   *size_Mat];
-         distAD.f[B   ] = &distributionsAD[T   *size_Mat];
-         distAD.f[T   ] = &distributionsAD[B   *size_Mat];
-         distAD.f[SW  ] = &distributionsAD[NE  *size_Mat];
-         distAD.f[NE  ] = &distributionsAD[SW  *size_Mat];
-         distAD.f[NW  ] = &distributionsAD[SE  *size_Mat];
-         distAD.f[SE  ] = &distributionsAD[NW  *size_Mat];
-         distAD.f[BW  ] = &distributionsAD[TE  *size_Mat];
-         distAD.f[TE  ] = &distributionsAD[BW  *size_Mat];
-         distAD.f[TW  ] = &distributionsAD[BE  *size_Mat];
-         distAD.f[BE  ] = &distributionsAD[TW  *size_Mat];
-         distAD.f[BS  ] = &distributionsAD[TN  *size_Mat];
-         distAD.f[TN  ] = &distributionsAD[BS  *size_Mat];
-         distAD.f[TS  ] = &distributionsAD[BN  *size_Mat];
-         distAD.f[BN  ] = &distributionsAD[TS  *size_Mat];
-         distAD.f[REST] = &distributionsAD[REST*size_Mat];
-         distAD.f[TNE ] = &distributionsAD[BSW *size_Mat];
-         distAD.f[TSW ] = &distributionsAD[BNE *size_Mat];
-         distAD.f[TSE ] = &distributionsAD[BNW *size_Mat];
-         distAD.f[TNW ] = &distributionsAD[BSE *size_Mat];
-         distAD.f[BNE ] = &distributionsAD[TSW *size_Mat];
-         distAD.f[BSW ] = &distributionsAD[TNE *size_Mat];
-         distAD.f[BSE ] = &distributionsAD[TNW *size_Mat];
-         distAD.f[BNW ] = &distributionsAD[TSE *size_Mat];
+         distAD.f[DIR_M00   ] = &distributionsAD[DIR_P00   *size_Mat];
+         distAD.f[DIR_P00   ] = &distributionsAD[DIR_M00   *size_Mat];
+         distAD.f[DIR_0M0   ] = &distributionsAD[DIR_0P0   *size_Mat];
+         distAD.f[DIR_0P0   ] = &distributionsAD[DIR_0M0   *size_Mat];
+         distAD.f[DIR_00M   ] = &distributionsAD[DIR_00P   *size_Mat];
+         distAD.f[DIR_00P   ] = &distributionsAD[DIR_00M   *size_Mat];
+         distAD.f[DIR_MM0  ] = &distributionsAD[DIR_PP0  *size_Mat];
+         distAD.f[DIR_PP0  ] = &distributionsAD[DIR_MM0  *size_Mat];
+         distAD.f[DIR_MP0  ] = &distributionsAD[DIR_PM0  *size_Mat];
+         distAD.f[DIR_PM0  ] = &distributionsAD[DIR_MP0  *size_Mat];
+         distAD.f[DIR_M0M  ] = &distributionsAD[DIR_P0P  *size_Mat];
+         distAD.f[DIR_P0P  ] = &distributionsAD[DIR_M0M  *size_Mat];
+         distAD.f[DIR_M0P  ] = &distributionsAD[DIR_P0M  *size_Mat];
+         distAD.f[DIR_P0M  ] = &distributionsAD[DIR_M0P  *size_Mat];
+         distAD.f[DIR_0MM  ] = &distributionsAD[DIR_0PP  *size_Mat];
+         distAD.f[DIR_0PP  ] = &distributionsAD[DIR_0MM  *size_Mat];
+         distAD.f[DIR_0MP  ] = &distributionsAD[DIR_0PM  *size_Mat];
+         distAD.f[DIR_0PM  ] = &distributionsAD[DIR_0MP  *size_Mat];
+         distAD.f[DIR_000] = &distributionsAD[DIR_000*size_Mat];
+         distAD.f[DIR_PPP ] = &distributionsAD[DIR_MMM *size_Mat];
+         distAD.f[DIR_MMP ] = &distributionsAD[DIR_PPM *size_Mat];
+         distAD.f[DIR_PMP ] = &distributionsAD[DIR_MPM *size_Mat];
+         distAD.f[DIR_MPP ] = &distributionsAD[DIR_PMM *size_Mat];
+         distAD.f[DIR_PPM ] = &distributionsAD[DIR_MMP *size_Mat];
+         distAD.f[DIR_MMM ] = &distributionsAD[DIR_PPP *size_Mat];
+         distAD.f[DIR_PMM ] = &distributionsAD[DIR_MPP *size_Mat];
+         distAD.f[DIR_MPM ] = &distributionsAD[DIR_PMP *size_Mat];
       }
 	  ////////////////////////////////////////////////////////////////////////////////
 	  //! - Set neighbor indices (necessary for indirect addressing)
@@ -166,33 +166,33 @@ extern "C" __global__ void CalcConc27(
 	  ////////////////////////////////////////////////////////////////////////////////
 	  //! - Set local distributions
 	  //!
-	  real mfcbb = (distAD.f[E   ])[ke  ];
-	  real mfabb = (distAD.f[W   ])[kw  ];
-	  real mfbcb = (distAD.f[N   ])[kn  ];
-	  real mfbab = (distAD.f[S   ])[ks  ];
-	  real mfbbc = (distAD.f[T   ])[kt  ];
-	  real mfbba = (distAD.f[B   ])[kb  ];
-	  real mfccb = (distAD.f[NE  ])[kne ];
-	  real mfaab = (distAD.f[SW  ])[ksw ];
-	  real mfcab = (distAD.f[SE  ])[kse ];
-	  real mfacb = (distAD.f[NW  ])[knw ];
-	  real mfcbc = (distAD.f[TE  ])[kte ];
-	  real mfaba = (distAD.f[BW  ])[kbw ];
-	  real mfcba = (distAD.f[BE  ])[kbe ];
-	  real mfabc = (distAD.f[TW  ])[ktw ];
-	  real mfbcc = (distAD.f[TN  ])[ktn ];
-	  real mfbaa = (distAD.f[BS  ])[kbs ];
-	  real mfbca = (distAD.f[BN  ])[kbn ];
-	  real mfbac = (distAD.f[TS  ])[kts ];
-	  real mfbbb = (distAD.f[REST])[k   ];
-	  real mfccc = (distAD.f[TNE ])[ktne];
-	  real mfaac = (distAD.f[TSW ])[ktsw];
-	  real mfcac = (distAD.f[TSE ])[ktse];
-	  real mfacc = (distAD.f[TNW ])[ktnw];
-	  real mfcca = (distAD.f[BNE ])[kbne];
-	  real mfaaa = (distAD.f[BSW ])[kbsw];
-	  real mfcaa = (distAD.f[BSE ])[kbse];
-	  real mfaca = (distAD.f[BNW ])[kbnw];
+	  real mfcbb = (distAD.f[DIR_P00   ])[ke  ];
+	  real mfabb = (distAD.f[DIR_M00   ])[kw  ];
+	  real mfbcb = (distAD.f[DIR_0P0   ])[kn  ];
+	  real mfbab = (distAD.f[DIR_0M0   ])[ks  ];
+	  real mfbbc = (distAD.f[DIR_00P   ])[kt  ];
+	  real mfbba = (distAD.f[DIR_00M   ])[kb  ];
+	  real mfccb = (distAD.f[DIR_PP0  ])[kne ];
+	  real mfaab = (distAD.f[DIR_MM0  ])[ksw ];
+	  real mfcab = (distAD.f[DIR_PM0  ])[kse ];
+	  real mfacb = (distAD.f[DIR_MP0  ])[knw ];
+	  real mfcbc = (distAD.f[DIR_P0P  ])[kte ];
+	  real mfaba = (distAD.f[DIR_M0M  ])[kbw ];
+	  real mfcba = (distAD.f[DIR_P0M  ])[kbe ];
+	  real mfabc = (distAD.f[DIR_M0P  ])[ktw ];
+	  real mfbcc = (distAD.f[DIR_0PP  ])[ktn ];
+	  real mfbaa = (distAD.f[DIR_0MM  ])[kbs ];
+	  real mfbca = (distAD.f[DIR_0PM  ])[kbn ];
+	  real mfbac = (distAD.f[DIR_0MP  ])[kts ];
+	  real mfbbb = (distAD.f[DIR_000])[k   ];
+	  real mfccc = (distAD.f[DIR_PPP ])[ktne];
+	  real mfaac = (distAD.f[DIR_MMP ])[ktsw];
+	  real mfcac = (distAD.f[DIR_PMP ])[ktse];
+	  real mfacc = (distAD.f[DIR_MPP ])[ktnw];
+	  real mfcca = (distAD.f[DIR_PPM ])[kbne];
+	  real mfaaa = (distAD.f[DIR_MMM ])[kbsw];
+	  real mfcaa = (distAD.f[DIR_PMM ])[kbse];
+	  real mfaca = (distAD.f[DIR_MPM ])[kbnw];
       //////////////////////////////////////////////////////////////////////////
 	  //! - Calculate concentration using pyramid summation for low round-off errors as in Eq. (J1)-(J3) \ref
 	  //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015), DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
@@ -224,7 +224,7 @@ extern "C" __global__ void CalcConc27(
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void CalcConc7( real* Conc,
+__global__ void CalcConc7( real* Conc,
                                           unsigned int* geoD,
                                           unsigned int* neighborX,
                                           unsigned int* neighborY,
@@ -315,7 +315,7 @@ extern "C" __global__ void CalcConc7( real* Conc,
 
 // DEPRECATED (2022)
 //////////////////////////////////////////////////////////////////////////////////
-// extern "C" __global__ void LBCalcMacThS27(real* Conc,
+// __global__ void LBCalcMacThS27(real* Conc,
 //                                           unsigned int* geoD,
 //                                           unsigned int* neighborX,
 //                                           unsigned int* neighborY,
@@ -327,63 +327,63 @@ extern "C" __global__ void CalcConc7( real* Conc,
 //    Distributions27 D27;
 //    if (isEvenTimestep==true)
 //    {
-//       D27.f[E   ] = &DD27[E   *size_Mat];
-//       D27.f[W   ] = &DD27[W   *size_Mat];
-//       D27.f[N   ] = &DD27[N   *size_Mat];
-//       D27.f[S   ] = &DD27[S   *size_Mat];
-//       D27.f[T   ] = &DD27[T   *size_Mat];
-//       D27.f[B   ] = &DD27[B   *size_Mat];
-//       D27.f[NE  ] = &DD27[NE  *size_Mat];
-//       D27.f[SW  ] = &DD27[SW  *size_Mat];
-//       D27.f[SE  ] = &DD27[SE  *size_Mat];
-//       D27.f[NW  ] = &DD27[NW  *size_Mat];
-//       D27.f[TE  ] = &DD27[TE  *size_Mat];
-//       D27.f[BW  ] = &DD27[BW  *size_Mat];
-//       D27.f[BE  ] = &DD27[BE  *size_Mat];
-//       D27.f[TW  ] = &DD27[TW  *size_Mat];
-//       D27.f[TN  ] = &DD27[TN  *size_Mat];
-//       D27.f[BS  ] = &DD27[BS  *size_Mat];
-//       D27.f[BN  ] = &DD27[BN  *size_Mat];
-//       D27.f[TS  ] = &DD27[TS  *size_Mat];
-//       D27.f[REST] = &DD27[REST*size_Mat];
-//       D27.f[TNE ] = &DD27[TNE *size_Mat];
-//       D27.f[TSW ] = &DD27[TSW *size_Mat];
-//       D27.f[TSE ] = &DD27[TSE *size_Mat];
-//       D27.f[TNW ] = &DD27[TNW *size_Mat];
-//       D27.f[BNE ] = &DD27[BNE *size_Mat];
-//       D27.f[BSW ] = &DD27[BSW *size_Mat];
-//       D27.f[BSE ] = &DD27[BSE *size_Mat];
-//       D27.f[BNW ] = &DD27[BNW *size_Mat];
+//       D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
+//       D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
+//       D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
+//       D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
+//       D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
+//       D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
+//       D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
+//       D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
+//       D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
+//       D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
+//       D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
+//       D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
+//       D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
+//       D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
+//       D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
+//       D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
+//       D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
+//       D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
+//       D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
+//       D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
+//       D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
+//       D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
+//       D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
+//       D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
+//       D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
+//       D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
+//       D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
 //    }
 //    else
 //    {
-//       D27.f[W   ] = &DD27[E   *size_Mat];
-//       D27.f[E   ] = &DD27[W   *size_Mat];
-//       D27.f[S   ] = &DD27[N   *size_Mat];
-//       D27.f[N   ] = &DD27[S   *size_Mat];
-//       D27.f[B   ] = &DD27[T   *size_Mat];
-//       D27.f[T   ] = &DD27[B   *size_Mat];
-//       D27.f[SW  ] = &DD27[NE  *size_Mat];
-//       D27.f[NE  ] = &DD27[SW  *size_Mat];
-//       D27.f[NW  ] = &DD27[SE  *size_Mat];
-//       D27.f[SE  ] = &DD27[NW  *size_Mat];
-//       D27.f[BW  ] = &DD27[TE  *size_Mat];
-//       D27.f[TE  ] = &DD27[BW  *size_Mat];
-//       D27.f[TW  ] = &DD27[BE  *size_Mat];
-//       D27.f[BE  ] = &DD27[TW  *size_Mat];
-//       D27.f[BS  ] = &DD27[TN  *size_Mat];
-//       D27.f[TN  ] = &DD27[BS  *size_Mat];
-//       D27.f[TS  ] = &DD27[BN  *size_Mat];
-//       D27.f[BN  ] = &DD27[TS  *size_Mat];
-//       D27.f[REST] = &DD27[REST*size_Mat];
-//       D27.f[BSW ] = &DD27[TNE *size_Mat];
-//       D27.f[BNE ] = &DD27[TSW *size_Mat];
-//       D27.f[BNW ] = &DD27[TSE *size_Mat];
-//       D27.f[BSE ] = &DD27[TNW *size_Mat];
-//       D27.f[TSW ] = &DD27[BNE *size_Mat];
-//       D27.f[TNE ] = &DD27[BSW *size_Mat];
-//       D27.f[TNW ] = &DD27[BSE *size_Mat];
-//       D27.f[TSE ] = &DD27[BNW *size_Mat];
+//       D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
+//       D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
+//       D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
+//       D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
+//       D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
+//       D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
+//       D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
+//       D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
+//       D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
+//       D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
+//       D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
+//       D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
+//       D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
+//       D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
+//       D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
+//       D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
+//       D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
+//       D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
+//       D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
+//       D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
+//       D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
+//       D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
+//       D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
+//       D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
+//       D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
+//       D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
+//       D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
 //    }
 //    ////////////////////////////////////////////////////////////////////////////////
 //    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -432,20 +432,20 @@ extern "C" __global__ void CalcConc7( real* Conc,
 
 //       if(geoD[k] == GEO_FLUID)
 //       {
-//          Conc[k]    =   (D27.f[E   ])[ke  ]+ (D27.f[W   ])[kw  ]+ 
-//                         (D27.f[N   ])[kn  ]+ (D27.f[S   ])[ks  ]+
-//                         (D27.f[T   ])[kt  ]+ (D27.f[B   ])[kb  ]+
-//                         (D27.f[NE  ])[kne ]+ (D27.f[SW  ])[ksw ]+
-//                         (D27.f[SE  ])[kse ]+ (D27.f[NW  ])[knw ]+
-//                         (D27.f[TE  ])[kte ]+ (D27.f[BW  ])[kbw ]+
-//                         (D27.f[BE  ])[kbe ]+ (D27.f[TW  ])[ktw ]+
-//                         (D27.f[TN  ])[ktn ]+ (D27.f[BS  ])[kbs ]+
-//                         (D27.f[BN  ])[kbn ]+ (D27.f[TS  ])[kts ]+
-//                         (D27.f[REST])[kzero]+ 
-//                         (D27.f[TNE ])[ktne]+ (D27.f[TSW ])[ktsw]+
-//                         (D27.f[TSE ])[ktse]+ (D27.f[TNW ])[ktnw]+
-//                         (D27.f[BNE ])[kbne]+ (D27.f[BSW ])[kbsw]+
-//                         (D27.f[BSE ])[kbse]+ (D27.f[BNW ])[kbnw];
+//          Conc[k]    =   (D27.f[DIR_P00   ])[ke  ]+ (D27.f[DIR_M00   ])[kw  ]+ 
+//                         (D27.f[DIR_0P0   ])[kn  ]+ (D27.f[DIR_0M0   ])[ks  ]+
+//                         (D27.f[DIR_00P   ])[kt  ]+ (D27.f[DIR_00M   ])[kb  ]+
+//                         (D27.f[DIR_PP0  ])[kne ]+ (D27.f[DIR_MM0  ])[ksw ]+
+//                         (D27.f[DIR_PM0  ])[kse ]+ (D27.f[DIR_MP0  ])[knw ]+
+//                         (D27.f[DIR_P0P  ])[kte ]+ (D27.f[DIR_M0M  ])[kbw ]+
+//                         (D27.f[DIR_P0M  ])[kbe ]+ (D27.f[DIR_M0P  ])[ktw ]+
+//                         (D27.f[DIR_0PP  ])[ktn ]+ (D27.f[DIR_0MM  ])[kbs ]+
+//                         (D27.f[DIR_0PM  ])[kbn ]+ (D27.f[DIR_0MP  ])[kts ]+
+//                         (D27.f[DIR_000])[kzero]+ 
+//                         (D27.f[DIR_PPP ])[ktne]+ (D27.f[DIR_MMP ])[ktsw]+
+//                         (D27.f[DIR_PMP ])[ktse]+ (D27.f[DIR_MPP ])[ktnw]+
+//                         (D27.f[DIR_PPM ])[kbne]+ (D27.f[DIR_MMM ])[kbsw]+
+//                         (D27.f[DIR_PMM ])[kbse]+ (D27.f[DIR_MPM ])[kbnw];
 //       }
 //    }   
 // }
@@ -469,7 +469,7 @@ extern "C" __global__ void CalcConc7( real* Conc,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void GetPlaneConc7(real* Conc,
+__global__ void GetPlaneConc7(real* Conc,
 								            int* kPC,
 								            unsigned int numberOfPointskPC,
 											unsigned int* geoD,
@@ -574,7 +574,7 @@ extern "C" __global__ void GetPlaneConc7(real* Conc,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void GetPlaneConc27(real* Conc,
+__global__ void GetPlaneConc27(real* Conc,
 								             int* kPC,
 								             unsigned int numberOfPointskPC,
 											 unsigned int* geoD,
@@ -588,63 +588,63 @@ extern "C" __global__ void GetPlaneConc27(real* Conc,
    Distributions27 D27;
    if (isEvenTimestep==true)
    {
-      D27.f[E   ] = &DD27[E   *size_Mat];
-      D27.f[W   ] = &DD27[W   *size_Mat];
-      D27.f[N   ] = &DD27[N   *size_Mat];
-      D27.f[S   ] = &DD27[S   *size_Mat];
-      D27.f[T   ] = &DD27[T   *size_Mat];
-      D27.f[B   ] = &DD27[B   *size_Mat];
-      D27.f[NE  ] = &DD27[NE  *size_Mat];
-      D27.f[SW  ] = &DD27[SW  *size_Mat];
-      D27.f[SE  ] = &DD27[SE  *size_Mat];
-      D27.f[NW  ] = &DD27[NW  *size_Mat];
-      D27.f[TE  ] = &DD27[TE  *size_Mat];
-      D27.f[BW  ] = &DD27[BW  *size_Mat];
-      D27.f[BE  ] = &DD27[BE  *size_Mat];
-      D27.f[TW  ] = &DD27[TW  *size_Mat];
-      D27.f[TN  ] = &DD27[TN  *size_Mat];
-      D27.f[BS  ] = &DD27[BS  *size_Mat];
-      D27.f[BN  ] = &DD27[BN  *size_Mat];
-      D27.f[TS  ] = &DD27[TS  *size_Mat];
-      D27.f[REST] = &DD27[REST*size_Mat];
-      D27.f[TNE ] = &DD27[TNE *size_Mat];
-      D27.f[TSW ] = &DD27[TSW *size_Mat];
-      D27.f[TSE ] = &DD27[TSE *size_Mat];
-      D27.f[TNW ] = &DD27[TNW *size_Mat];
-      D27.f[BNE ] = &DD27[BNE *size_Mat];
-      D27.f[BSW ] = &DD27[BSW *size_Mat];
-      D27.f[BSE ] = &DD27[BSE *size_Mat];
-      D27.f[BNW ] = &DD27[BNW *size_Mat];
+      D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
+      D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
+      D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
+      D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
+      D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
+      D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
+      D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
+      D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
+      D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
+      D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
+      D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
+      D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
+      D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
+      D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
+      D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
+      D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
+      D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
+      D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
+      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
+      D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
+      D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
+      D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
+      D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
+      D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
+      D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
+      D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
+      D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
    }
    else
    {
-      D27.f[W   ] = &DD27[E   *size_Mat];
-      D27.f[E   ] = &DD27[W   *size_Mat];
-      D27.f[S   ] = &DD27[N   *size_Mat];
-      D27.f[N   ] = &DD27[S   *size_Mat];
-      D27.f[B   ] = &DD27[T   *size_Mat];
-      D27.f[T   ] = &DD27[B   *size_Mat];
-      D27.f[SW  ] = &DD27[NE  *size_Mat];
-      D27.f[NE  ] = &DD27[SW  *size_Mat];
-      D27.f[NW  ] = &DD27[SE  *size_Mat];
-      D27.f[SE  ] = &DD27[NW  *size_Mat];
-      D27.f[BW  ] = &DD27[TE  *size_Mat];
-      D27.f[TE  ] = &DD27[BW  *size_Mat];
-      D27.f[TW  ] = &DD27[BE  *size_Mat];
-      D27.f[BE  ] = &DD27[TW  *size_Mat];
-      D27.f[BS  ] = &DD27[TN  *size_Mat];
-      D27.f[TN  ] = &DD27[BS  *size_Mat];
-      D27.f[TS  ] = &DD27[BN  *size_Mat];
-      D27.f[BN  ] = &DD27[TS  *size_Mat];
-      D27.f[REST] = &DD27[REST*size_Mat];
-      D27.f[BSW ] = &DD27[TNE *size_Mat];
-      D27.f[BNE ] = &DD27[TSW *size_Mat];
-      D27.f[BNW ] = &DD27[TSE *size_Mat];
-      D27.f[BSE ] = &DD27[TNW *size_Mat];
-      D27.f[TSW ] = &DD27[BNE *size_Mat];
-      D27.f[TNE ] = &DD27[BSW *size_Mat];
-      D27.f[TNW ] = &DD27[BSE *size_Mat];
-      D27.f[TSE ] = &DD27[BNW *size_Mat];
+      D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
+      D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
+      D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
+      D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
+      D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
+      D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
+      D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
+      D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
+      D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
+      D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
+      D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
+      D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
+      D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
+      D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
+      D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
+      D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
+      D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
+      D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
+      D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
+      D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
+      D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
+      D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
+      D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
+      D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
+      D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
+      D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
+      D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -693,20 +693,20 @@ extern "C" __global__ void GetPlaneConc27(real* Conc,
 
       if(geoD[k] == GEO_FLUID)
       {
-         Conc[k]    =   (D27.f[E   ])[ke  ]+ (D27.f[W   ])[kw  ]+ 
-                        (D27.f[N   ])[kn  ]+ (D27.f[S   ])[ks  ]+
-                        (D27.f[T   ])[kt  ]+ (D27.f[B   ])[kb  ]+
-                        (D27.f[NE  ])[kne ]+ (D27.f[SW  ])[ksw ]+
-                        (D27.f[SE  ])[kse ]+ (D27.f[NW  ])[knw ]+
-                        (D27.f[TE  ])[kte ]+ (D27.f[BW  ])[kbw ]+
-                        (D27.f[BE  ])[kbe ]+ (D27.f[TW  ])[ktw ]+
-                        (D27.f[TN  ])[ktn ]+ (D27.f[BS  ])[kbs ]+
-                        (D27.f[BN  ])[kbn ]+ (D27.f[TS  ])[kts ]+
-                        (D27.f[REST])[kzero]+ 
-                        (D27.f[TNE ])[ktne]+ (D27.f[TSW ])[ktsw]+
-                        (D27.f[TSE ])[ktse]+ (D27.f[TNW ])[ktnw]+
-                        (D27.f[BNE ])[kbne]+ (D27.f[BSW ])[kbsw]+
-                        (D27.f[BSE ])[kbse]+ (D27.f[BNW ])[kbnw];
+         Conc[k]    =   (D27.f[DIR_P00   ])[ke  ]+ (D27.f[DIR_M00   ])[kw  ]+ 
+                        (D27.f[DIR_0P0   ])[kn  ]+ (D27.f[DIR_0M0   ])[ks  ]+
+                        (D27.f[DIR_00P   ])[kt  ]+ (D27.f[DIR_00M   ])[kb  ]+
+                        (D27.f[DIR_PP0  ])[kne ]+ (D27.f[DIR_MM0  ])[ksw ]+
+                        (D27.f[DIR_PM0  ])[kse ]+ (D27.f[DIR_MP0  ])[knw ]+
+                        (D27.f[DIR_P0P  ])[kte ]+ (D27.f[DIR_M0M  ])[kbw ]+
+                        (D27.f[DIR_P0M  ])[kbe ]+ (D27.f[DIR_M0P  ])[ktw ]+
+                        (D27.f[DIR_0PP  ])[ktn ]+ (D27.f[DIR_0MM  ])[kbs ]+
+                        (D27.f[DIR_0PM  ])[kbn ]+ (D27.f[DIR_0MP  ])[kts ]+
+                        (D27.f[DIR_000])[kzero]+ 
+                        (D27.f[DIR_PPP ])[ktne]+ (D27.f[DIR_MMP ])[ktsw]+
+                        (D27.f[DIR_PMP ])[ktse]+ (D27.f[DIR_MPP ])[ktnw]+
+                        (D27.f[DIR_PPM ])[kbne]+ (D27.f[DIR_MMM ])[kbsw]+
+                        (D27.f[DIR_PMM ])[kbse]+ (D27.f[DIR_MPM ])[kbnw];
       }
    }   
 }
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/GPU/CalcMac27.cu b/src/gpu/VirtualFluids_GPU/GPU/CalcMac27.cu
index ba114ef1d80eeac869086179aab16428bffa74b4..4792b8846b2612383c07a97419e0473b21ebd187 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/CalcMac27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/CalcMac27.cu
@@ -19,7 +19,7 @@ using namespace vf::lbm::dir;
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LBCalcMac27( real* vxD,
+__global__ void LBCalcMac27( real* vxD,
                                         real* vyD,
                                         real* vzD,
                                         real* rhoD,
@@ -70,7 +70,7 @@ extern "C" __global__ void LBCalcMac27( real* vxD,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LBCalcMacSP27( real* vxD,
+__global__ void LBCalcMacSP27( real* vxD,
                                           real* vyD,
                                           real* vzD,
                                           real* rhoD,
@@ -86,63 +86,63 @@ extern "C" __global__ void LBCalcMacSP27( real* vxD,
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[E   ] = &DD[E   *size_Mat];
-      D.f[W   ] = &DD[W   *size_Mat];
-      D.f[N   ] = &DD[N   *size_Mat];
-      D.f[S   ] = &DD[S   *size_Mat];
-      D.f[T   ] = &DD[T   *size_Mat];
-      D.f[B   ] = &DD[B   *size_Mat];
-      D.f[NE  ] = &DD[NE  *size_Mat];
-      D.f[SW  ] = &DD[SW  *size_Mat];
-      D.f[SE  ] = &DD[SE  *size_Mat];
-      D.f[NW  ] = &DD[NW  *size_Mat];
-      D.f[TE  ] = &DD[TE  *size_Mat];
-      D.f[BW  ] = &DD[BW  *size_Mat];
-      D.f[BE  ] = &DD[BE  *size_Mat];
-      D.f[TW  ] = &DD[TW  *size_Mat];
-      D.f[TN  ] = &DD[TN  *size_Mat];
-      D.f[BS  ] = &DD[BS  *size_Mat];
-      D.f[BN  ] = &DD[BN  *size_Mat];
-      D.f[TS  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[TNE *size_Mat];
-      D.f[TSW ] = &DD[TSW *size_Mat];
-      D.f[TSE ] = &DD[TSE *size_Mat];
-      D.f[TNW ] = &DD[TNW *size_Mat];
-      D.f[BNE ] = &DD[BNE *size_Mat];
-      D.f[BSW ] = &DD[BSW *size_Mat];
-      D.f[BSE ] = &DD[BSE *size_Mat];
-      D.f[BNW ] = &DD[BNW *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
    } 
    else
    {
-      D.f[W   ] = &DD[E   *size_Mat];
-      D.f[E   ] = &DD[W   *size_Mat];
-      D.f[S   ] = &DD[N   *size_Mat];
-      D.f[N   ] = &DD[S   *size_Mat];
-      D.f[B   ] = &DD[T   *size_Mat];
-      D.f[T   ] = &DD[B   *size_Mat];
-      D.f[SW  ] = &DD[NE  *size_Mat];
-      D.f[NE  ] = &DD[SW  *size_Mat];
-      D.f[NW  ] = &DD[SE  *size_Mat];
-      D.f[SE  ] = &DD[NW  *size_Mat];
-      D.f[BW  ] = &DD[TE  *size_Mat];
-      D.f[TE  ] = &DD[BW  *size_Mat];
-      D.f[TW  ] = &DD[BE  *size_Mat];
-      D.f[BE  ] = &DD[TW  *size_Mat];
-      D.f[BS  ] = &DD[TN  *size_Mat];
-      D.f[TN  ] = &DD[BS  *size_Mat];
-      D.f[TS  ] = &DD[BN  *size_Mat];
-      D.f[BN  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[BSW *size_Mat];
-      D.f[TSW ] = &DD[BNE *size_Mat];
-      D.f[TSE ] = &DD[BNW *size_Mat];
-      D.f[TNW ] = &DD[BSE *size_Mat];
-      D.f[BNE ] = &DD[TSW *size_Mat];
-      D.f[BSW ] = &DD[TNE *size_Mat];
-      D.f[BSE ] = &DD[TNW *size_Mat];
-      D.f[BNW ] = &DD[TSE *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -195,66 +195,66 @@ extern "C" __global__ void LBCalcMacSP27( real* vxD,
 
       if(geoD[k] == GEO_FLUID)
       {
-         rhoD[k]    =   (D.f[E   ])[ke  ]+ (D.f[W   ])[kw  ]+ 
-                        (D.f[N   ])[kn  ]+ (D.f[S   ])[ks  ]+
-                        (D.f[T   ])[kt  ]+ (D.f[B   ])[kb  ]+
-                        (D.f[NE  ])[kne ]+ (D.f[SW  ])[ksw ]+
-                        (D.f[SE  ])[kse ]+ (D.f[NW  ])[knw ]+
-                        (D.f[TE  ])[kte ]+ (D.f[BW  ])[kbw ]+
-                        (D.f[BE  ])[kbe ]+ (D.f[TW  ])[ktw ]+
-                        (D.f[TN  ])[ktn ]+ (D.f[BS  ])[kbs ]+
-                        (D.f[BN  ])[kbn ]+ (D.f[TS  ])[kts ]+
-                        (D.f[REST])[kzero]+ 
-                        (D.f[TNE ])[ktne]+ (D.f[TSW ])[ktsw]+ 
-                        (D.f[TSE ])[ktse]+ (D.f[TNW ])[ktnw]+ 
-                        (D.f[BNE ])[kbne]+ (D.f[BSW ])[kbsw]+ 
-                        (D.f[BSE ])[kbse]+ (D.f[BNW ])[kbnw];
-
-         vxD[k]     =   (D.f[E   ])[ke  ]- (D.f[W   ])[kw  ]+ 
-                        (D.f[NE  ])[kne ]- (D.f[SW  ])[ksw ]+
-                        (D.f[SE  ])[kse ]- (D.f[NW  ])[knw ]+
-                        (D.f[TE  ])[kte ]- (D.f[BW  ])[kbw ]+
-                        (D.f[BE  ])[kbe ]- (D.f[TW  ])[ktw ]+
-                        (D.f[TNE ])[ktne]- (D.f[TSW ])[ktsw]+ 
-                        (D.f[TSE ])[ktse]- (D.f[TNW ])[ktnw]+ 
-                        (D.f[BNE ])[kbne]- (D.f[BSW ])[kbsw]+ 
-                        (D.f[BSE ])[kbse]- (D.f[BNW ])[kbnw];
-
-         vyD[k]     =   (D.f[N   ])[kn  ]- (D.f[S   ])[ks  ]+
-                        (D.f[NE  ])[kne ]- (D.f[SW  ])[ksw ]-
-                        (D.f[SE  ])[kse ]+ (D.f[NW  ])[knw ]+
-                        (D.f[TN  ])[ktn ]- (D.f[BS  ])[kbs ]+
-                        (D.f[BN  ])[kbn ]- (D.f[TS  ])[kts ]+
-                        (D.f[TNE ])[ktne]- (D.f[TSW ])[ktsw]- 
-                        (D.f[TSE ])[ktse]+ (D.f[TNW ])[ktnw]+ 
-                        (D.f[BNE ])[kbne]- (D.f[BSW ])[kbsw]- 
-                        (D.f[BSE ])[kbse]+ (D.f[BNW ])[kbnw];
-
-         vzD[k]     =   (D.f[T   ])[kt  ]- (D.f[B   ])[kb  ]+
-                        (D.f[TE  ])[kte ]- (D.f[BW  ])[kbw ]-
-                        (D.f[BE  ])[kbe ]+ (D.f[TW  ])[ktw ]+
-                        (D.f[TN  ])[ktn ]- (D.f[BS  ])[kbs ]-
-                        (D.f[BN  ])[kbn ]+ (D.f[TS  ])[kts ]+
-                        (D.f[TNE ])[ktne]+ (D.f[TSW ])[ktsw]+ 
-                        (D.f[TSE ])[ktse]+ (D.f[TNW ])[ktnw]- 
-                        (D.f[BNE ])[kbne]- (D.f[BSW ])[kbsw]- 
-                        (D.f[BSE ])[kbse]- (D.f[BNW ])[kbnw];
-
-         pressD[k]  =  ((D.f[E   ])[ke  ]+ (D.f[W   ])[kw  ]+ 
-                        (D.f[N   ])[kn  ]+ (D.f[S   ])[ks  ]+
-                        (D.f[T   ])[kt  ]+ (D.f[B   ])[kb  ]+
+         rhoD[k]    =   (D.f[DIR_P00   ])[ke  ]+ (D.f[DIR_M00   ])[kw  ]+ 
+                        (D.f[DIR_0P0   ])[kn  ]+ (D.f[DIR_0M0   ])[ks  ]+
+                        (D.f[DIR_00P   ])[kt  ]+ (D.f[DIR_00M   ])[kb  ]+
+                        (D.f[DIR_PP0  ])[kne ]+ (D.f[DIR_MM0  ])[ksw ]+
+                        (D.f[DIR_PM0  ])[kse ]+ (D.f[DIR_MP0  ])[knw ]+
+                        (D.f[DIR_P0P  ])[kte ]+ (D.f[DIR_M0M  ])[kbw ]+
+                        (D.f[DIR_P0M  ])[kbe ]+ (D.f[DIR_M0P  ])[ktw ]+
+                        (D.f[DIR_0PP  ])[ktn ]+ (D.f[DIR_0MM  ])[kbs ]+
+                        (D.f[DIR_0PM  ])[kbn ]+ (D.f[DIR_0MP  ])[kts ]+
+                        (D.f[DIR_000])[kzero]+ 
+                        (D.f[DIR_PPP ])[ktne]+ (D.f[DIR_MMP ])[ktsw]+ 
+                        (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]+ 
+                        (D.f[DIR_PPM ])[kbne]+ (D.f[DIR_MMM ])[kbsw]+ 
+                        (D.f[DIR_PMM ])[kbse]+ (D.f[DIR_MPM ])[kbnw];
+
+         vxD[k]     =   (D.f[DIR_P00   ])[ke  ]- (D.f[DIR_M00   ])[kw  ]+ 
+                        (D.f[DIR_PP0  ])[kne ]- (D.f[DIR_MM0  ])[ksw ]+
+                        (D.f[DIR_PM0  ])[kse ]- (D.f[DIR_MP0  ])[knw ]+
+                        (D.f[DIR_P0P  ])[kte ]- (D.f[DIR_M0M  ])[kbw ]+
+                        (D.f[DIR_P0M  ])[kbe ]- (D.f[DIR_M0P  ])[ktw ]+
+                        (D.f[DIR_PPP ])[ktne]- (D.f[DIR_MMP ])[ktsw]+ 
+                        (D.f[DIR_PMP ])[ktse]- (D.f[DIR_MPP ])[ktnw]+ 
+                        (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM ])[kbsw]+ 
+                        (D.f[DIR_PMM ])[kbse]- (D.f[DIR_MPM ])[kbnw];
+
+         vyD[k]     =   (D.f[DIR_0P0   ])[kn  ]- (D.f[DIR_0M0   ])[ks  ]+
+                        (D.f[DIR_PP0  ])[kne ]- (D.f[DIR_MM0  ])[ksw ]-
+                        (D.f[DIR_PM0  ])[kse ]+ (D.f[DIR_MP0  ])[knw ]+
+                        (D.f[DIR_0PP  ])[ktn ]- (D.f[DIR_0MM  ])[kbs ]+
+                        (D.f[DIR_0PM  ])[kbn ]- (D.f[DIR_0MP  ])[kts ]+
+                        (D.f[DIR_PPP ])[ktne]- (D.f[DIR_MMP ])[ktsw]- 
+                        (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]+ 
+                        (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM ])[kbsw]- 
+                        (D.f[DIR_PMM ])[kbse]+ (D.f[DIR_MPM ])[kbnw];
+
+         vzD[k]     =   (D.f[DIR_00P   ])[kt  ]- (D.f[DIR_00M   ])[kb  ]+
+                        (D.f[DIR_P0P  ])[kte ]- (D.f[DIR_M0M  ])[kbw ]-
+                        (D.f[DIR_P0M  ])[kbe ]+ (D.f[DIR_M0P  ])[ktw ]+
+                        (D.f[DIR_0PP  ])[ktn ]- (D.f[DIR_0MM  ])[kbs ]-
+                        (D.f[DIR_0PM  ])[kbn ]+ (D.f[DIR_0MP  ])[kts ]+
+                        (D.f[DIR_PPP ])[ktne]+ (D.f[DIR_MMP ])[ktsw]+ 
+                        (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]- 
+                        (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM ])[kbsw]- 
+                        (D.f[DIR_PMM ])[kbse]- (D.f[DIR_MPM ])[kbnw];
+
+         pressD[k]  =  ((D.f[DIR_P00   ])[ke  ]+ (D.f[DIR_M00   ])[kw  ]+ 
+                        (D.f[DIR_0P0   ])[kn  ]+ (D.f[DIR_0M0   ])[ks  ]+
+                        (D.f[DIR_00P   ])[kt  ]+ (D.f[DIR_00M   ])[kb  ]+
                         2.f*(
-                        (D.f[NE  ])[kne ]+ (D.f[SW  ])[ksw ]+
-                        (D.f[SE  ])[kse ]+ (D.f[NW  ])[knw ]+
-                        (D.f[TE  ])[kte ]+ (D.f[BW  ])[kbw ]+
-                        (D.f[BE  ])[kbe ]+ (D.f[TW  ])[ktw ]+
-                        (D.f[TN  ])[ktn ]+ (D.f[BS  ])[kbs ]+
-                        (D.f[BN  ])[kbn ]+ (D.f[TS  ])[kts ])+
+                        (D.f[DIR_PP0  ])[kne ]+ (D.f[DIR_MM0  ])[ksw ]+
+                        (D.f[DIR_PM0  ])[kse ]+ (D.f[DIR_MP0  ])[knw ]+
+                        (D.f[DIR_P0P  ])[kte ]+ (D.f[DIR_M0M  ])[kbw ]+
+                        (D.f[DIR_P0M  ])[kbe ]+ (D.f[DIR_M0P  ])[ktw ]+
+                        (D.f[DIR_0PP  ])[ktn ]+ (D.f[DIR_0MM  ])[kbs ]+
+                        (D.f[DIR_0PM  ])[kbn ]+ (D.f[DIR_0MP  ])[kts ])+
                         3.f*(
-                        (D.f[TNE ])[ktne]+ (D.f[TSW ])[ktsw]+ 
-                        (D.f[TSE ])[ktse]+ (D.f[TNW ])[ktnw]+ 
-                        (D.f[BNE ])[kbne]+ (D.f[BSW ])[kbsw]+ 
-                        (D.f[BSE ])[kbse]+ (D.f[BNW ])[kbnw])-
+                        (D.f[DIR_PPP ])[ktne]+ (D.f[DIR_MMP ])[ktsw]+ 
+                        (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]+ 
+                        (D.f[DIR_PPM ])[kbne]+ (D.f[DIR_MMM ])[kbsw]+ 
+                        (D.f[DIR_PMM ])[kbse]+ (D.f[DIR_MPM ])[kbnw])-
                         rhoD[k]-(vxD[k] * vxD[k] + vyD[k] * vyD[k] + vzD[k] * vzD[k]) * (c1o1+c0o1*rhoD[k])) * c1o2+rhoD[k]; // times zero for incompressible case   
          //achtung op hart gesetzt Annahme op = 1 ;                                                    ^^^^(1.0/op-0.5)=0.5
 
@@ -264,10 +264,19 @@ extern "C" __global__ void LBCalcMacSP27( real* vxD,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LBCalcMacCompSP27(real *vxD, real *vyD, real *vzD, real *rhoD, real *pressD,
-                                             unsigned int *geoD, unsigned int *neighborX, unsigned int *neighborY,
-                                             unsigned int *neighborZ, unsigned int size_Mat, real *distributions,
-                                             bool isEvenTimestep)
+__global__ void LBCalcMacCompSP27(
+   real *vxD,
+   real *vyD,
+   real *vzD,
+   real *rhoD,
+   real *pressD,
+   unsigned int *geoD,
+   unsigned int *neighborX,
+   unsigned int *neighborY,
+   unsigned int *neighborZ,
+   unsigned int size_Mat,
+   real *distributions,
+   bool isEvenTimestep)
 {
     const unsigned k = vf::gpu::getNodeIndex();
 
@@ -330,7 +339,7 @@ extern "C" __global__ void LBCalcMacCompSP27(real *vxD, real *vyD, real *vzD, re
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LBCalcMedSP27( real* vxD,
+__global__ void LBCalcMedSP27( real* vxD,
                                           real* vyD,
                                           real* vzD,
                                           real* rhoD,
@@ -346,63 +355,63 @@ extern "C" __global__ void LBCalcMedSP27( real* vxD,
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[E   ] = &DD[E   *size_Mat];
-      D.f[W   ] = &DD[W   *size_Mat];
-      D.f[N   ] = &DD[N   *size_Mat];
-      D.f[S   ] = &DD[S   *size_Mat];
-      D.f[T   ] = &DD[T   *size_Mat];
-      D.f[B   ] = &DD[B   *size_Mat];
-      D.f[NE  ] = &DD[NE  *size_Mat];
-      D.f[SW  ] = &DD[SW  *size_Mat];
-      D.f[SE  ] = &DD[SE  *size_Mat];
-      D.f[NW  ] = &DD[NW  *size_Mat];
-      D.f[TE  ] = &DD[TE  *size_Mat];
-      D.f[BW  ] = &DD[BW  *size_Mat];
-      D.f[BE  ] = &DD[BE  *size_Mat];
-      D.f[TW  ] = &DD[TW  *size_Mat];
-      D.f[TN  ] = &DD[TN  *size_Mat];
-      D.f[BS  ] = &DD[BS  *size_Mat];
-      D.f[BN  ] = &DD[BN  *size_Mat];
-      D.f[TS  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[TNE *size_Mat];
-      D.f[TSW ] = &DD[TSW *size_Mat];
-      D.f[TSE ] = &DD[TSE *size_Mat];
-      D.f[TNW ] = &DD[TNW *size_Mat];
-      D.f[BNE ] = &DD[BNE *size_Mat];
-      D.f[BSW ] = &DD[BSW *size_Mat];
-      D.f[BSE ] = &DD[BSE *size_Mat];
-      D.f[BNW ] = &DD[BNW *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
    } 
    else
    {
-      D.f[W   ] = &DD[E   *size_Mat];
-      D.f[E   ] = &DD[W   *size_Mat];
-      D.f[S   ] = &DD[N   *size_Mat];
-      D.f[N   ] = &DD[S   *size_Mat];
-      D.f[B   ] = &DD[T   *size_Mat];
-      D.f[T   ] = &DD[B   *size_Mat];
-      D.f[SW  ] = &DD[NE  *size_Mat];
-      D.f[NE  ] = &DD[SW  *size_Mat];
-      D.f[NW  ] = &DD[SE  *size_Mat];
-      D.f[SE  ] = &DD[NW  *size_Mat];
-      D.f[BW  ] = &DD[TE  *size_Mat];
-      D.f[TE  ] = &DD[BW  *size_Mat];
-      D.f[TW  ] = &DD[BE  *size_Mat];
-      D.f[BE  ] = &DD[TW  *size_Mat];
-      D.f[BS  ] = &DD[TN  *size_Mat];
-      D.f[TN  ] = &DD[BS  *size_Mat];
-      D.f[TS  ] = &DD[BN  *size_Mat];
-      D.f[BN  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[BSW *size_Mat];
-      D.f[TSW ] = &DD[BNE *size_Mat];
-      D.f[TSE ] = &DD[BNW *size_Mat];
-      D.f[TNW ] = &DD[BSE *size_Mat];
-      D.f[BNE ] = &DD[TSW *size_Mat];
-      D.f[BSW ] = &DD[TNE *size_Mat];
-      D.f[BSE ] = &DD[TNW *size_Mat];
-      D.f[BNW ] = &DD[TSE *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -461,70 +470,70 @@ extern "C" __global__ void LBCalcMedSP27( real* vxD,
 
       if(geoD[k] == GEO_FLUID)
       {
-         rhoD[k]    =   (D.f[E   ])[ke  ]+ (D.f[W   ])[kw  ]+ 
-                        (D.f[N   ])[kn  ]+ (D.f[S   ])[ks  ]+
-                        (D.f[T   ])[kt  ]+ (D.f[B   ])[kb  ]+
-                        (D.f[NE  ])[kne ]+ (D.f[SW  ])[ksw ]+
-                        (D.f[SE  ])[kse ]+ (D.f[NW  ])[knw ]+
-                        (D.f[TE  ])[kte ]+ (D.f[BW  ])[kbw ]+
-                        (D.f[BE  ])[kbe ]+ (D.f[TW  ])[ktw ]+
-                        (D.f[TN  ])[ktn ]+ (D.f[BS  ])[kbs ]+
-                        (D.f[BN  ])[kbn ]+ (D.f[TS  ])[kts ]+
-                        (D.f[REST])[kzero]+ 
-                        (D.f[TNE ])[ktne]+ (D.f[TSW ])[ktsw]+ 
-                        (D.f[TSE ])[ktse]+ (D.f[TNW ])[ktnw]+ 
-                        (D.f[BNE ])[kbne]+ (D.f[BSW ])[kbsw]+ 
-                        (D.f[BSE ])[kbse]+ (D.f[BNW ])[kbnw]+
+         rhoD[k]    =   (D.f[DIR_P00   ])[ke  ]+ (D.f[DIR_M00   ])[kw  ]+ 
+                        (D.f[DIR_0P0   ])[kn  ]+ (D.f[DIR_0M0   ])[ks  ]+
+                        (D.f[DIR_00P   ])[kt  ]+ (D.f[DIR_00M   ])[kb  ]+
+                        (D.f[DIR_PP0  ])[kne ]+ (D.f[DIR_MM0  ])[ksw ]+
+                        (D.f[DIR_PM0  ])[kse ]+ (D.f[DIR_MP0  ])[knw ]+
+                        (D.f[DIR_P0P  ])[kte ]+ (D.f[DIR_M0M  ])[kbw ]+
+                        (D.f[DIR_P0M  ])[kbe ]+ (D.f[DIR_M0P  ])[ktw ]+
+                        (D.f[DIR_0PP  ])[ktn ]+ (D.f[DIR_0MM  ])[kbs ]+
+                        (D.f[DIR_0PM  ])[kbn ]+ (D.f[DIR_0MP  ])[kts ]+
+                        (D.f[DIR_000])[kzero]+ 
+                        (D.f[DIR_PPP ])[ktne]+ (D.f[DIR_MMP ])[ktsw]+ 
+                        (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]+ 
+                        (D.f[DIR_PPM ])[kbne]+ (D.f[DIR_MMM ])[kbsw]+ 
+                        (D.f[DIR_PMM ])[kbse]+ (D.f[DIR_MPM ])[kbnw]+
                         RHO;
 
-         vxD[k]     =   (D.f[E   ])[ke  ]- (D.f[W   ])[kw  ]+ 
-                        (D.f[NE  ])[kne ]- (D.f[SW  ])[ksw ]+
-                        (D.f[SE  ])[kse ]- (D.f[NW  ])[knw ]+
-                        (D.f[TE  ])[kte ]- (D.f[BW  ])[kbw ]+
-                        (D.f[BE  ])[kbe ]- (D.f[TW  ])[ktw ]+
-                        (D.f[TNE ])[ktne]- (D.f[TSW ])[ktsw]+ 
-                        (D.f[TSE ])[ktse]- (D.f[TNW ])[ktnw]+ 
-                        (D.f[BNE ])[kbne]- (D.f[BSW ])[kbsw]+ 
-                        (D.f[BSE ])[kbse]- (D.f[BNW ])[kbnw]+
+         vxD[k]     =   (D.f[DIR_P00   ])[ke  ]- (D.f[DIR_M00   ])[kw  ]+ 
+                        (D.f[DIR_PP0  ])[kne ]- (D.f[DIR_MM0  ])[ksw ]+
+                        (D.f[DIR_PM0  ])[kse ]- (D.f[DIR_MP0  ])[knw ]+
+                        (D.f[DIR_P0P  ])[kte ]- (D.f[DIR_M0M  ])[kbw ]+
+                        (D.f[DIR_P0M  ])[kbe ]- (D.f[DIR_M0P  ])[ktw ]+
+                        (D.f[DIR_PPP ])[ktne]- (D.f[DIR_MMP ])[ktsw]+ 
+                        (D.f[DIR_PMP ])[ktse]- (D.f[DIR_MPP ])[ktnw]+ 
+                        (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM ])[kbsw]+ 
+                        (D.f[DIR_PMM ])[kbse]- (D.f[DIR_MPM ])[kbnw]+
                         VX;
 
-         vyD[k]     =   (D.f[N   ])[kn  ]- (D.f[S   ])[ks  ]+
-                        (D.f[NE  ])[kne ]- (D.f[SW  ])[ksw ]-
-                        (D.f[SE  ])[kse ]+ (D.f[NW  ])[knw ]+
-                        (D.f[TN  ])[ktn ]- (D.f[BS  ])[kbs ]+
-                        (D.f[BN  ])[kbn ]- (D.f[TS  ])[kts ]+
-                        (D.f[TNE ])[ktne]- (D.f[TSW ])[ktsw]- 
-                        (D.f[TSE ])[ktse]+ (D.f[TNW ])[ktnw]+ 
-                        (D.f[BNE ])[kbne]- (D.f[BSW ])[kbsw]- 
-                        (D.f[BSE ])[kbse]+ (D.f[BNW ])[kbnw]+
+         vyD[k]     =   (D.f[DIR_0P0   ])[kn  ]- (D.f[DIR_0M0   ])[ks  ]+
+                        (D.f[DIR_PP0  ])[kne ]- (D.f[DIR_MM0  ])[ksw ]-
+                        (D.f[DIR_PM0  ])[kse ]+ (D.f[DIR_MP0  ])[knw ]+
+                        (D.f[DIR_0PP  ])[ktn ]- (D.f[DIR_0MM  ])[kbs ]+
+                        (D.f[DIR_0PM  ])[kbn ]- (D.f[DIR_0MP  ])[kts ]+
+                        (D.f[DIR_PPP ])[ktne]- (D.f[DIR_MMP ])[ktsw]- 
+                        (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]+ 
+                        (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM ])[kbsw]- 
+                        (D.f[DIR_PMM ])[kbse]+ (D.f[DIR_MPM ])[kbnw]+
                         VY;
 
-         vzD[k]     =   (D.f[T   ])[kt  ]- (D.f[B   ])[kb  ]+
-                        (D.f[TE  ])[kte ]- (D.f[BW  ])[kbw ]-
-                        (D.f[BE  ])[kbe ]+ (D.f[TW  ])[ktw ]+
-                        (D.f[TN  ])[ktn ]- (D.f[BS  ])[kbs ]-
-                        (D.f[BN  ])[kbn ]+ (D.f[TS  ])[kts ]+
-                        (D.f[TNE ])[ktne]+ (D.f[TSW ])[ktsw]+ 
-                        (D.f[TSE ])[ktse]+ (D.f[TNW ])[ktnw]- 
-                        (D.f[BNE ])[kbne]- (D.f[BSW ])[kbsw]- 
-                        (D.f[BSE ])[kbse]- (D.f[BNW ])[kbnw]+
+         vzD[k]     =   (D.f[DIR_00P   ])[kt  ]- (D.f[DIR_00M   ])[kb  ]+
+                        (D.f[DIR_P0P  ])[kte ]- (D.f[DIR_M0M  ])[kbw ]-
+                        (D.f[DIR_P0M  ])[kbe ]+ (D.f[DIR_M0P  ])[ktw ]+
+                        (D.f[DIR_0PP  ])[ktn ]- (D.f[DIR_0MM  ])[kbs ]-
+                        (D.f[DIR_0PM  ])[kbn ]+ (D.f[DIR_0MP  ])[kts ]+
+                        (D.f[DIR_PPP ])[ktne]+ (D.f[DIR_MMP ])[ktsw]+ 
+                        (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]- 
+                        (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM ])[kbsw]- 
+                        (D.f[DIR_PMM ])[kbse]- (D.f[DIR_MPM ])[kbnw]+
                         VZ;
 
-         pressD[k]  =   ((D.f[E   ])[ke  ]+ (D.f[W   ])[kw  ]+ 
-                        (D.f[N   ])[kn  ]+ (D.f[S   ])[ks  ]+
-                        (D.f[T   ])[kt  ]+ (D.f[B   ])[kb  ]+
+         pressD[k]  =   ((D.f[DIR_P00   ])[ke  ]+ (D.f[DIR_M00   ])[kw  ]+ 
+                        (D.f[DIR_0P0   ])[kn  ]+ (D.f[DIR_0M0   ])[ks  ]+
+                        (D.f[DIR_00P   ])[kt  ]+ (D.f[DIR_00M   ])[kb  ]+
                         c2o1*(
-                        (D.f[NE  ])[kne ]+ (D.f[SW  ])[ksw ]+
-                        (D.f[SE  ])[kse ]+ (D.f[NW  ])[knw ]+
-                        (D.f[TE  ])[kte ]+ (D.f[BW  ])[kbw ]+
-                        (D.f[BE  ])[kbe ]+ (D.f[TW  ])[ktw ]+
-                        (D.f[TN  ])[ktn ]+ (D.f[BS  ])[kbs ]+
-                        (D.f[BN  ])[kbn ]+ (D.f[TS  ])[kts ])+
+                        (D.f[DIR_PP0  ])[kne ]+ (D.f[DIR_MM0  ])[ksw ]+
+                        (D.f[DIR_PM0  ])[kse ]+ (D.f[DIR_MP0  ])[knw ]+
+                        (D.f[DIR_P0P  ])[kte ]+ (D.f[DIR_M0M  ])[kbw ]+
+                        (D.f[DIR_P0M  ])[kbe ]+ (D.f[DIR_M0P  ])[ktw ]+
+                        (D.f[DIR_0PP  ])[ktn ]+ (D.f[DIR_0MM  ])[kbs ]+
+                        (D.f[DIR_0PM  ])[kbn ]+ (D.f[DIR_0MP  ])[kts ])+
                         c3o1*(
-                        (D.f[TNE ])[ktne]+ (D.f[TSW ])[ktsw]+ 
-                        (D.f[TSE ])[ktse]+ (D.f[TNW ])[ktnw]+ 
-                        (D.f[BNE ])[kbne]+ (D.f[BSW ])[kbsw]+ 
-                        (D.f[BSE ])[kbse]+ (D.f[BNW ])[kbnw])-
+                        (D.f[DIR_PPP ])[ktne]+ (D.f[DIR_MMP ])[ktsw]+ 
+                        (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]+ 
+                        (D.f[DIR_PPM ])[kbne]+ (D.f[DIR_MMM ])[kbsw]+ 
+                        (D.f[DIR_PMM ])[kbse]+ (D.f[DIR_MPM ])[kbnw])-
                         rhoD[k]-(vxD[k] * vxD[k] + vyD[k] * vyD[k] + vzD[k] * vzD[k]) * (c1o1+rhoD[k])) * c1o2+rhoD[k]+
                         PRESS;    
          //achtung op hart gesetzt Annahme op = 1 ;                                                    ^^^^(1.0/op-0.5)=0.5
@@ -554,7 +563,7 @@ extern "C" __global__ void LBCalcMedSP27( real* vxD,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LBCalcMedCompSP27( real* vxD,
+__global__ void LBCalcMedCompSP27( real* vxD,
 											  real* vyD,
 											  real* vzD,
 											  real* rhoD,
@@ -570,63 +579,63 @@ extern "C" __global__ void LBCalcMedCompSP27( real* vxD,
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[E   ] = &DD[E   *size_Mat];
-      D.f[W   ] = &DD[W   *size_Mat];
-      D.f[N   ] = &DD[N   *size_Mat];
-      D.f[S   ] = &DD[S   *size_Mat];
-      D.f[T   ] = &DD[T   *size_Mat];
-      D.f[B   ] = &DD[B   *size_Mat];
-      D.f[NE  ] = &DD[NE  *size_Mat];
-      D.f[SW  ] = &DD[SW  *size_Mat];
-      D.f[SE  ] = &DD[SE  *size_Mat];
-      D.f[NW  ] = &DD[NW  *size_Mat];
-      D.f[TE  ] = &DD[TE  *size_Mat];
-      D.f[BW  ] = &DD[BW  *size_Mat];
-      D.f[BE  ] = &DD[BE  *size_Mat];
-      D.f[TW  ] = &DD[TW  *size_Mat];
-      D.f[TN  ] = &DD[TN  *size_Mat];
-      D.f[BS  ] = &DD[BS  *size_Mat];
-      D.f[BN  ] = &DD[BN  *size_Mat];
-      D.f[TS  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[TNE *size_Mat];
-      D.f[TSW ] = &DD[TSW *size_Mat];
-      D.f[TSE ] = &DD[TSE *size_Mat];
-      D.f[TNW ] = &DD[TNW *size_Mat];
-      D.f[BNE ] = &DD[BNE *size_Mat];
-      D.f[BSW ] = &DD[BSW *size_Mat];
-      D.f[BSE ] = &DD[BSE *size_Mat];
-      D.f[BNW ] = &DD[BNW *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
    } 
    else
    {
-      D.f[W   ] = &DD[E   *size_Mat];
-      D.f[E   ] = &DD[W   *size_Mat];
-      D.f[S   ] = &DD[N   *size_Mat];
-      D.f[N   ] = &DD[S   *size_Mat];
-      D.f[B   ] = &DD[T   *size_Mat];
-      D.f[T   ] = &DD[B   *size_Mat];
-      D.f[SW  ] = &DD[NE  *size_Mat];
-      D.f[NE  ] = &DD[SW  *size_Mat];
-      D.f[NW  ] = &DD[SE  *size_Mat];
-      D.f[SE  ] = &DD[NW  *size_Mat];
-      D.f[BW  ] = &DD[TE  *size_Mat];
-      D.f[TE  ] = &DD[BW  *size_Mat];
-      D.f[TW  ] = &DD[BE  *size_Mat];
-      D.f[BE  ] = &DD[TW  *size_Mat];
-      D.f[BS  ] = &DD[TN  *size_Mat];
-      D.f[TN  ] = &DD[BS  *size_Mat];
-      D.f[TS  ] = &DD[BN  *size_Mat];
-      D.f[BN  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[BSW *size_Mat];
-      D.f[TSW ] = &DD[BNE *size_Mat];
-      D.f[TSE ] = &DD[BNW *size_Mat];
-      D.f[TNW ] = &DD[BSE *size_Mat];
-      D.f[BNE ] = &DD[TSW *size_Mat];
-      D.f[BSW ] = &DD[TNE *size_Mat];
-      D.f[BSE ] = &DD[TNW *size_Mat];
-      D.f[BNW ] = &DD[TSE *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -685,33 +694,33 @@ extern "C" __global__ void LBCalcMedCompSP27( real* vxD,
 
       if(geoD[k] == GEO_FLUID)
       {
-		  real mfcbb = (D.f[E])[k];//[ke   ];
-		  real mfabb = (D.f[W])[kw];//[kw   ];  
-		  real mfbcb = (D.f[N])[k];//[kn   ];
-		  real mfbab = (D.f[S])[ks];//[ks   ];  
-		  real mfbbc = (D.f[T])[k];//[kt   ];
-		  real mfbba = (D.f[B])[kb];//[kb   ];  
-		  real mfccb = (D.f[NE])[k];//[kne  ];  
-		  real mfaab = (D.f[SW])[ksw];//[ksw  ];
-		  real mfcab = (D.f[SE])[ks];//[kse  ]; 
-		  real mfacb = (D.f[NW])[kw];//[knw  ]; 
-		  real mfcbc = (D.f[TE])[k];//[kte  ];  
-		  real mfaba = (D.f[BW])[kbw];//[kbw  ];
-		  real mfcba = (D.f[BE])[kb];//[kbe  ]; 
-		  real mfabc = (D.f[TW])[kw];//[ktw  ]; 
-		  real mfbcc = (D.f[TN])[k];//[ktn  ];  
-		  real mfbaa = (D.f[BS])[kbs];//[kbs  ];
-		  real mfbca = (D.f[BN])[kb];//[kbn  ]; 
-		  real mfbac = (D.f[TS])[ks];//[kts  ]; 
-		  real mfbbb = (D.f[REST])[k];//[kzero];
-		  real mfccc = (D.f[TNE])[k];//[ktne ]; 
-		  real mfaac = (D.f[TSW])[ksw];//[ktsw ]; 
-		  real mfcac = (D.f[TSE])[ks];//[ktse ];
-		  real mfacc = (D.f[TNW])[kw];//[ktnw ];
-		  real mfcca = (D.f[BNE])[kb];//[kbne ];
-		  real mfaaa = (D.f[BSW])[kbsw];//[kbsw ];
-		  real mfcaa = (D.f[BSE])[kbs];//[kbse ]; 
-		  real mfaca = (D.f[BNW])[kbw];//[kbnw ]; 
+		  real mfcbb = (D.f[DIR_P00])[k];//[ke   ];
+		  real mfabb = (D.f[DIR_M00])[kw];//[kw   ];  
+		  real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];
+		  real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];  
+		  real mfbbc = (D.f[DIR_00P])[k];//[kt   ];
+		  real mfbba = (D.f[DIR_00M])[kb];//[kb   ];  
+		  real mfccb = (D.f[DIR_PP0])[k];//[kne  ];  
+		  real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];
+		  real mfcab = (D.f[DIR_PM0])[ks];//[kse  ]; 
+		  real mfacb = (D.f[DIR_MP0])[kw];//[knw  ]; 
+		  real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];  
+		  real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];
+		  real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ]; 
+		  real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ]; 
+		  real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];  
+		  real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];
+		  real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ]; 
+		  real mfbac = (D.f[DIR_0MP])[ks];//[kts  ]; 
+		  real mfbbb = (D.f[DIR_000])[k];//[kzero];
+		  real mfccc = (D.f[DIR_PPP])[k];//[ktne ]; 
+		  real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ]; 
+		  real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];
+		  real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];
+		  real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];
+		  real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];
+		  real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ]; 
+		  real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ]; 
 		  ////////////////////////////////////////////////////////////////////////////////////
 		  real drho = 
 			  ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
@@ -736,72 +745,72 @@ extern "C" __global__ void LBCalcMedCompSP27( real* vxD,
 			  (mfbbc - mfbba)) / rho) + VZ;
 
 		  //rhoD[k] =
-			 // (D.f[E])[ke] + (D.f[W])[kw] +
-			 // (D.f[N])[kn] + (D.f[S])[ks] +
-			 // (D.f[T])[kt] + (D.f[B])[kb] +
-			 // (D.f[NE])[kne] + (D.f[SW])[ksw] +
-			 // (D.f[SE])[kse] + (D.f[NW])[knw] +
-			 // (D.f[TE])[kte] + (D.f[BW])[kbw] +
-			 // (D.f[BE])[kbe] + (D.f[TW])[ktw] +
-			 // (D.f[TN])[ktn] + (D.f[BS])[kbs] +
-			 // (D.f[BN])[kbn] + (D.f[TS])[kts] +
-			 // (D.f[REST])[kzero] +
-			 // (D.f[TNE])[ktne] + (D.f[TSW])[ktsw] +
-			 // (D.f[TSE])[ktse] + (D.f[TNW])[ktnw] +
-			 // (D.f[BNE])[kbne] + (D.f[BSW])[kbsw] +
-			 // (D.f[BSE])[kbse] + (D.f[BNW])[kbnw];// +RHO;
+			 // (D.f[DIR_P00])[ke] + (D.f[DIR_M00])[kw] +
+			 // (D.f[DIR_0P0])[kn] + (D.f[DIR_0M0])[ks] +
+			 // (D.f[DIR_00P])[kt] + (D.f[DIR_00M])[kb] +
+			 // (D.f[DIR_PP0])[kne] + (D.f[DIR_MM0])[ksw] +
+			 // (D.f[DIR_PM0])[kse] + (D.f[DIR_MP0])[knw] +
+			 // (D.f[DIR_P0P])[kte] + (D.f[DIR_M0M])[kbw] +
+			 // (D.f[DIR_P0M])[kbe] + (D.f[DIR_M0P])[ktw] +
+			 // (D.f[DIR_0PP])[ktn] + (D.f[DIR_0MM])[kbs] +
+			 // (D.f[DIR_0PM])[kbn] + (D.f[DIR_0MP])[kts] +
+			 // (D.f[DIR_000])[kzero] +
+			 // (D.f[DIR_PPP])[ktne] + (D.f[DIR_MMP])[ktsw] +
+			 // (D.f[DIR_PMP])[ktse] + (D.f[DIR_MPP])[ktnw] +
+			 // (D.f[DIR_PPM])[kbne] + (D.f[DIR_MMM])[kbsw] +
+			 // (D.f[DIR_PMM])[kbse] + (D.f[DIR_MPM])[kbnw];// +RHO;
 
     //     vxD[k] =  
-			 //((D.f[E  ])[ke  ]- (D.f[W   ])[kw  ]+ 
-    //         (D.f[NE  ])[kne ]- (D.f[SW  ])[ksw ]+
-    //         (D.f[SE  ])[kse ]- (D.f[NW  ])[knw ]+
-    //         (D.f[TE  ])[kte ]- (D.f[BW  ])[kbw ]+
-    //         (D.f[BE  ])[kbe ]- (D.f[TW  ])[ktw ]+
-    //         (D.f[TNE ])[ktne]- (D.f[TSW ])[ktsw]+ 
-    //         (D.f[TSE ])[ktse]- (D.f[TNW ])[ktnw]+ 
-    //         (D.f[BNE ])[kbne]- (D.f[BSW ])[kbsw]+ 
-    //         (D.f[BSE ])[kbse]- (D.f[BNW ])[kbnw]) / (one + rhoD[k])+
+			 //((D.f[DIR_P00  ])[ke  ]- (D.f[DIR_M00   ])[kw  ]+ 
+    //         (D.f[DIR_PP0  ])[kne ]- (D.f[DIR_MM0  ])[ksw ]+
+    //         (D.f[DIR_PM0  ])[kse ]- (D.f[DIR_MP0  ])[knw ]+
+    //         (D.f[DIR_P0P  ])[kte ]- (D.f[DIR_M0M  ])[kbw ]+
+    //         (D.f[DIR_P0M  ])[kbe ]- (D.f[DIR_M0P  ])[ktw ]+
+    //         (D.f[DIR_PPP ])[ktne]- (D.f[DIR_MMP ])[ktsw]+ 
+    //         (D.f[DIR_PMP ])[ktse]- (D.f[DIR_MPP ])[ktnw]+ 
+    //         (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM ])[kbsw]+ 
+    //         (D.f[DIR_PMM ])[kbse]- (D.f[DIR_MPM ])[kbnw]) / (one + rhoD[k])+
     //         VX;
 
     //     vyD[k] =  
-			 //((D.f[N  ])[kn  ]- (D.f[S   ])[ks  ]+
-    //         (D.f[NE  ])[kne ]- (D.f[SW  ])[ksw ]-
-    //         (D.f[SE  ])[kse ]+ (D.f[NW  ])[knw ]+
-    //         (D.f[TN  ])[ktn ]- (D.f[BS  ])[kbs ]+
-    //         (D.f[BN  ])[kbn ]- (D.f[TS  ])[kts ]+
-    //         (D.f[TNE ])[ktne]- (D.f[TSW ])[ktsw]- 
-    //         (D.f[TSE ])[ktse]+ (D.f[TNW ])[ktnw]+ 
-    //         (D.f[BNE ])[kbne]- (D.f[BSW ])[kbsw]- 
-    //         (D.f[BSE ])[kbse]+ (D.f[BNW ])[kbnw]) / (one + rhoD[k])+
+			 //((D.f[DIR_0P0  ])[kn  ]- (D.f[DIR_0M0   ])[ks  ]+
+    //         (D.f[DIR_PP0  ])[kne ]- (D.f[DIR_MM0  ])[ksw ]-
+    //         (D.f[DIR_PM0  ])[kse ]+ (D.f[DIR_MP0  ])[knw ]+
+    //         (D.f[DIR_0PP  ])[ktn ]- (D.f[DIR_0MM  ])[kbs ]+
+    //         (D.f[DIR_0PM  ])[kbn ]- (D.f[DIR_0MP  ])[kts ]+
+    //         (D.f[DIR_PPP ])[ktne]- (D.f[DIR_MMP ])[ktsw]- 
+    //         (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]+ 
+    //         (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM ])[kbsw]- 
+    //         (D.f[DIR_PMM ])[kbse]+ (D.f[DIR_MPM ])[kbnw]) / (one + rhoD[k])+
     //         VY;
 
     //     vzD[k] =  
-			 //((D.f[T  ])[kt  ]- (D.f[B   ])[kb  ]+
-    //         (D.f[TE  ])[kte ]- (D.f[BW  ])[kbw ]-
-    //         (D.f[BE  ])[kbe ]+ (D.f[TW  ])[ktw ]+
-    //         (D.f[TN  ])[ktn ]- (D.f[BS  ])[kbs ]-
-    //         (D.f[BN  ])[kbn ]+ (D.f[TS  ])[kts ]+
-    //         (D.f[TNE ])[ktne]+ (D.f[TSW ])[ktsw]+ 
-    //         (D.f[TSE ])[ktse]+ (D.f[TNW ])[ktnw]- 
-    //         (D.f[BNE ])[kbne]- (D.f[BSW ])[kbsw]- 
-    //         (D.f[BSE ])[kbse]- (D.f[BNW ])[kbnw]) / (one + rhoD[k])+
+			 //((D.f[DIR_00P  ])[kt  ]- (D.f[DIR_00M   ])[kb  ]+
+    //         (D.f[DIR_P0P  ])[kte ]- (D.f[DIR_M0M  ])[kbw ]-
+    //         (D.f[DIR_P0M  ])[kbe ]+ (D.f[DIR_M0P  ])[ktw ]+
+    //         (D.f[DIR_0PP  ])[ktn ]- (D.f[DIR_0MM  ])[kbs ]-
+    //         (D.f[DIR_0PM  ])[kbn ]+ (D.f[DIR_0MP  ])[kts ]+
+    //         (D.f[DIR_PPP ])[ktne]+ (D.f[DIR_MMP ])[ktsw]+ 
+    //         (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]- 
+    //         (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM ])[kbsw]- 
+    //         (D.f[DIR_PMM ])[kbse]- (D.f[DIR_MPM ])[kbnw]) / (one + rhoD[k])+
     //         VZ;
 
-         pressD[k]  =  ((D.f[E   ])[ke  ]+ (D.f[W   ])[kw  ]+ 
-                        (D.f[N   ])[kn  ]+ (D.f[S   ])[ks  ]+
-                        (D.f[T   ])[kt  ]+ (D.f[B   ])[kb  ]+
+         pressD[k]  =  ((D.f[DIR_P00   ])[ke  ]+ (D.f[DIR_M00   ])[kw  ]+ 
+                        (D.f[DIR_0P0   ])[kn  ]+ (D.f[DIR_0M0   ])[ks  ]+
+                        (D.f[DIR_00P   ])[kt  ]+ (D.f[DIR_00M   ])[kb  ]+
                         c2o1*(
-                        (D.f[NE  ])[kne ]+ (D.f[SW  ])[ksw ]+
-                        (D.f[SE  ])[kse ]+ (D.f[NW  ])[knw ]+
-                        (D.f[TE  ])[kte ]+ (D.f[BW  ])[kbw ]+
-                        (D.f[BE  ])[kbe ]+ (D.f[TW  ])[ktw ]+
-                        (D.f[TN  ])[ktn ]+ (D.f[BS  ])[kbs ]+
-                        (D.f[BN  ])[kbn ]+ (D.f[TS  ])[kts ])+
+                        (D.f[DIR_PP0  ])[kne ]+ (D.f[DIR_MM0  ])[ksw ]+
+                        (D.f[DIR_PM0  ])[kse ]+ (D.f[DIR_MP0  ])[knw ]+
+                        (D.f[DIR_P0P  ])[kte ]+ (D.f[DIR_M0M  ])[kbw ]+
+                        (D.f[DIR_P0M  ])[kbe ]+ (D.f[DIR_M0P  ])[ktw ]+
+                        (D.f[DIR_0PP  ])[ktn ]+ (D.f[DIR_0MM  ])[kbs ]+
+                        (D.f[DIR_0PM  ])[kbn ]+ (D.f[DIR_0MP  ])[kts ])+
                         c3o1*(
-                        (D.f[TNE ])[ktne]+ (D.f[TSW ])[ktsw]+ 
-                        (D.f[TSE ])[ktse]+ (D.f[TNW ])[ktnw]+ 
-                        (D.f[BNE ])[kbne]+ (D.f[BSW ])[kbsw]+ 
-                        (D.f[BSE ])[kbse]+ (D.f[BNW ])[kbnw])-
+                        (D.f[DIR_PPP ])[ktne]+ (D.f[DIR_MMP ])[ktsw]+ 
+                        (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]+ 
+                        (D.f[DIR_PPM ])[kbne]+ (D.f[DIR_MMM ])[kbsw]+ 
+                        (D.f[DIR_PMM ])[kbse]+ (D.f[DIR_MPM ])[kbnw])-
                         rhoD[k]-(vxD[k] * vxD[k] + vyD[k] * vyD[k] + vzD[k] * vzD[k]) * (c1o1+rhoD[k])) * c1o2+rhoD[k]+
                         PRESS;    
          //achtung op hart gesetzt Annahme op = 1 ;                                                    ^^^^(1.0/op-0.5)=0.5
@@ -831,7 +840,7 @@ extern "C" __global__ void LBCalcMedCompSP27( real* vxD,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LBCalcMedCompAD27(
+__global__ void LBCalcMedCompAD27(
 	real* vxD,
 	real* vyD,
 	real* vzD,
@@ -850,125 +859,125 @@ extern "C" __global__ void LBCalcMedCompAD27(
 	Distributions27 D;
 	if (isEvenTimestep == true)
 	{
-		D.f[E] = &DD[E   *size_Mat];
-		D.f[W] = &DD[W   *size_Mat];
-		D.f[N] = &DD[N   *size_Mat];
-		D.f[S] = &DD[S   *size_Mat];
-		D.f[T] = &DD[T   *size_Mat];
-		D.f[B] = &DD[B   *size_Mat];
-		D.f[NE] = &DD[NE  *size_Mat];
-		D.f[SW] = &DD[SW  *size_Mat];
-		D.f[SE] = &DD[SE  *size_Mat];
-		D.f[NW] = &DD[NW  *size_Mat];
-		D.f[TE] = &DD[TE  *size_Mat];
-		D.f[BW] = &DD[BW  *size_Mat];
-		D.f[BE] = &DD[BE  *size_Mat];
-		D.f[TW] = &DD[TW  *size_Mat];
-		D.f[TN] = &DD[TN  *size_Mat];
-		D.f[BS] = &DD[BS  *size_Mat];
-		D.f[BN] = &DD[BN  *size_Mat];
-		D.f[TS] = &DD[TS  *size_Mat];
-		D.f[REST] = &DD[REST*size_Mat];
-		D.f[TNE] = &DD[TNE *size_Mat];
-		D.f[TSW] = &DD[TSW *size_Mat];
-		D.f[TSE] = &DD[TSE *size_Mat];
-		D.f[TNW] = &DD[TNW *size_Mat];
-		D.f[BNE] = &DD[BNE *size_Mat];
-		D.f[BSW] = &DD[BSW *size_Mat];
-		D.f[BSE] = &DD[BSE *size_Mat];
-		D.f[BNW] = &DD[BNW *size_Mat];
+		D.f[DIR_P00] = &DD[DIR_P00   *size_Mat]; // even timestep: every D.f[dir] points at offset dir*size_Mat inside DD (direction maps to itself)
+		D.f[DIR_M00] = &DD[DIR_M00   *size_Mat];
+		D.f[DIR_0P0] = &DD[DIR_0P0   *size_Mat];
+		D.f[DIR_0M0] = &DD[DIR_0M0   *size_Mat];
+		D.f[DIR_00P] = &DD[DIR_00P   *size_Mat];
+		D.f[DIR_00M] = &DD[DIR_00M   *size_Mat];
+		D.f[DIR_PP0] = &DD[DIR_PP0  *size_Mat];
+		D.f[DIR_MM0] = &DD[DIR_MM0  *size_Mat];
+		D.f[DIR_PM0] = &DD[DIR_PM0  *size_Mat];
+		D.f[DIR_MP0] = &DD[DIR_MP0  *size_Mat];
+		D.f[DIR_P0P] = &DD[DIR_P0P  *size_Mat];
+		D.f[DIR_M0M] = &DD[DIR_M0M  *size_Mat];
+		D.f[DIR_P0M] = &DD[DIR_P0M  *size_Mat];
+		D.f[DIR_M0P] = &DD[DIR_M0P  *size_Mat];
+		D.f[DIR_0PP] = &DD[DIR_0PP  *size_Mat];
+		D.f[DIR_0MM] = &DD[DIR_0MM  *size_Mat];
+		D.f[DIR_0PM] = &DD[DIR_0PM  *size_Mat];
+		D.f[DIR_0MP] = &DD[DIR_0MP  *size_Mat];
+		D.f[DIR_000] = &DD[DIR_000*size_Mat];
+		D.f[DIR_PPP] = &DD[DIR_PPP *size_Mat];
+		D.f[DIR_MMP] = &DD[DIR_MMP *size_Mat];
+		D.f[DIR_PMP] = &DD[DIR_PMP *size_Mat];
+		D.f[DIR_MPP] = &DD[DIR_MPP *size_Mat];
+		D.f[DIR_PPM] = &DD[DIR_PPM *size_Mat];
+		D.f[DIR_MMM] = &DD[DIR_MMM *size_Mat];
+		D.f[DIR_PMM] = &DD[DIR_PMM *size_Mat];
+		D.f[DIR_MPM] = &DD[DIR_MPM *size_Mat];
 	}
 	else
 	{
-		D.f[W] = &DD[E   *size_Mat];
-		D.f[E] = &DD[W   *size_Mat];
-		D.f[S] = &DD[N   *size_Mat];
-		D.f[N] = &DD[S   *size_Mat];
-		D.f[B] = &DD[T   *size_Mat];
-		D.f[T] = &DD[B   *size_Mat];
-		D.f[SW] = &DD[NE  *size_Mat];
-		D.f[NE] = &DD[SW  *size_Mat];
-		D.f[NW] = &DD[SE  *size_Mat];
-		D.f[SE] = &DD[NW  *size_Mat];
-		D.f[BW] = &DD[TE  *size_Mat];
-		D.f[TE] = &DD[BW  *size_Mat];
-		D.f[TW] = &DD[BE  *size_Mat];
-		D.f[BE] = &DD[TW  *size_Mat];
-		D.f[BS] = &DD[TN  *size_Mat];
-		D.f[TN] = &DD[BS  *size_Mat];
-		D.f[TS] = &DD[BN  *size_Mat];
-		D.f[BN] = &DD[TS  *size_Mat];
-		D.f[REST] = &DD[REST*size_Mat];
-		D.f[TNE] = &DD[BSW *size_Mat];
-		D.f[TSW] = &DD[BNE *size_Mat];
-		D.f[TSE] = &DD[BNW *size_Mat];
-		D.f[TNW] = &DD[BSE *size_Mat];
-		D.f[BNE] = &DD[TSW *size_Mat];
-		D.f[BSW] = &DD[TNE *size_Mat];
-		D.f[BSE] = &DD[TNW *size_Mat];
-		D.f[BNW] = &DD[TSE *size_Mat];
+		D.f[DIR_M00] = &DD[DIR_P00   *size_Mat]; // odd timestep: every D.f[dir] points at the DD segment of the direction with P and M swapped
+		D.f[DIR_P00] = &DD[DIR_M00   *size_Mat];
+		D.f[DIR_0M0] = &DD[DIR_0P0   *size_Mat];
+		D.f[DIR_0P0] = &DD[DIR_0M0   *size_Mat];
+		D.f[DIR_00M] = &DD[DIR_00P   *size_Mat];
+		D.f[DIR_00P] = &DD[DIR_00M   *size_Mat];
+		D.f[DIR_MM0] = &DD[DIR_PP0  *size_Mat];
+		D.f[DIR_PP0] = &DD[DIR_MM0  *size_Mat];
+		D.f[DIR_MP0] = &DD[DIR_PM0  *size_Mat];
+		D.f[DIR_PM0] = &DD[DIR_MP0  *size_Mat];
+		D.f[DIR_M0M] = &DD[DIR_P0P  *size_Mat];
+		D.f[DIR_P0P] = &DD[DIR_M0M  *size_Mat];
+		D.f[DIR_M0P] = &DD[DIR_P0M  *size_Mat];
+		D.f[DIR_P0M] = &DD[DIR_M0P  *size_Mat];
+		D.f[DIR_0MM] = &DD[DIR_0PP  *size_Mat];
+		D.f[DIR_0PP] = &DD[DIR_0MM  *size_Mat];
+		D.f[DIR_0MP] = &DD[DIR_0PM  *size_Mat];
+		D.f[DIR_0PM] = &DD[DIR_0MP  *size_Mat];
+		D.f[DIR_000] = &DD[DIR_000*size_Mat]; // DIR_000 is the only slot that is not swapped
+		D.f[DIR_PPP] = &DD[DIR_MMM *size_Mat];
+		D.f[DIR_MMP] = &DD[DIR_PPM *size_Mat];
+		D.f[DIR_PMP] = &DD[DIR_MPM *size_Mat];
+		D.f[DIR_MPP] = &DD[DIR_PMM *size_Mat];
+		D.f[DIR_PPM] = &DD[DIR_MMP *size_Mat];
+		D.f[DIR_MMM] = &DD[DIR_PPP *size_Mat];
+		D.f[DIR_PMM] = &DD[DIR_MPP *size_Mat];
+		D.f[DIR_MPM] = &DD[DIR_PMP *size_Mat];
 	}
 	////////////////////////////////////////////////////////////////////////////////
 	Distributions27 Dad;
 	if (isEvenTimestep == true)
 	{
-		Dad.f[E]    = &DD_AD[E   *size_Mat];
-		Dad.f[W]    = &DD_AD[W   *size_Mat];
-		Dad.f[N]    = &DD_AD[N   *size_Mat];
-		Dad.f[S]    = &DD_AD[S   *size_Mat];
-		Dad.f[T]    = &DD_AD[T   *size_Mat];
-		Dad.f[B]    = &DD_AD[B   *size_Mat];
-		Dad.f[NE]   = &DD_AD[NE  *size_Mat];
-		Dad.f[SW]   = &DD_AD[SW  *size_Mat];
-		Dad.f[SE]   = &DD_AD[SE  *size_Mat];
-		Dad.f[NW]   = &DD_AD[NW  *size_Mat];
-		Dad.f[TE]   = &DD_AD[TE  *size_Mat];
-		Dad.f[BW]   = &DD_AD[BW  *size_Mat];
-		Dad.f[BE]   = &DD_AD[BE  *size_Mat];
-		Dad.f[TW]   = &DD_AD[TW  *size_Mat];
-		Dad.f[TN]   = &DD_AD[TN  *size_Mat];
-		Dad.f[BS]   = &DD_AD[BS  *size_Mat];
-		Dad.f[BN]   = &DD_AD[BN  *size_Mat];
-		Dad.f[TS]   = &DD_AD[TS  *size_Mat];
-		Dad.f[REST] = &DD_AD[REST*size_Mat];
-		Dad.f[TNE]  = &DD_AD[TNE *size_Mat];
-		Dad.f[TSW]  = &DD_AD[TSW *size_Mat];
-		Dad.f[TSE]  = &DD_AD[TSE *size_Mat];
-		Dad.f[TNW]  = &DD_AD[TNW *size_Mat];
-		Dad.f[BNE]  = &DD_AD[BNE *size_Mat];
-		Dad.f[BSW]  = &DD_AD[BSW *size_Mat];
-		Dad.f[BSE]  = &DD_AD[BSE *size_Mat];
-		Dad.f[BNW]  = &DD_AD[BNW *size_Mat];
+		Dad.f[DIR_P00]    = &DD_AD[DIR_P00   *size_Mat]; // even timestep: every Dad.f[dir] points at offset dir*size_Mat inside DD_AD (direction maps to itself)
+		Dad.f[DIR_M00]    = &DD_AD[DIR_M00   *size_Mat];
+		Dad.f[DIR_0P0]    = &DD_AD[DIR_0P0   *size_Mat];
+		Dad.f[DIR_0M0]    = &DD_AD[DIR_0M0   *size_Mat];
+		Dad.f[DIR_00P]    = &DD_AD[DIR_00P   *size_Mat];
+		Dad.f[DIR_00M]    = &DD_AD[DIR_00M   *size_Mat];
+		Dad.f[DIR_PP0]   = &DD_AD[DIR_PP0  *size_Mat];
+		Dad.f[DIR_MM0]   = &DD_AD[DIR_MM0  *size_Mat];
+		Dad.f[DIR_PM0]   = &DD_AD[DIR_PM0  *size_Mat];
+		Dad.f[DIR_MP0]   = &DD_AD[DIR_MP0  *size_Mat];
+		Dad.f[DIR_P0P]   = &DD_AD[DIR_P0P  *size_Mat];
+		Dad.f[DIR_M0M]   = &DD_AD[DIR_M0M  *size_Mat];
+		Dad.f[DIR_P0M]   = &DD_AD[DIR_P0M  *size_Mat];
+		Dad.f[DIR_M0P]   = &DD_AD[DIR_M0P  *size_Mat];
+		Dad.f[DIR_0PP]   = &DD_AD[DIR_0PP  *size_Mat];
+		Dad.f[DIR_0MM]   = &DD_AD[DIR_0MM  *size_Mat];
+		Dad.f[DIR_0PM]   = &DD_AD[DIR_0PM  *size_Mat];
+		Dad.f[DIR_0MP]   = &DD_AD[DIR_0MP  *size_Mat];
+		Dad.f[DIR_000] = &DD_AD[DIR_000*size_Mat];
+		Dad.f[DIR_PPP]  = &DD_AD[DIR_PPP *size_Mat];
+		Dad.f[DIR_MMP]  = &DD_AD[DIR_MMP *size_Mat];
+		Dad.f[DIR_PMP]  = &DD_AD[DIR_PMP *size_Mat];
+		Dad.f[DIR_MPP]  = &DD_AD[DIR_MPP *size_Mat];
+		Dad.f[DIR_PPM]  = &DD_AD[DIR_PPM *size_Mat];
+		Dad.f[DIR_MMM]  = &DD_AD[DIR_MMM *size_Mat];
+		Dad.f[DIR_PMM]  = &DD_AD[DIR_PMM *size_Mat];
+		Dad.f[DIR_MPM]  = &DD_AD[DIR_MPM *size_Mat];
 	}						
 	else					
 	{						
-		Dad.f[W]    = &DD_AD[E   *size_Mat];
-		Dad.f[E]    = &DD_AD[W   *size_Mat];
-		Dad.f[S]    = &DD_AD[N   *size_Mat];
-		Dad.f[N]    = &DD_AD[S   *size_Mat];
-		Dad.f[B]    = &DD_AD[T   *size_Mat];
-		Dad.f[T]    = &DD_AD[B   *size_Mat];
-		Dad.f[SW]   = &DD_AD[NE  *size_Mat];
-		Dad.f[NE]   = &DD_AD[SW  *size_Mat];
-		Dad.f[NW]   = &DD_AD[SE  *size_Mat];
-		Dad.f[SE]   = &DD_AD[NW  *size_Mat];
-		Dad.f[BW]   = &DD_AD[TE  *size_Mat];
-		Dad.f[TE]   = &DD_AD[BW  *size_Mat];
-		Dad.f[TW]   = &DD_AD[BE  *size_Mat];
-		Dad.f[BE]   = &DD_AD[TW  *size_Mat];
-		Dad.f[BS]   = &DD_AD[TN  *size_Mat];
-		Dad.f[TN]   = &DD_AD[BS  *size_Mat];
-		Dad.f[TS]   = &DD_AD[BN  *size_Mat];
-		Dad.f[BN]   = &DD_AD[TS  *size_Mat];
-		Dad.f[REST] = &DD_AD[REST*size_Mat];
-		Dad.f[TNE]  = &DD_AD[BSW *size_Mat];
-		Dad.f[TSW]  = &DD_AD[BNE *size_Mat];
-		Dad.f[TSE]  = &DD_AD[BNW *size_Mat];
-		Dad.f[TNW]  = &DD_AD[BSE *size_Mat];
-		Dad.f[BNE]  = &DD_AD[TSW *size_Mat];
-		Dad.f[BSW]  = &DD_AD[TNE *size_Mat];
-		Dad.f[BSE]  = &DD_AD[TNW *size_Mat];
-		Dad.f[BNW]  = &DD_AD[TSE *size_Mat];
+		Dad.f[DIR_M00]    = &DD_AD[DIR_P00   *size_Mat]; // odd timestep: every Dad.f[dir] points at the DD_AD segment of the direction with P and M swapped
+		Dad.f[DIR_P00]    = &DD_AD[DIR_M00   *size_Mat];
+		Dad.f[DIR_0M0]    = &DD_AD[DIR_0P0   *size_Mat];
+		Dad.f[DIR_0P0]    = &DD_AD[DIR_0M0   *size_Mat];
+		Dad.f[DIR_00M]    = &DD_AD[DIR_00P   *size_Mat];
+		Dad.f[DIR_00P]    = &DD_AD[DIR_00M   *size_Mat];
+		Dad.f[DIR_MM0]   = &DD_AD[DIR_PP0  *size_Mat];
+		Dad.f[DIR_PP0]   = &DD_AD[DIR_MM0  *size_Mat];
+		Dad.f[DIR_MP0]   = &DD_AD[DIR_PM0  *size_Mat];
+		Dad.f[DIR_PM0]   = &DD_AD[DIR_MP0  *size_Mat];
+		Dad.f[DIR_M0M]   = &DD_AD[DIR_P0P  *size_Mat];
+		Dad.f[DIR_P0P]   = &DD_AD[DIR_M0M  *size_Mat];
+		Dad.f[DIR_M0P]   = &DD_AD[DIR_P0M  *size_Mat];
+		Dad.f[DIR_P0M]   = &DD_AD[DIR_M0P  *size_Mat];
+		Dad.f[DIR_0MM]   = &DD_AD[DIR_0PP  *size_Mat];
+		Dad.f[DIR_0PP]   = &DD_AD[DIR_0MM  *size_Mat];
+		Dad.f[DIR_0MP]   = &DD_AD[DIR_0PM  *size_Mat];
+		Dad.f[DIR_0PM]   = &DD_AD[DIR_0MP  *size_Mat];
+		Dad.f[DIR_000] = &DD_AD[DIR_000*size_Mat]; // DIR_000 is the only slot that is not swapped
+		Dad.f[DIR_PPP]  = &DD_AD[DIR_MMM *size_Mat];
+		Dad.f[DIR_MMP]  = &DD_AD[DIR_PPM *size_Mat];
+		Dad.f[DIR_PMP]  = &DD_AD[DIR_MPM *size_Mat];
+		Dad.f[DIR_MPP]  = &DD_AD[DIR_PMM *size_Mat];
+		Dad.f[DIR_PPM]  = &DD_AD[DIR_MMP *size_Mat];
+		Dad.f[DIR_MMM]  = &DD_AD[DIR_PPP *size_Mat];
+		Dad.f[DIR_PMM]  = &DD_AD[DIR_MPP *size_Mat];
+		Dad.f[DIR_MPM]  = &DD_AD[DIR_PMP *size_Mat];
 	}
 	////////////////////////////////////////////////////////////////////////////////
 	const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -1029,33 +1038,33 @@ extern "C" __global__ void LBCalcMedCompAD27(
 
 		if (geoD[k] == GEO_FLUID)
 		{
-			real mfcbb = (D.f[E])[k];//[ke   ];
-			real mfabb = (D.f[W])[kw];//[kw   ];  
-			real mfbcb = (D.f[N])[k];//[kn   ];
-			real mfbab = (D.f[S])[ks];//[ks   ];  
-			real mfbbc = (D.f[T])[k];//[kt   ];
-			real mfbba = (D.f[B])[kb];//[kb   ];  
-			real mfccb = (D.f[NE])[k];//[kne  ];  
-			real mfaab = (D.f[SW])[ksw];//[ksw  ];
-			real mfcab = (D.f[SE])[ks];//[kse  ]; 
-			real mfacb = (D.f[NW])[kw];//[knw  ]; 
-			real mfcbc = (D.f[TE])[k];//[kte  ];  
-			real mfaba = (D.f[BW])[kbw];//[kbw  ];
-			real mfcba = (D.f[BE])[kb];//[kbe  ]; 
-			real mfabc = (D.f[TW])[kw];//[ktw  ]; 
-			real mfbcc = (D.f[TN])[k];//[ktn  ];  
-			real mfbaa = (D.f[BS])[kbs];//[kbs  ];
-			real mfbca = (D.f[BN])[kb];//[kbn  ]; 
-			real mfbac = (D.f[TS])[ks];//[kts  ]; 
-			real mfbbb = (D.f[REST])[k];//[kzero];
-			real mfccc = (D.f[TNE])[k];//[ktne ]; 
-			real mfaac = (D.f[TSW])[ksw];//[ktsw ]; 
-			real mfcac = (D.f[TSE])[ks];//[ktse ];
-			real mfacc = (D.f[TNW])[kw];//[ktnw ];
-			real mfcca = (D.f[BNE])[kb];//[kbne ];
-			real mfaaa = (D.f[BSW])[kbsw];//[kbsw ];
-			real mfcaa = (D.f[BSE])[kbs];//[kbse ]; 
-			real mfaca = (D.f[BNW])[kbw];//[kbnw ]; 
+			real mfcbb = (D.f[DIR_P00])[k];//[ke   ]; NOTE(review): the bracketed names look like per-direction index aliases (the pressD sum in this kernel indexes with ke, kn, ...) - confirm they equal the indices used here
+			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];  
+			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];
+			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];  
+			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];
+			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];  
+			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];  
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];
+			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ]; 
+			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ]; 
+			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];  
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];
+			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ]; 
+			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ]; 
+			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];  
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];
+			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ]; 
+			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ]; 
+			real mfbbb = (D.f[DIR_000])[k];//[kzero];
+			real mfccc = (D.f[DIR_PPP])[k];//[ktne ]; 
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ]; 
+			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];
+			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];
+			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ]; 
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ]; 
 			////////////////////////////////////////////////////////////////////////////////////
 			real drho =
 				((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
@@ -1082,52 +1091,52 @@ extern "C" __global__ void LBCalcMedCompAD27(
 					(mfbbc - mfbba)) / rho) + VZ;
 
 			pressD[k] = 
-				((D.f[E])[ke] + (D.f[W])[kw] +
-				 (D.f[N])[kn] + (D.f[S])[ks] +
-				 (D.f[T])[kt] + (D.f[B])[kb] +
+				((D.f[DIR_P00])[ke] + (D.f[DIR_M00])[kw] + // first group of the pressD sum: the six directions with one non-zero component, added without a weight factor
+				 (D.f[DIR_0P0])[kn] + (D.f[DIR_0M0])[ks] +
+				 (D.f[DIR_00P])[kt] + (D.f[DIR_00M])[kb] +
 				 c2o1*(
-				 (D.f[NE])[kne] + (D.f[SW])[ksw] +
-				 (D.f[SE])[kse] + (D.f[NW])[knw] +
-				 (D.f[TE])[kte] + (D.f[BW])[kbw] +
-				 (D.f[BE])[kbe] + (D.f[TW])[ktw] +
-				 (D.f[TN])[ktn] + (D.f[BS])[kbs] +
-				 (D.f[BN])[kbn] + (D.f[TS])[kts]) +
+				 (D.f[DIR_PP0])[kne] + (D.f[DIR_MM0])[ksw] + // second group: the twelve directions with two non-zero components, all inside the c2o1*( ... ) factor
+				 (D.f[DIR_PM0])[kse] + (D.f[DIR_MP0])[knw] +
+				 (D.f[DIR_P0P])[kte] + (D.f[DIR_M0M])[kbw] +
+				 (D.f[DIR_P0M])[kbe] + (D.f[DIR_M0P])[ktw] +
+				 (D.f[DIR_0PP])[ktn] + (D.f[DIR_0MM])[kbs] +
+				 (D.f[DIR_0PM])[kbn] + (D.f[DIR_0MP])[kts]) +
 				 c3o1*(
-				 (D.f[TNE])[ktne] + (D.f[TSW])[ktsw] +
-				 (D.f[TSE])[ktse] + (D.f[TNW])[ktnw] +
-				 (D.f[BNE])[kbne] + (D.f[BSW])[kbsw] +
-				 (D.f[BSE])[kbse] + (D.f[BNW])[kbnw]) -
+				 (D.f[DIR_PPP])[ktne] + (D.f[DIR_MMP])[ktsw] + // third group: the eight directions with three non-zero components, all inside the c3o1*( ... ) factor
+				 (D.f[DIR_PMP])[ktse] + (D.f[DIR_MPP])[ktnw] +
+				 (D.f[DIR_PPM])[kbne] + (D.f[DIR_MMM])[kbsw] +
+				 (D.f[DIR_PMM])[kbse] + (D.f[DIR_MPM])[kbnw]) -
 				 rhoD[k] - (vxD[k] * vxD[k] + vyD[k] * vyD[k] + vzD[k] * vzD[k]) * (c1o1 + rhoD[k])) * c1o2 + rhoD[k] +
 				 PRESS;
 				 //achtung op hart gesetzt Annahme op = 1 ;                                                    ^^^^(1.0/op-0.5)=0.5
 			//////////////////////////////////////////////////////////////////////////
-			mfcbb = (Dad.f[E   ])[k   ];
-			mfabb = (Dad.f[W   ])[kw  ];
-			mfbcb = (Dad.f[N   ])[k   ];
-			mfbab = (Dad.f[S   ])[ks  ];
-			mfbbc = (Dad.f[T   ])[k   ];
-			mfbba = (Dad.f[B   ])[kb  ];
-			mfccb = (Dad.f[NE  ])[k   ];
-			mfaab = (Dad.f[SW  ])[ksw ];
-			mfcab = (Dad.f[SE  ])[ks  ];
-			mfacb = (Dad.f[NW  ])[kw  ];
-			mfcbc = (Dad.f[TE  ])[k   ];
-			mfaba = (Dad.f[BW  ])[kbw ];
-			mfcba = (Dad.f[BE  ])[kb  ];
-			mfabc = (Dad.f[TW  ])[kw  ];
-			mfbcc = (Dad.f[TN  ])[k   ];
-			mfbaa = (Dad.f[BS  ])[kbs ];
-			mfbca = (Dad.f[BN  ])[kb  ];
-			mfbac = (Dad.f[TS  ])[ks  ];
-			mfbbb = (Dad.f[REST])[k   ];
-			mfccc = (Dad.f[TNE ])[k   ];
-			mfaac = (Dad.f[TSW ])[ksw ];
-			mfcac = (Dad.f[TSE ])[ks  ];
-			mfacc = (Dad.f[TNW ])[kw  ];
-			mfcca = (Dad.f[BNE ])[kb  ];
-			mfaaa = (Dad.f[BSW ])[kbsw];
-			mfcaa = (Dad.f[BSE ])[kbs ];
-			mfaca = (Dad.f[BNW ])[kbw ];
+			mfcbb = (Dad.f[DIR_P00   ])[k   ]; // the mf* locals are overwritten with the Dad (DD_AD-backed) entries, using the same index per direction as the D loads; they feed concD[k] below
+			mfabb = (Dad.f[DIR_M00   ])[kw  ];
+			mfbcb = (Dad.f[DIR_0P0   ])[k   ];
+			mfbab = (Dad.f[DIR_0M0   ])[ks  ];
+			mfbbc = (Dad.f[DIR_00P   ])[k   ];
+			mfbba = (Dad.f[DIR_00M   ])[kb  ];
+			mfccb = (Dad.f[DIR_PP0  ])[k   ];
+			mfaab = (Dad.f[DIR_MM0  ])[ksw ];
+			mfcab = (Dad.f[DIR_PM0  ])[ks  ];
+			mfacb = (Dad.f[DIR_MP0  ])[kw  ];
+			mfcbc = (Dad.f[DIR_P0P  ])[k   ];
+			mfaba = (Dad.f[DIR_M0M  ])[kbw ];
+			mfcba = (Dad.f[DIR_P0M  ])[kb  ];
+			mfabc = (Dad.f[DIR_M0P  ])[kw  ];
+			mfbcc = (Dad.f[DIR_0PP  ])[k   ];
+			mfbaa = (Dad.f[DIR_0MM  ])[kbs ];
+			mfbca = (Dad.f[DIR_0PM  ])[kb  ];
+			mfbac = (Dad.f[DIR_0MP  ])[ks  ];
+			mfbbb = (Dad.f[DIR_000])[k   ];
+			mfccc = (Dad.f[DIR_PPP ])[k   ];
+			mfaac = (Dad.f[DIR_MMP ])[ksw ];
+			mfcac = (Dad.f[DIR_PMP ])[ks  ];
+			mfacc = (Dad.f[DIR_MPP ])[kw  ];
+			mfcca = (Dad.f[DIR_PPM ])[kb  ];
+			mfaaa = (Dad.f[DIR_MMM ])[kbsw];
+			mfcaa = (Dad.f[DIR_PMM ])[kbs ];
+			mfaca = (Dad.f[DIR_MPM ])[kbw ];
 			//////////////////////////////////////////////////////////////////////////
 			concD[k] = 
 				((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa)   + (mfaac + mfcca))) +
@@ -1159,7 +1168,7 @@ extern "C" __global__ void LBCalcMedCompAD27(
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LBCalcMacMedSP27( real* vxD,
+__global__ void LBCalcMacMedSP27( real* vxD,
                                              real* vyD,
                                              real* vzD,
                                              real* rhoD,
@@ -1231,7 +1240,7 @@ extern "C" __global__ void LBCalcMacMedSP27( real* vxD,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LBResetMedianValuesSP27(
+__global__ void LBResetMedianValuesSP27(
 	real* vxD,
 	real* vyD,
 	real* vzD,
@@ -1284,7 +1293,7 @@ extern "C" __global__ void LBResetMedianValuesSP27(
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LBResetMedianValuesAD27(
+__global__ void LBResetMedianValuesAD27(
 	real* vxD,
 	real* vyD,
 	real* vzD,
@@ -1339,7 +1348,7 @@ extern "C" __global__ void LBResetMedianValuesAD27(
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LBCalcMeasurePoints( real* vxMP,
+__global__ void LBCalcMeasurePoints( real* vxMP,
 												real* vyMP,
 												real* vzMP,
 												real* rhoMP,
@@ -1358,63 +1367,63 @@ extern "C" __global__ void LBCalcMeasurePoints( real* vxMP,
 	Distributions27 D;
 	if (isEvenTimestep==true)
 	{
-		D.f[E   ] = &DD[E   *size_Mat];
-		D.f[W   ] = &DD[W   *size_Mat];
-		D.f[N   ] = &DD[N   *size_Mat];
-		D.f[S   ] = &DD[S   *size_Mat];
-		D.f[T   ] = &DD[T   *size_Mat];
-		D.f[B   ] = &DD[B   *size_Mat];
-		D.f[NE  ] = &DD[NE  *size_Mat];
-		D.f[SW  ] = &DD[SW  *size_Mat];
-		D.f[SE  ] = &DD[SE  *size_Mat];
-		D.f[NW  ] = &DD[NW  *size_Mat];
-		D.f[TE  ] = &DD[TE  *size_Mat];
-		D.f[BW  ] = &DD[BW  *size_Mat];
-		D.f[BE  ] = &DD[BE  *size_Mat];
-		D.f[TW  ] = &DD[TW  *size_Mat];
-		D.f[TN  ] = &DD[TN  *size_Mat];
-		D.f[BS  ] = &DD[BS  *size_Mat];
-		D.f[BN  ] = &DD[BN  *size_Mat];
-		D.f[TS  ] = &DD[TS  *size_Mat];
-		D.f[REST] = &DD[REST*size_Mat];
-		D.f[TNE ] = &DD[TNE *size_Mat];
-		D.f[TSW ] = &DD[TSW *size_Mat];
-		D.f[TSE ] = &DD[TSE *size_Mat];
-		D.f[TNW ] = &DD[TNW *size_Mat];
-		D.f[BNE ] = &DD[BNE *size_Mat];
-		D.f[BSW ] = &DD[BSW *size_Mat];
-		D.f[BSE ] = &DD[BSE *size_Mat];
-		D.f[BNW ] = &DD[BNW *size_Mat];
+		D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat]; // even timestep: every D.f[dir] points at offset dir*size_Mat inside DD (direction maps to itself)
+		D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+		D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+		D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+		D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+		D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+		D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+		D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+		D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+		D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+		D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+		D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+		D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+		D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+		D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+		D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+		D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+		D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+		D.f[DIR_000] = &DD[DIR_000*size_Mat];
+		D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+		D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+		D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+		D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+		D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+		D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+		D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+		D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
 	} 
 	else
 	{
-		D.f[W   ] = &DD[E   *size_Mat];
-		D.f[E   ] = &DD[W   *size_Mat];
-		D.f[S   ] = &DD[N   *size_Mat];
-		D.f[N   ] = &DD[S   *size_Mat];
-		D.f[B   ] = &DD[T   *size_Mat];
-		D.f[T   ] = &DD[B   *size_Mat];
-		D.f[SW  ] = &DD[NE  *size_Mat];
-		D.f[NE  ] = &DD[SW  *size_Mat];
-		D.f[NW  ] = &DD[SE  *size_Mat];
-		D.f[SE  ] = &DD[NW  *size_Mat];
-		D.f[BW  ] = &DD[TE  *size_Mat];
-		D.f[TE  ] = &DD[BW  *size_Mat];
-		D.f[TW  ] = &DD[BE  *size_Mat];
-		D.f[BE  ] = &DD[TW  *size_Mat];
-		D.f[BS  ] = &DD[TN  *size_Mat];
-		D.f[TN  ] = &DD[BS  *size_Mat];
-		D.f[TS  ] = &DD[BN  *size_Mat];
-		D.f[BN  ] = &DD[TS  *size_Mat];
-		D.f[REST] = &DD[REST*size_Mat];
-		D.f[TNE ] = &DD[BSW *size_Mat];
-		D.f[TSW ] = &DD[BNE *size_Mat];
-		D.f[TSE ] = &DD[BNW *size_Mat];
-		D.f[TNW ] = &DD[BSE *size_Mat];
-		D.f[BNE ] = &DD[TSW *size_Mat];
-		D.f[BSW ] = &DD[TNE *size_Mat];
-		D.f[BSE ] = &DD[TNW *size_Mat];
-		D.f[BNW ] = &DD[TSE *size_Mat];
+		D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat]; // odd timestep: every D.f[dir] points at the DD segment of the direction with P and M swapped
+		D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+		D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+		D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+		D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+		D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+		D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+		D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+		D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+		D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+		D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+		D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+		D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+		D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+		D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+		D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+		D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+		D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+		D.f[DIR_000] = &DD[DIR_000*size_Mat]; // DIR_000 is the only slot that is not swapped
+		D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+		D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+		D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+		D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+		D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+		D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+		D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+		D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
 	}
 	////////////////////////////////////////////////////////////////////////////////
 	const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -1464,50 +1473,50 @@ extern "C" __global__ void LBCalcMeasurePoints( real* vxMP,
 
       if(geoD[kzero] == GEO_FLUID)
       {
-         rhoMP[kMac]=   (D.f[E   ])[ke  ]+ (D.f[W   ])[kw  ]+ 
-                        (D.f[N   ])[kn  ]+ (D.f[S   ])[ks  ]+
-                        (D.f[T   ])[kt  ]+ (D.f[B   ])[kb  ]+
-                        (D.f[NE  ])[kne ]+ (D.f[SW  ])[ksw ]+
-                        (D.f[SE  ])[kse ]+ (D.f[NW  ])[knw ]+
-                        (D.f[TE  ])[kte ]+ (D.f[BW  ])[kbw ]+
-                        (D.f[BE  ])[kbe ]+ (D.f[TW  ])[ktw ]+
-                        (D.f[TN  ])[ktn ]+ (D.f[BS  ])[kbs ]+
-                        (D.f[BN  ])[kbn ]+ (D.f[TS  ])[kts ]+
-                        (D.f[REST])[kzero]+ 
-                        (D.f[TNE ])[ktne]+ (D.f[TSW ])[ktsw]+ 
-                        (D.f[TSE ])[ktse]+ (D.f[TNW ])[ktnw]+ 
-                        (D.f[BNE ])[kbne]+ (D.f[BSW ])[kbsw]+ 
-                        (D.f[BSE ])[kbse]+ (D.f[BNW ])[kbnw];
-
-         vxMP[kMac] =   (D.f[E   ])[ke  ]- (D.f[W   ])[kw  ]+ 
-                        (D.f[NE  ])[kne ]- (D.f[SW  ])[ksw ]+
-                        (D.f[SE  ])[kse ]- (D.f[NW  ])[knw ]+
-                        (D.f[TE  ])[kte ]- (D.f[BW  ])[kbw ]+
-                        (D.f[BE  ])[kbe ]- (D.f[TW  ])[ktw ]+
-                        (D.f[TNE ])[ktne]- (D.f[TSW ])[ktsw]+ 
-                        (D.f[TSE ])[ktse]- (D.f[TNW ])[ktnw]+ 
-                        (D.f[BNE ])[kbne]- (D.f[BSW ])[kbsw]+ 
-                        (D.f[BSE ])[kbse]- (D.f[BNW ])[kbnw];
-
-         vyMP[kMac] =   (D.f[N   ])[kn  ]- (D.f[S   ])[ks  ]+
-                        (D.f[NE  ])[kne ]- (D.f[SW  ])[ksw ]-
-                        (D.f[SE  ])[kse ]+ (D.f[NW  ])[knw ]+
-                        (D.f[TN  ])[ktn ]- (D.f[BS  ])[kbs ]+
-                        (D.f[BN  ])[kbn ]- (D.f[TS  ])[kts ]+
-                        (D.f[TNE ])[ktne]- (D.f[TSW ])[ktsw]- 
-                        (D.f[TSE ])[ktse]+ (D.f[TNW ])[ktnw]+ 
-                        (D.f[BNE ])[kbne]- (D.f[BSW ])[kbsw]- 
-                        (D.f[BSE ])[kbse]+ (D.f[BNW ])[kbnw];
-
-         vzMP[kMac] =   (D.f[T   ])[kt  ]- (D.f[B   ])[kb  ]+
-                        (D.f[TE  ])[kte ]- (D.f[BW  ])[kbw ]-
-                        (D.f[BE  ])[kbe ]+ (D.f[TW  ])[ktw ]+
-                        (D.f[TN  ])[ktn ]- (D.f[BS  ])[kbs ]-
-                        (D.f[BN  ])[kbn ]+ (D.f[TS  ])[kts ]+
-                        (D.f[TNE ])[ktne]+ (D.f[TSW ])[ktsw]+ 
-                        (D.f[TSE ])[ktse]+ (D.f[TNW ])[ktnw]- 
-                        (D.f[BNE ])[kbne]- (D.f[BSW ])[kbsw]- 
-                        (D.f[BSE ])[kbse]- (D.f[BNW ])[kbnw];
+         rhoMP[kMac]=   (D.f[DIR_P00   ])[ke  ]+ (D.f[DIR_M00   ])[kw  ]+ // plain sum of all 27 distribution entries, no weights
+                        (D.f[DIR_0P0   ])[kn  ]+ (D.f[DIR_0M0   ])[ks  ]+
+                        (D.f[DIR_00P   ])[kt  ]+ (D.f[DIR_00M   ])[kb  ]+
+                        (D.f[DIR_PP0  ])[kne ]+ (D.f[DIR_MM0  ])[ksw ]+
+                        (D.f[DIR_PM0  ])[kse ]+ (D.f[DIR_MP0  ])[knw ]+
+                        (D.f[DIR_P0P  ])[kte ]+ (D.f[DIR_M0M  ])[kbw ]+
+                        (D.f[DIR_P0M  ])[kbe ]+ (D.f[DIR_M0P  ])[ktw ]+
+                        (D.f[DIR_0PP  ])[ktn ]+ (D.f[DIR_0MM  ])[kbs ]+
+                        (D.f[DIR_0PM  ])[kbn ]+ (D.f[DIR_0MP  ])[kts ]+
+                        (D.f[DIR_000])[kzero]+ 
+                        (D.f[DIR_PPP ])[ktne]+ (D.f[DIR_MMP ])[ktsw]+ 
+                        (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]+ 
+                        (D.f[DIR_PPM ])[kbne]+ (D.f[DIR_MMM ])[kbsw]+ 
+                        (D.f[DIR_PMM ])[kbse]+ (D.f[DIR_MPM ])[kbnw];
+
+         vxMP[kMac] =   (D.f[DIR_P00   ])[ke  ]- (D.f[DIR_M00   ])[kw  ]+ // entries whose 1st direction letter is P are added, those with M subtracted (presumably +x / -x); the sum is not divided by rhoMP here
+                        (D.f[DIR_PP0  ])[kne ]- (D.f[DIR_MM0  ])[ksw ]+
+                        (D.f[DIR_PM0  ])[kse ]- (D.f[DIR_MP0  ])[knw ]+
+                        (D.f[DIR_P0P  ])[kte ]- (D.f[DIR_M0M  ])[kbw ]+
+                        (D.f[DIR_P0M  ])[kbe ]- (D.f[DIR_M0P  ])[ktw ]+
+                        (D.f[DIR_PPP ])[ktne]- (D.f[DIR_MMP ])[ktsw]+ 
+                        (D.f[DIR_PMP ])[ktse]- (D.f[DIR_MPP ])[ktnw]+ 
+                        (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM ])[kbsw]+ 
+                        (D.f[DIR_PMM ])[kbse]- (D.f[DIR_MPM ])[kbnw];
+
+         vyMP[kMac] =   (D.f[DIR_0P0   ])[kn  ]- (D.f[DIR_0M0   ])[ks  ]+ // same pattern keyed on the 2nd direction letter (presumably +y / -y)
+                        (D.f[DIR_PP0  ])[kne ]- (D.f[DIR_MM0  ])[ksw ]-
+                        (D.f[DIR_PM0  ])[kse ]+ (D.f[DIR_MP0  ])[knw ]+
+                        (D.f[DIR_0PP  ])[ktn ]- (D.f[DIR_0MM  ])[kbs ]+
+                        (D.f[DIR_0PM  ])[kbn ]- (D.f[DIR_0MP  ])[kts ]+
+                        (D.f[DIR_PPP ])[ktne]- (D.f[DIR_MMP ])[ktsw]- 
+                        (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]+ 
+                        (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM ])[kbsw]- 
+                        (D.f[DIR_PMM ])[kbse]+ (D.f[DIR_MPM ])[kbnw];
+
+         vzMP[kMac] =   (D.f[DIR_00P   ])[kt  ]- (D.f[DIR_00M   ])[kb  ]+ // same pattern keyed on the 3rd direction letter (presumably +z / -z)
+                        (D.f[DIR_P0P  ])[kte ]- (D.f[DIR_M0M  ])[kbw ]-
+                        (D.f[DIR_P0M  ])[kbe ]+ (D.f[DIR_M0P  ])[ktw ]+
+                        (D.f[DIR_0PP  ])[ktn ]- (D.f[DIR_0MM  ])[kbs ]-
+                        (D.f[DIR_0PM  ])[kbn ]+ (D.f[DIR_0MP  ])[kts ]+
+                        (D.f[DIR_PPP ])[ktne]+ (D.f[DIR_MMP ])[ktsw]+ 
+                        (D.f[DIR_PMP ])[ktse]+ (D.f[DIR_MPP ])[ktnw]- 
+                        (D.f[DIR_PPM ])[kbne]- (D.f[DIR_MMM ])[kbsw]- 
+                        (D.f[DIR_PMM ])[kbse]- (D.f[DIR_MPM ])[kbnw];
       }
    }
 }
@@ -1550,7 +1559,7 @@ extern "C" __global__ void LBCalcMeasurePoints( real* vxMP,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LBSetOutputWallVelocitySP27( real* vxD,
+__global__ void LBSetOutputWallVelocitySP27( real* vxD,
 														real* vyD,
 														real* vzD,
 														real* vxWall,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/Cascade27.cu b/src/gpu/VirtualFluids_GPU/GPU/Cascade27.cu
index 6f4ac6ca5634f280b877cfd86fb7c01ce4870857..a79588421a624cae62ec32127739efb47bb7b2ef 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/Cascade27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/Cascade27.cu
@@ -9,7 +9,7 @@ using namespace vf::lbm::dir;
 #include "math.h"
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LB_Kernel_Cascade_SP_27(     real omega,
+__global__ void LB_Kernel_Cascade_SP_27(     real omega,
 														unsigned int* bcMatD,
 														unsigned int* neighborX,
 														unsigned int* neighborY,
@@ -40,63 +40,63 @@ extern "C" __global__ void LB_Kernel_Cascade_SP_27(     real omega,
 			Distributions27 D;
 			if (EvenOrOdd==true)
 			{
-				D.f[E   ] = &DDStart[E   *size_Mat];
-				D.f[W   ] = &DDStart[W   *size_Mat];
-				D.f[N   ] = &DDStart[N   *size_Mat];
-				D.f[S   ] = &DDStart[S   *size_Mat];
-				D.f[T   ] = &DDStart[T   *size_Mat];
-				D.f[B   ] = &DDStart[B   *size_Mat];
-				D.f[NE  ] = &DDStart[NE  *size_Mat];
-				D.f[SW  ] = &DDStart[SW  *size_Mat];
-				D.f[SE  ] = &DDStart[SE  *size_Mat];
-				D.f[NW  ] = &DDStart[NW  *size_Mat];
-				D.f[TE  ] = &DDStart[TE  *size_Mat];
-				D.f[BW  ] = &DDStart[BW  *size_Mat];
-				D.f[BE  ] = &DDStart[BE  *size_Mat];
-				D.f[TW  ] = &DDStart[TW  *size_Mat];
-				D.f[TN  ] = &DDStart[TN  *size_Mat];
-				D.f[BS  ] = &DDStart[BS  *size_Mat];
-				D.f[BN  ] = &DDStart[BN  *size_Mat];
-				D.f[TS  ] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[TNE ] = &DDStart[TNE *size_Mat];
-				D.f[TSW ] = &DDStart[TSW *size_Mat];
-				D.f[TSE ] = &DDStart[TSE *size_Mat];
-				D.f[TNW ] = &DDStart[TNW *size_Mat];
-				D.f[BNE ] = &DDStart[BNE *size_Mat];
-				D.f[BSW ] = &DDStart[BSW *size_Mat];
-				D.f[BSE ] = &DDStart[BSE *size_Mat];
-				D.f[BNW ] = &DDStart[BNW *size_Mat];
+				D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat]; // EvenOrOdd==true: every D.f[dir] points at offset dir*size_Mat inside DDStart (direction maps to itself)
+				D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
 			}
 			else
 			{
-				D.f[W   ] = &DDStart[E   *size_Mat];
-				D.f[E   ] = &DDStart[W   *size_Mat];
-				D.f[S   ] = &DDStart[N   *size_Mat];
-				D.f[N   ] = &DDStart[S   *size_Mat];
-				D.f[B   ] = &DDStart[T   *size_Mat];
-				D.f[T   ] = &DDStart[B   *size_Mat];
-				D.f[SW  ] = &DDStart[NE  *size_Mat];
-				D.f[NE  ] = &DDStart[SW  *size_Mat];
-				D.f[NW  ] = &DDStart[SE  *size_Mat];
-				D.f[SE  ] = &DDStart[NW  *size_Mat];
-				D.f[BW  ] = &DDStart[TE  *size_Mat];
-				D.f[TE  ] = &DDStart[BW  *size_Mat];
-				D.f[TW  ] = &DDStart[BE  *size_Mat];
-				D.f[BE  ] = &DDStart[TW  *size_Mat];
-				D.f[BS  ] = &DDStart[TN  *size_Mat];
-				D.f[TN  ] = &DDStart[BS  *size_Mat];
-				D.f[TS  ] = &DDStart[BN  *size_Mat];
-				D.f[BN  ] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[BSW ] = &DDStart[TNE *size_Mat];
-				D.f[BNE ] = &DDStart[TSW *size_Mat];
-				D.f[BNW ] = &DDStart[TSE *size_Mat];
-				D.f[BSE ] = &DDStart[TNW *size_Mat];
-				D.f[TSW ] = &DDStart[BNE *size_Mat];
-				D.f[TNE ] = &DDStart[BSW *size_Mat];
-				D.f[TNW ] = &DDStart[BSE *size_Mat];
-				D.f[TSE ] = &DDStart[BNW *size_Mat];
+				D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat]; // else branch (EvenOrOdd not true): every D.f[dir] points at the DDStart segment of the direction with P and M swapped
+				D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; // DIR_000 is the only slot that is not swapped
+				D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -129,33 +129,33 @@ extern "C" __global__ void LB_Kernel_Cascade_SP_27(     real omega,
 			//unsigned int ktne = k;
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[E   ])[k  ];//[ke   ];// +  c2over27 ;(D.f[E   ])[k  ];//ke
-			real mfabb = (D.f[W   ])[kw ];//[kw   ];// +  c2over27 ;(D.f[W   ])[kw ];
-			real mfbcb = (D.f[N   ])[k  ];//[kn   ];// +  c2over27 ;(D.f[N   ])[k  ];//kn
-			real mfbab = (D.f[S   ])[ks ];//[ks   ];// +  c2over27 ;(D.f[S   ])[ks ];
-			real mfbbc = (D.f[T   ])[k  ];//[kt   ];// +  c2over27 ;(D.f[T   ])[k  ];//kt
-			real mfbba = (D.f[B   ])[kb ];//[kb   ];// +  c2over27 ;(D.f[B   ])[kb ];
-			real mfccb = (D.f[NE  ])[k  ];//[kne  ];// +  c1over54 ;(D.f[NE  ])[k  ];//kne
-			real mfaab = (D.f[SW  ])[ksw];//[ksw  ];// +  c1over54 ;(D.f[SW  ])[ksw];
-			real mfcab = (D.f[SE  ])[ks ];//[kse  ];// +  c1over54 ;(D.f[SE  ])[ks ];//kse
-			real mfacb = (D.f[NW  ])[kw ];//[knw  ];// +  c1over54 ;(D.f[NW  ])[kw ];//knw
-			real mfcbc = (D.f[TE  ])[k  ];//[kte  ];// +  c1over54 ;(D.f[TE  ])[k  ];//kte
-			real mfaba = (D.f[BW  ])[kbw];//[kbw  ];// +  c1over54 ;(D.f[BW  ])[kbw];
-			real mfcba = (D.f[BE  ])[kb ];//[kbe  ];// +  c1over54 ;(D.f[BE  ])[kb ];//kbe
-			real mfabc = (D.f[TW  ])[kw ];//[ktw  ];// +  c1over54 ;(D.f[TW  ])[kw ];//ktw
-			real mfbcc = (D.f[TN  ])[k  ];//[ktn  ];// +  c1over54 ;(D.f[TN  ])[k  ];//ktn
-			real mfbaa = (D.f[BS  ])[kbs];//[kbs  ];// +  c1over54 ;(D.f[BS  ])[kbs];
-			real mfbca = (D.f[BN  ])[kb ];//[kbn  ];// +  c1over54 ;(D.f[BN  ])[kb ];//kbn
-			real mfbac = (D.f[TS  ])[ks ];//[kts  ];// +  c1over54 ;(D.f[TS  ])[ks ];//kts
-			real mfbbb = (D.f[REST])[k  ];//[kzero];// +  c8over27 ;(D.f[REST])[k  ];//kzero
-			real mfccc = (D.f[TNE ])[k  ];//[ktne ];// +  c1over216;(D.f[TNE ])[k  ];//ktne
-			real mfaac = (D.f[TSW ])[ksw];//[ktsw ];// +  c1over216;(D.f[TSW ])[ksw];//ktsw
-			real mfcac = (D.f[TSE ])[ks ];//[ktse ];// +  c1over216;(D.f[TSE ])[ks ];//ktse
-			real mfacc = (D.f[TNW ])[kw ];//[ktnw ];// +  c1over216;(D.f[TNW ])[kw ];//ktnw
-			real mfcca = (D.f[BNE ])[kb ];//[kbne ];// +  c1over216;(D.f[BNE ])[kb ];//kbne
-			real mfaaa = (D.f[BSW ])[kbsw];//[kbsw ];// +  c1over216;(D.f[BSW ])[kbsw];
-			real mfcaa = (D.f[BSE ])[kbs];//[kbse ];// +  c1over216;(D.f[BSE ])[kbs];//kbse
-			real mfaca = (D.f[BNW ])[kbw];//[kbnw ];// +  c1over216;(D.f[BNW ])[kbw];//kbnw
+			real mfcbb = (D.f[DIR_P00   ])[k  ];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
+			real mfabb = (D.f[DIR_M00   ])[kw ];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
+			real mfbcb = (D.f[DIR_0P0   ])[k  ];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0   ])[ks ];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
+			real mfbbc = (D.f[DIR_00P   ])[k  ];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
+			real mfbba = (D.f[DIR_00M   ])[kb ];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
+			real mfccb = (D.f[DIR_PP0  ])[k  ];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0  ])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
+			real mfcab = (D.f[DIR_PM0  ])[ks ];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0  ])[kw ];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P  ])[k  ];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M  ])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
+			real mfcba = (D.f[DIR_P0M  ])[kb ];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P  ])[kw ];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP  ])[k  ];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM  ])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
+			real mfbca = (D.f[DIR_0PM  ])[kb ];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP  ])[ks ];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfbbb = (D.f[DIR_000])[k  ];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
+			real mfccc = (D.f[DIR_PPP ])[k  ];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP ])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP ])[ks ];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP ])[kw ];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM ])[kb ];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM ])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
+			real mfcaa = (D.f[DIR_PMM ])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM ])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
 			////////////////////////////////////////////////////////////////////////////////////
 			//slow
 			//real oMdrho = one - ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
@@ -731,33 +731,33 @@ extern "C" __global__ void LB_Kernel_Cascade_SP_27(     real omega,
 
 
 			////////////////////////////////////////////////////////////////////////////////////
-			(D.f[ E   ])[k   ] = mfabb;//(D.f[ E   ])[ke   ] = mfabb;// -  c2over27 ;  (D.f[ E   ])[k   ]                                                                     
-			(D.f[ W   ])[kw  ] = mfcbb;//(D.f[ W   ])[kw   ] = mfcbb;// -  c2over27 ;  (D.f[ W   ])[kw  ]                                                                   
-			(D.f[ N   ])[k   ] = mfbab;//(D.f[ N   ])[kn   ] = mfbab;// -  c2over27 ;	 (D.f[ N   ])[k   ]
-			(D.f[ S   ])[ks  ] = mfbcb;//(D.f[ S   ])[ks   ] = mfbcb;// -  c2over27 ;	 (D.f[ S   ])[ks  ]
-			(D.f[ T   ])[k   ] = mfbba;//(D.f[ T   ])[kt   ] = mfbba;// -  c2over27 ;	 (D.f[ T   ])[k   ]
-			(D.f[ B   ])[kb  ] = mfbbc;//(D.f[ B   ])[kb   ] = mfbbc;// -  c2over27 ;	 (D.f[ B   ])[kb  ]
-			(D.f[ NE  ])[k   ] = mfaab;//(D.f[ NE  ])[kne  ] = mfaab;// -  c1over54 ;	 (D.f[ NE  ])[k   ]
-			(D.f[ SW  ])[ksw ] = mfccb;//(D.f[ SW  ])[ksw  ] = mfccb;// -  c1over54 ;	 (D.f[ SW  ])[ksw ]
-			(D.f[ SE  ])[ks  ] = mfacb;//(D.f[ SE  ])[kse  ] = mfacb;// -  c1over54 ;	 (D.f[ SE  ])[ks  ]
-			(D.f[ NW  ])[kw  ] = mfcab;//(D.f[ NW  ])[knw  ] = mfcab;// -  c1over54 ;	 (D.f[ NW  ])[kw  ]
-			(D.f[ TE  ])[k   ] = mfaba;//(D.f[ TE  ])[kte  ] = mfaba;// -  c1over54 ;	 (D.f[ TE  ])[k   ]
-			(D.f[ BW  ])[kbw ] = mfcbc;//(D.f[ BW  ])[kbw  ] = mfcbc;// -  c1over54 ;	 (D.f[ BW  ])[kbw ]
-			(D.f[ BE  ])[kb  ] = mfabc;//(D.f[ BE  ])[kbe  ] = mfabc;// -  c1over54 ;	 (D.f[ BE  ])[kb  ]
-			(D.f[ TW  ])[kw  ] = mfcba;//(D.f[ TW  ])[ktw  ] = mfcba;// -  c1over54 ;	 (D.f[ TW  ])[kw  ]
-			(D.f[ TN  ])[k   ] = mfbaa;//(D.f[ TN  ])[ktn  ] = mfbaa;// -  c1over54 ;	 (D.f[ TN  ])[k   ]
-			(D.f[ BS  ])[kbs ] = mfbcc;//(D.f[ BS  ])[kbs  ] = mfbcc;// -  c1over54 ;	 (D.f[ BS  ])[kbs ]
-			(D.f[ BN  ])[kb  ] = mfbac;//(D.f[ BN  ])[kbn  ] = mfbac;// -  c1over54 ;	 (D.f[ BN  ])[kb  ]
-			(D.f[ TS  ])[ks  ] = mfbca;//(D.f[ TS  ])[kts  ] = mfbca;// -  c1over54 ;	 (D.f[ TS  ])[ks  ]
-			(D.f[ REST])[k   ] = mfbbb;//(D.f[ REST])[kzero] = mfbbb;// -  c8over27 ;	 (D.f[ REST])[k   ]
-			(D.f[ TNE ])[k   ] = mfaaa;//(D.f[ TNE ])[ktne ] = mfaaa;// -  c1over216;	 (D.f[ TNE ])[k   ]
-			(D.f[ TSE ])[ks  ] = mfaca;//(D.f[ TSE ])[ktse ] = mfaca;// -  c1over216;	 (D.f[ TSE ])[ks  ]
-			(D.f[ BNE ])[kb  ] = mfaac;//(D.f[ BNE ])[kbne ] = mfaac;// -  c1over216;	 (D.f[ BNE ])[kb  ]
-			(D.f[ BSE ])[kbs ] = mfacc;//(D.f[ BSE ])[kbse ] = mfacc;// -  c1over216;	 (D.f[ BSE ])[kbs ]
-			(D.f[ TNW ])[kw  ] = mfcaa;//(D.f[ TNW ])[ktnw ] = mfcaa;// -  c1over216;	 (D.f[ TNW ])[kw  ]
-			(D.f[ TSW ])[ksw ] = mfcca;//(D.f[ TSW ])[ktsw ] = mfcca;// -  c1over216;	 (D.f[ TSW ])[ksw ]
-			(D.f[ BNW ])[kbw ] = mfcac;//(D.f[ BNW ])[kbnw ] = mfcac;// -  c1over216;	 (D.f[ BNW ])[kbw ]
-			(D.f[ BSW ])[kbsw] = mfccc;//(D.f[ BSW ])[kbsw ] = mfccc;// -  c1over216;	 (D.f[ BSW ])[kbsw]
+			(D.f[ DIR_P00   ])[k   ] = mfabb;//(D.f[ DIR_P00   ])[ke   ] = mfabb;// -  c2over27 ;  (D.f[ DIR_P00   ])[k   ]                                                                     
+			(D.f[ DIR_M00   ])[kw  ] = mfcbb;//(D.f[ DIR_M00   ])[kw   ] = mfcbb;// -  c2over27 ;  (D.f[ DIR_M00   ])[kw  ]                                                                   
+			(D.f[ DIR_0P0   ])[k   ] = mfbab;//(D.f[ DIR_0P0   ])[kn   ] = mfbab;// -  c2over27 ;	 (D.f[ DIR_0P0   ])[k   ]
+			(D.f[ DIR_0M0   ])[ks  ] = mfbcb;//(D.f[ DIR_0M0   ])[ks   ] = mfbcb;// -  c2over27 ;	 (D.f[ DIR_0M0   ])[ks  ]
+			(D.f[ DIR_00P   ])[k   ] = mfbba;//(D.f[ DIR_00P   ])[kt   ] = mfbba;// -  c2over27 ;	 (D.f[ DIR_00P   ])[k   ]
+			(D.f[ DIR_00M   ])[kb  ] = mfbbc;//(D.f[ DIR_00M   ])[kb   ] = mfbbc;// -  c2over27 ;	 (D.f[ DIR_00M   ])[kb  ]
+			(D.f[ DIR_PP0  ])[k   ] = mfaab;//(D.f[ DIR_PP0  ])[kne  ] = mfaab;// -  c1over54 ;	 (D.f[ DIR_PP0  ])[k   ]
+			(D.f[ DIR_MM0  ])[ksw ] = mfccb;//(D.f[ DIR_MM0  ])[ksw  ] = mfccb;// -  c1over54 ;	 (D.f[ DIR_MM0  ])[ksw ]
+			(D.f[ DIR_PM0  ])[ks  ] = mfacb;//(D.f[ DIR_PM0  ])[kse  ] = mfacb;// -  c1over54 ;	 (D.f[ DIR_PM0  ])[ks  ]
+			(D.f[ DIR_MP0  ])[kw  ] = mfcab;//(D.f[ DIR_MP0  ])[knw  ] = mfcab;// -  c1over54 ;	 (D.f[ DIR_MP0  ])[kw  ]
+			(D.f[ DIR_P0P  ])[k   ] = mfaba;//(D.f[ DIR_P0P  ])[kte  ] = mfaba;// -  c1over54 ;	 (D.f[ DIR_P0P  ])[k   ]
+			(D.f[ DIR_M0M  ])[kbw ] = mfcbc;//(D.f[ DIR_M0M  ])[kbw  ] = mfcbc;// -  c1over54 ;	 (D.f[ DIR_M0M  ])[kbw ]
+			(D.f[ DIR_P0M  ])[kb  ] = mfabc;//(D.f[ DIR_P0M  ])[kbe  ] = mfabc;// -  c1over54 ;	 (D.f[ DIR_P0M  ])[kb  ]
+			(D.f[ DIR_M0P  ])[kw  ] = mfcba;//(D.f[ DIR_M0P  ])[ktw  ] = mfcba;// -  c1over54 ;	 (D.f[ DIR_M0P  ])[kw  ]
+			(D.f[ DIR_0PP  ])[k   ] = mfbaa;//(D.f[ DIR_0PP  ])[ktn  ] = mfbaa;// -  c1over54 ;	 (D.f[ DIR_0PP  ])[k   ]
+			(D.f[ DIR_0MM  ])[kbs ] = mfbcc;//(D.f[ DIR_0MM  ])[kbs  ] = mfbcc;// -  c1over54 ;	 (D.f[ DIR_0MM  ])[kbs ]
+			(D.f[ DIR_0PM  ])[kb  ] = mfbac;//(D.f[ DIR_0PM  ])[kbn  ] = mfbac;// -  c1over54 ;	 (D.f[ DIR_0PM  ])[kb  ]
+			(D.f[ DIR_0MP  ])[ks  ] = mfbca;//(D.f[ DIR_0MP  ])[kts  ] = mfbca;// -  c1over54 ;	 (D.f[ DIR_0MP  ])[ks  ]
+			(D.f[ DIR_000])[k   ] = mfbbb;//(D.f[ DIR_000])[kzero] = mfbbb;// -  c8over27 ;	 (D.f[ DIR_000])[k   ]
+			(D.f[ DIR_PPP ])[k   ] = mfaaa;//(D.f[ DIR_PPP ])[ktne ] = mfaaa;// -  c1over216;	 (D.f[ DIR_PPP ])[k   ]
+			(D.f[ DIR_PMP ])[ks  ] = mfaca;//(D.f[ DIR_PMP ])[ktse ] = mfaca;// -  c1over216;	 (D.f[ DIR_PMP ])[ks  ]
+			(D.f[ DIR_PPM ])[kb  ] = mfaac;//(D.f[ DIR_PPM ])[kbne ] = mfaac;// -  c1over216;	 (D.f[ DIR_PPM ])[kb  ]
+			(D.f[ DIR_PMM ])[kbs ] = mfacc;//(D.f[ DIR_PMM ])[kbse ] = mfacc;// -  c1over216;	 (D.f[ DIR_PMM ])[kbs ]
+			(D.f[ DIR_MPP ])[kw  ] = mfcaa;//(D.f[ DIR_MPP ])[ktnw ] = mfcaa;// -  c1over216;	 (D.f[ DIR_MPP ])[kw  ]
+			(D.f[ DIR_MMP ])[ksw ] = mfcca;//(D.f[ DIR_MMP ])[ktsw ] = mfcca;// -  c1over216;	 (D.f[ DIR_MMP ])[ksw ]
+			(D.f[ DIR_MPM ])[kbw ] = mfcac;//(D.f[ DIR_MPM ])[kbnw ] = mfcac;// -  c1over216;	 (D.f[ DIR_MPM ])[kbw ]
+			(D.f[ DIR_MMM ])[kbsw] = mfccc;//(D.f[ DIR_MMM ])[kbsw ] = mfccc;// -  c1over216;	 (D.f[ DIR_MMM ])[kbsw]
 			////////////////////////////////////////////////////////////////////////////////////
 		}                                                                                                                    
 	}
@@ -836,7 +836,7 @@ extern "C" __global__ void LB_Kernel_Cascade_SP_27(     real omega,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LB_Kernel_Casc_Comp_SP_27(      real omega,
+__global__ void LB_Kernel_Casc_Comp_SP_27(      real omega,
 														   unsigned int* bcMatD,
 														   unsigned int* neighborX,
 														   unsigned int* neighborY,
@@ -867,63 +867,63 @@ extern "C" __global__ void LB_Kernel_Casc_Comp_SP_27(      real omega,
          Distributions27 D;
          if (EvenOrOdd==true)
          {
-            D.f[E   ] = &DDStart[E   *size_Mat];
-            D.f[W   ] = &DDStart[W   *size_Mat];
-            D.f[N   ] = &DDStart[N   *size_Mat];
-            D.f[S   ] = &DDStart[S   *size_Mat];
-            D.f[T   ] = &DDStart[T   *size_Mat];
-            D.f[B   ] = &DDStart[B   *size_Mat];
-            D.f[NE  ] = &DDStart[NE  *size_Mat];
-            D.f[SW  ] = &DDStart[SW  *size_Mat];
-            D.f[SE  ] = &DDStart[SE  *size_Mat];
-            D.f[NW  ] = &DDStart[NW  *size_Mat];
-            D.f[TE  ] = &DDStart[TE  *size_Mat];
-            D.f[BW  ] = &DDStart[BW  *size_Mat];
-            D.f[BE  ] = &DDStart[BE  *size_Mat];
-            D.f[TW  ] = &DDStart[TW  *size_Mat];
-            D.f[TN  ] = &DDStart[TN  *size_Mat];
-            D.f[BS  ] = &DDStart[BS  *size_Mat];
-            D.f[BN  ] = &DDStart[BN  *size_Mat];
-            D.f[TS  ] = &DDStart[TS  *size_Mat];
-            D.f[REST] = &DDStart[REST*size_Mat];
-            D.f[TNE ] = &DDStart[TNE *size_Mat];
-            D.f[TSW ] = &DDStart[TSW *size_Mat];
-            D.f[TSE ] = &DDStart[TSE *size_Mat];
-            D.f[TNW ] = &DDStart[TNW *size_Mat];
-            D.f[BNE ] = &DDStart[BNE *size_Mat];
-            D.f[BSW ] = &DDStart[BSW *size_Mat];
-            D.f[BSE ] = &DDStart[BSE *size_Mat];
-            D.f[BNW ] = &DDStart[BNW *size_Mat];
+            D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
+            D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
+            D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
+            D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
+            D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
+            D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
+            D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
+            D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
+            D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
+            D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
+            D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
+            D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
+            D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
+            D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
+            D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
+            D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
+            D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
+            D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
+            D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+            D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
+            D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
+            D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
+            D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
+            D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
+            D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
+            D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
+            D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
          }
          else
          {
-            D.f[W   ] = &DDStart[E   *size_Mat];
-            D.f[E   ] = &DDStart[W   *size_Mat];
-            D.f[S   ] = &DDStart[N   *size_Mat];
-            D.f[N   ] = &DDStart[S   *size_Mat];
-            D.f[B   ] = &DDStart[T   *size_Mat];
-            D.f[T   ] = &DDStart[B   *size_Mat];
-            D.f[SW  ] = &DDStart[NE  *size_Mat];
-            D.f[NE  ] = &DDStart[SW  *size_Mat];
-            D.f[NW  ] = &DDStart[SE  *size_Mat];
-            D.f[SE  ] = &DDStart[NW  *size_Mat];
-            D.f[BW  ] = &DDStart[TE  *size_Mat];
-            D.f[TE  ] = &DDStart[BW  *size_Mat];
-            D.f[TW  ] = &DDStart[BE  *size_Mat];
-            D.f[BE  ] = &DDStart[TW  *size_Mat];
-            D.f[BS  ] = &DDStart[TN  *size_Mat];
-            D.f[TN  ] = &DDStart[BS  *size_Mat];
-            D.f[TS  ] = &DDStart[BN  *size_Mat];
-            D.f[BN  ] = &DDStart[TS  *size_Mat];
-            D.f[REST] = &DDStart[REST*size_Mat];
-            D.f[BSW ] = &DDStart[TNE *size_Mat];
-            D.f[BNE ] = &DDStart[TSW *size_Mat];
-            D.f[BNW ] = &DDStart[TSE *size_Mat];
-            D.f[BSE ] = &DDStart[TNW *size_Mat];
-            D.f[TSW ] = &DDStart[BNE *size_Mat];
-            D.f[TNE ] = &DDStart[BSW *size_Mat];
-            D.f[TNW ] = &DDStart[BSE *size_Mat];
-            D.f[TSE ] = &DDStart[BNW *size_Mat];
+            D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
+            D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
+            D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
+            D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
+            D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
+            D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
+            D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
+            D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
+            D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
+            D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
+            D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
+            D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
+            D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
+            D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
+            D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
+            D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
+            D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
+            D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
+            D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+            D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
+            D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
+            D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
+            D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
+            D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
+            D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
+            D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
+            D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
          }
 
          ////////////////////////////////////////////////////////////////////////////////
@@ -956,33 +956,33 @@ extern "C" __global__ void LB_Kernel_Casc_Comp_SP_27(      real omega,
          unsigned int ktne = k;
          unsigned int kbsw = neighborZ[ksw];
          //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-         real f_E     = (D.f[E   ])[ke   ];// +  c2over27 ;
-         real f_W     = (D.f[W   ])[kw   ];// +  c2over27 ;
-         real f_N     = (D.f[N   ])[kn   ];// +  c2over27 ;
-         real f_S     = (D.f[S   ])[ks   ];// +  c2over27 ;
-         real f_F     = (D.f[T   ])[kt   ];// +  c2over27 ;
-         real f_B     = (D.f[B   ])[kb   ];// +  c2over27 ;
-         real f_NE    = (D.f[NE  ])[kne  ];// +  c1over54 ;
-         real f_SW    = (D.f[SW  ])[ksw  ];// +  c1over54 ;
-         real f_SE    = (D.f[SE  ])[kse  ];// +  c1over54 ;
-         real f_NW    = (D.f[NW  ])[knw  ];// +  c1over54 ;
-         real f_Ef    = (D.f[TE  ])[kte  ];// +  c1over54 ;
-         real f_Wb    = (D.f[BW  ])[kbw  ];// +  c1over54 ;
-         real f_Eb    = (D.f[BE  ])[kbe  ];// +  c1over54 ;
-         real f_Wf    = (D.f[TW  ])[ktw  ];// +  c1over54 ;
-         real f_Nf    = (D.f[TN  ])[ktn  ];// +  c1over54 ;
-         real f_Sb    = (D.f[BS  ])[kbs  ];// +  c1over54 ;
-         real f_Nb    = (D.f[BN  ])[kbn  ];// +  c1over54 ;
-         real f_Sf    = (D.f[TS  ])[kts  ];// +  c1over54 ;
-         real f_R     = (D.f[REST])[kzero];// +  c8over27 ;
-         real f_Nef   = (D.f[TNE ])[ktne ];// +  c1over216;
-         real f_Swf   = (D.f[TSW ])[ktsw ];// +  c1over216;
-         real f_Sef   = (D.f[TSE ])[ktse ];// +  c1over216;
-         real f_Nwf   = (D.f[TNW ])[ktnw ];// +  c1over216;
-         real f_Neb   = (D.f[BNE ])[kbne ];// +  c1over216;
-         real f_Swb   = (D.f[BSW ])[kbsw ];// +  c1over216;
-         real f_Seb   = (D.f[BSE ])[kbse ];// +  c1over216;
-         real f_Nwb   = (D.f[BNW ])[kbnw ];// +  c1over216;
+         real f_E     = (D.f[DIR_P00   ])[ke   ];// +  c2over27 ;
+         real f_W     = (D.f[DIR_M00   ])[kw   ];// +  c2over27 ;
+         real f_N     = (D.f[DIR_0P0   ])[kn   ];// +  c2over27 ;
+         real f_S     = (D.f[DIR_0M0   ])[ks   ];// +  c2over27 ;
+         real f_F     = (D.f[DIR_00P   ])[kt   ];// +  c2over27 ;
+         real f_B     = (D.f[DIR_00M   ])[kb   ];// +  c2over27 ;
+         real f_NE    = (D.f[DIR_PP0  ])[kne  ];// +  c1over54 ;
+         real f_SW    = (D.f[DIR_MM0  ])[ksw  ];// +  c1over54 ;
+         real f_SE    = (D.f[DIR_PM0  ])[kse  ];// +  c1over54 ;
+         real f_NW    = (D.f[DIR_MP0  ])[knw  ];// +  c1over54 ;
+         real f_Ef    = (D.f[DIR_P0P  ])[kte  ];// +  c1over54 ;
+         real f_Wb    = (D.f[DIR_M0M  ])[kbw  ];// +  c1over54 ;
+         real f_Eb    = (D.f[DIR_P0M  ])[kbe  ];// +  c1over54 ;
+         real f_Wf    = (D.f[DIR_M0P  ])[ktw  ];// +  c1over54 ;
+         real f_Nf    = (D.f[DIR_0PP  ])[ktn  ];// +  c1over54 ;
+         real f_Sb    = (D.f[DIR_0MM  ])[kbs  ];// +  c1over54 ;
+         real f_Nb    = (D.f[DIR_0PM  ])[kbn  ];// +  c1over54 ;
+         real f_Sf    = (D.f[DIR_0MP  ])[kts  ];// +  c1over54 ;
+         real f_R     = (D.f[DIR_000])[kzero];// +  c8over27 ;
+         real f_Nef   = (D.f[DIR_PPP ])[ktne ];// +  c1over216;
+         real f_Swf   = (D.f[DIR_MMP ])[ktsw ];// +  c1over216;
+         real f_Sef   = (D.f[DIR_PMP ])[ktse ];// +  c1over216;
+         real f_Nwf   = (D.f[DIR_MPP ])[ktnw ];// +  c1over216;
+         real f_Neb   = (D.f[DIR_PPM ])[kbne ];// +  c1over216;
+         real f_Swb   = (D.f[DIR_MMM ])[kbsw ];// +  c1over216;
+         real f_Seb   = (D.f[DIR_PMM ])[kbse ];// +  c1over216;
+         real f_Nwb   = (D.f[DIR_MPM ])[kbnw ];// +  c1over216;
          ////////////////////////////////////////////////////////////////////////////////////
 		 real rho=f_NW+f_W+f_SW+f_S+f_SE+f_E+f_NE+f_N+f_R+f_Nf+f_Nb+f_Sf+f_Sb+f_Ef+f_Eb+f_Wf+f_Wb+f_Nwf+f_Nwb+f_Nef+f_Neb+f_Swf+f_Swb+f_Sef+f_Seb+f_F+f_B+c1o1;// ACHTUNG ne EINS !!!!!!!!
 		 real pix=(f_NE+f_E+f_SE+f_Ef+f_Eb-f_NW-f_W-f_SW-f_Wf-f_Wb+f_Nef+f_Neb+f_Sef+f_Seb-f_Nwf-f_Nwb-f_Swf-f_Swb);
@@ -1611,33 +1611,33 @@ extern "C" __global__ void LB_Kernel_Casc_Comp_SP_27(      real omega,
 		
 			   
 		 ////////////////////////////////////////////////////////////////////////////////////
-		 (D.f[ E   ])[ke   ] = mfabb;// -  c2over27 ;//                                                                     
-		 (D.f[ W   ])[kw   ] = mfcbb;// -  c2over27 ;                                                                     
-		 (D.f[ N   ])[kn   ] = mfbab;// -  c2over27 ;
-		 (D.f[ S   ])[ks   ] = mfbcb;// -  c2over27 ;
-		 (D.f[ T   ])[kt   ] = mfbba;// -  c2over27 ;
-		 (D.f[ B   ])[kb   ] = mfbbc;// -  c2over27 ;
-		 (D.f[ NE  ])[kne  ] = mfaab;// -  c1over54 ;
-		 (D.f[ SW  ])[ksw  ] = mfccb;// -  c1over54 ;
-		 (D.f[ SE  ])[kse  ] = mfacb;// -  c1over54 ;
-		 (D.f[ NW  ])[knw  ] = mfcab;// -  c1over54 ;
-		 (D.f[ TE  ])[kte  ] = mfaba;// -  c1over54 ;
-		 (D.f[ BW  ])[kbw  ] = mfcbc;// -  c1over54 ;
-		 (D.f[ BE  ])[kbe  ] = mfabc;// -  c1over54 ;
-		 (D.f[ TW  ])[ktw  ] = mfcba;// -  c1over54 ;
-		 (D.f[ TN  ])[ktn  ] = mfbaa;// -  c1over54 ;
-		 (D.f[ BS  ])[kbs  ] = mfbcc;// -  c1over54 ;
-		 (D.f[ BN  ])[kbn  ] = mfbac;// -  c1over54 ;
-		 (D.f[ TS  ])[kts  ] = mfbca;// -  c1over54 ;
-		 (D.f[ REST])[kzero] = mfbbb;// -  c8over27 ;
-		 (D.f[ TNE ])[ktne ] = mfaaa;// -  c1over216;
-		 (D.f[ TSE ])[ktse ] = mfaca;// -  c1over216;
-		 (D.f[ BNE ])[kbne ] = mfaac;// -  c1over216;
-		 (D.f[ BSE ])[kbse ] = mfacc;// -  c1over216;
-		 (D.f[ TNW ])[ktnw ] = mfcaa;// -  c1over216;
-		 (D.f[ TSW ])[ktsw ] = mfcca;// -  c1over216;
-		 (D.f[ BNW ])[kbnw ] = mfcac;// -  c1over216;
-		 (D.f[ BSW ])[kbsw ] = mfccc;// -  c1over216;
+		 (D.f[ DIR_P00   ])[ke   ] = mfabb;// -  c2over27 ;//                                                                     
+		 (D.f[ DIR_M00   ])[kw   ] = mfcbb;// -  c2over27 ;                                                                     
+		 (D.f[ DIR_0P0   ])[kn   ] = mfbab;// -  c2over27 ;
+		 (D.f[ DIR_0M0   ])[ks   ] = mfbcb;// -  c2over27 ;
+		 (D.f[ DIR_00P   ])[kt   ] = mfbba;// -  c2over27 ;
+		 (D.f[ DIR_00M   ])[kb   ] = mfbbc;// -  c2over27 ;
+		 (D.f[ DIR_PP0  ])[kne  ] = mfaab;// -  c1over54 ;
+		 (D.f[ DIR_MM0  ])[ksw  ] = mfccb;// -  c1over54 ;
+		 (D.f[ DIR_PM0  ])[kse  ] = mfacb;// -  c1over54 ;
+		 (D.f[ DIR_MP0  ])[knw  ] = mfcab;// -  c1over54 ;
+		 (D.f[ DIR_P0P  ])[kte  ] = mfaba;// -  c1over54 ;
+		 (D.f[ DIR_M0M  ])[kbw  ] = mfcbc;// -  c1over54 ;
+		 (D.f[ DIR_P0M  ])[kbe  ] = mfabc;// -  c1over54 ;
+		 (D.f[ DIR_M0P  ])[ktw  ] = mfcba;// -  c1over54 ;
+		 (D.f[ DIR_0PP  ])[ktn  ] = mfbaa;// -  c1over54 ;
+		 (D.f[ DIR_0MM  ])[kbs  ] = mfbcc;// -  c1over54 ;
+		 (D.f[ DIR_0PM  ])[kbn  ] = mfbac;// -  c1over54 ;
+		 (D.f[ DIR_0MP  ])[kts  ] = mfbca;// -  c1over54 ;
+		 (D.f[ DIR_000])[kzero] = mfbbb;// -  c8over27 ;
+		 (D.f[ DIR_PPP ])[ktne ] = mfaaa;// -  c1over216;
+		 (D.f[ DIR_PMP ])[ktse ] = mfaca;// -  c1over216;
+		 (D.f[ DIR_PPM ])[kbne ] = mfaac;// -  c1over216;
+		 (D.f[ DIR_PMM ])[kbse ] = mfacc;// -  c1over216;
+		 (D.f[ DIR_MPP ])[ktnw ] = mfcaa;// -  c1over216;
+		 (D.f[ DIR_MMP ])[ktsw ] = mfcca;// -  c1over216;
+		 (D.f[ DIR_MPM ])[kbnw ] = mfcac;// -  c1over216;
+		 (D.f[ DIR_MMM ])[kbsw ] = mfccc;// -  c1over216;
 		 ////////////////////////////////////////////////////////////////////////////////////
       }                                                                                                                    
    }
@@ -1683,7 +1683,7 @@ extern "C" __global__ void LB_Kernel_Casc_Comp_SP_27(      real omega,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LB_Kernel_Casc_SP_MS_OHM_27(  real omega,
+__global__ void LB_Kernel_Casc_SP_MS_OHM_27(  real omega,
                                                          unsigned int* bcMatD,
                                                          unsigned int* neighborX,
                                                          unsigned int* neighborY,
@@ -1714,63 +1714,63 @@ extern "C" __global__ void LB_Kernel_Casc_SP_MS_OHM_27(  real omega,
          Distributions27 D;
          if (EvenOrOdd==true)
          {
-            D.f[E   ] = &DDStart[E   *size_Mat];
-            D.f[W   ] = &DDStart[W   *size_Mat];
-            D.f[N   ] = &DDStart[N   *size_Mat];
-            D.f[S   ] = &DDStart[S   *size_Mat];
-            D.f[T   ] = &DDStart[T   *size_Mat];
-            D.f[B   ] = &DDStart[B   *size_Mat];
-            D.f[NE  ] = &DDStart[NE  *size_Mat];
-            D.f[SW  ] = &DDStart[SW  *size_Mat];
-            D.f[SE  ] = &DDStart[SE  *size_Mat];
-            D.f[NW  ] = &DDStart[NW  *size_Mat];
-            D.f[TE  ] = &DDStart[TE  *size_Mat];
-            D.f[BW  ] = &DDStart[BW  *size_Mat];
-            D.f[BE  ] = &DDStart[BE  *size_Mat];
-            D.f[TW  ] = &DDStart[TW  *size_Mat];
-            D.f[TN  ] = &DDStart[TN  *size_Mat];
-            D.f[BS  ] = &DDStart[BS  *size_Mat];
-            D.f[BN  ] = &DDStart[BN  *size_Mat];
-            D.f[TS  ] = &DDStart[TS  *size_Mat];
-            D.f[REST] = &DDStart[REST*size_Mat];
-            D.f[TNE ] = &DDStart[TNE *size_Mat];
-            D.f[TSW ] = &DDStart[TSW *size_Mat];
-            D.f[TSE ] = &DDStart[TSE *size_Mat];
-            D.f[TNW ] = &DDStart[TNW *size_Mat];
-            D.f[BNE ] = &DDStart[BNE *size_Mat];
-            D.f[BSW ] = &DDStart[BSW *size_Mat];
-            D.f[BSE ] = &DDStart[BSE *size_Mat];
-            D.f[BNW ] = &DDStart[BNW *size_Mat];
+            D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
+            D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
+            D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
+            D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
+            D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
+            D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
+            D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
+            D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
+            D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
+            D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
+            D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
+            D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
+            D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
+            D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
+            D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
+            D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
+            D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
+            D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
+            D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+            D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
+            D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
+            D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
+            D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
+            D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
+            D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
+            D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
+            D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
          }
          else
          {
-            D.f[W   ] = &DDStart[E   *size_Mat];
-            D.f[E   ] = &DDStart[W   *size_Mat];
-            D.f[S   ] = &DDStart[N   *size_Mat];
-            D.f[N   ] = &DDStart[S   *size_Mat];
-            D.f[B   ] = &DDStart[T   *size_Mat];
-            D.f[T   ] = &DDStart[B   *size_Mat];
-            D.f[SW  ] = &DDStart[NE  *size_Mat];
-            D.f[NE  ] = &DDStart[SW  *size_Mat];
-            D.f[NW  ] = &DDStart[SE  *size_Mat];
-            D.f[SE  ] = &DDStart[NW  *size_Mat];
-            D.f[BW  ] = &DDStart[TE  *size_Mat];
-            D.f[TE  ] = &DDStart[BW  *size_Mat];
-            D.f[TW  ] = &DDStart[BE  *size_Mat];
-            D.f[BE  ] = &DDStart[TW  *size_Mat];
-            D.f[BS  ] = &DDStart[TN  *size_Mat];
-            D.f[TN  ] = &DDStart[BS  *size_Mat];
-            D.f[TS  ] = &DDStart[BN  *size_Mat];
-            D.f[BN  ] = &DDStart[TS  *size_Mat];
-            D.f[REST] = &DDStart[REST*size_Mat];
-            D.f[BSW ] = &DDStart[TNE *size_Mat];
-            D.f[BNE ] = &DDStart[TSW *size_Mat];
-            D.f[BNW ] = &DDStart[TSE *size_Mat];
-            D.f[BSE ] = &DDStart[TNW *size_Mat];
-            D.f[TSW ] = &DDStart[BNE *size_Mat];
-            D.f[TNE ] = &DDStart[BSW *size_Mat];
-            D.f[TNW ] = &DDStart[BSE *size_Mat];
-            D.f[TSE ] = &DDStart[BNW *size_Mat];
+            D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
+            D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
+            D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
+            D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
+            D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
+            D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
+            D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
+            D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
+            D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
+            D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
+            D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
+            D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
+            D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
+            D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
+            D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
+            D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
+            D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
+            D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
+            D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+            D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
+            D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
+            D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
+            D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
+            D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
+            D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
+            D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
+            D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
          }
 
          ////////////////////////////////////////////////////////////////////////////////
@@ -1803,33 +1803,33 @@ extern "C" __global__ void LB_Kernel_Casc_SP_MS_OHM_27(  real omega,
          //unsigned int ktne = k;
          unsigned int kbsw = neighborZ[ksw];
          //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-         real fE    =  (D.f[E   ])[k  ];//ke
-         real fW    =  (D.f[W   ])[kw ];
-         real fN    =  (D.f[N   ])[k  ];//kn
-         real fS    =  (D.f[S   ])[ks ];
-         real fT    =  (D.f[T   ])[k  ];//kt
-         real fB    =  (D.f[B   ])[kb ];
-         real fNE   =  (D.f[NE  ])[k  ];//kne
-         real fSW   =  (D.f[SW  ])[ksw];
-         real fSE   =  (D.f[SE  ])[ks ];//kse
-         real fNW   =  (D.f[NW  ])[kw ];//knw
-         real fTE   =  (D.f[TE  ])[k  ];//kte
-         real fBW   =  (D.f[BW  ])[kbw];
-         real fBE   =  (D.f[BE  ])[kb ];//kbe
-         real fTW   =  (D.f[TW  ])[kw ];//ktw
-         real fTN   =  (D.f[TN  ])[k  ];//ktn
-         real fBS   =  (D.f[BS  ])[kbs];
-         real fBN   =  (D.f[BN  ])[kb ];//kbn
-         real fTS   =  (D.f[TS  ])[ks ];//kts
-         real fZERO =  (D.f[REST])[k  ];//kzero
-         real fTNE   = (D.f[TNE ])[k  ];//ktne
-         real fTSW   = (D.f[TSW ])[ksw];//ktsw
-         real fTSE   = (D.f[TSE ])[ks ];//ktse
-         real fTNW   = (D.f[TNW ])[kw ];//ktnw
-         real fBNE   = (D.f[BNE ])[kb ];//kbne
-         real fBSW   = (D.f[BSW ])[kbsw];
-         real fBSE   = (D.f[BSE ])[kbs];//kbse
-         real fBNW   = (D.f[BNW ])[kbw];//kbnw
+         real fE    =  (D.f[DIR_P00   ])[k  ];//ke
+         real fW    =  (D.f[DIR_M00   ])[kw ];
+         real fN    =  (D.f[DIR_0P0   ])[k  ];//kn
+         real fS    =  (D.f[DIR_0M0   ])[ks ];
+         real fT    =  (D.f[DIR_00P   ])[k  ];//kt
+         real fB    =  (D.f[DIR_00M   ])[kb ];
+         real fNE   =  (D.f[DIR_PP0  ])[k  ];//kne
+         real fSW   =  (D.f[DIR_MM0  ])[ksw];
+         real fSE   =  (D.f[DIR_PM0  ])[ks ];//kse
+         real fNW   =  (D.f[DIR_MP0  ])[kw ];//knw
+         real fTE   =  (D.f[DIR_P0P  ])[k  ];//kte
+         real fBW   =  (D.f[DIR_M0M  ])[kbw];
+         real fBE   =  (D.f[DIR_P0M  ])[kb ];//kbe
+         real fTW   =  (D.f[DIR_M0P  ])[kw ];//ktw
+         real fTN   =  (D.f[DIR_0PP  ])[k  ];//ktn
+         real fBS   =  (D.f[DIR_0MM  ])[kbs];
+         real fBN   =  (D.f[DIR_0PM  ])[kb ];//kbn
+         real fTS   =  (D.f[DIR_0MP  ])[ks ];//kts
+         real fZERO =  (D.f[DIR_000])[k  ];//kzero
+         real fTNE   = (D.f[DIR_PPP ])[k  ];//ktne
+         real fTSW   = (D.f[DIR_MMP ])[ksw];//ktsw
+         real fTSE   = (D.f[DIR_PMP ])[ks ];//ktse
+         real fTNW   = (D.f[DIR_MPP ])[kw ];//ktnw
+         real fBNE   = (D.f[DIR_PPM ])[kb ];//kbne
+         real fBSW   = (D.f[DIR_MMM ])[kbsw];
+         real fBSE   = (D.f[DIR_PMM ])[kbs];//kbse
+         real fBNW   = (D.f[DIR_MPM ])[kbw];//kbnw
          ////////////////////////////////////////////////////////////////////////////////
          real rho0   =  (fTNE+fBSW)+(fTSW+fBNE)+(fTSE+fBNW)+(fTNW+fBSE)+(fNE+fSW)+(fNW+fSE)+(fTE+fBW)+(fBE+fTW)+(fTN+fBS)+(fBN+fTS)+(fE+fW)+(fN+fS)+(fT+fB)+fZERO;
          real rho    =  rho0 + c1o1;
@@ -2185,92 +2185,92 @@ extern "C" __global__ void LB_Kernel_Casc_SP_MS_OHM_27(  real omega,
             c2o1*(/*vx2y*MzYZZ +*/  vx*MzXYYZZ + vz*MzXXYYZ /*+ vyz2*MzXXY + vx2z*MzYYZ + vxy2*MzXZZ + vxz2*MzXYY*/ + vy*MzXXYZZ/* + vy2z*MzXXZ*/);//+ 
             //four*(/*vxy2z*MzXZ + vx2yz*MzYZ + vxyz2*MzXY +*/ vxy*MzXYZZ + vxz*MzXYYZ + vyz*MzXXYZ);
 
-         //(D.f[ E   ])[k   ] =   c1o2*rho*( mu200  - mu220 + mu222 - mu202 +  mu120 - mu122 + mu102 - vx   );   //ke
-         //(D.f[ W   ])[kw  ] =   c1o2*rho*( mu200  - mu220 + mu222 - mu202 -  mu120 + mu122 - mu102 + vx   );   
-         //(D.f[ N   ])[k   ] =   c1o2*rho*( mu210  - mu220 + mu222 - mu212 +  mu020 - mu022 + mu012 - vy   );   //kn
-         //(D.f[ S   ])[ks  ] =   c1o2*rho*(-mu210  - mu220 + mu222 + mu212 +  mu020 - mu022 - mu012 + vy   );   
-         //(D.f[ T   ])[k   ] =   c1o2*rho*(-mu221  + mu222 + mu201 - mu202 +  mu021 - mu022 + mu002 - vz   );   //kt
-         //(D.f[ B   ])[kb  ] =   c1o2*rho*( mu221  + mu222 - mu201 - mu202 -  mu021 - mu022 + mu002 + vz   );   
-         //(D.f[ NE  ])[k   ] =  c1o4*rho*(-mu210  + mu220 - mu222 + mu212 +  mu110 - mu120 + mu122 - mu112);   //kne
-         //(D.f[ SW  ])[ksw ] =  c1o4*rho*( mu210  + mu220 - mu222 - mu212 +  mu110 + mu120 - mu122 - mu112);   
-         //(D.f[ SE  ])[ks  ] =  c1o4*rho*( mu210  + mu220 - mu222 - mu212 -  mu110 - mu120 + mu122 + mu112);   //kse
-         //(D.f[ NW  ])[kw  ] =  c1o4*rho*(-mu210  + mu220 - mu222 + mu212 -  mu110 + mu120 - mu122 + mu112);   //knw
-         //(D.f[ TE  ])[k   ] =  c1o4*rho*( mu221  - mu222 - mu201 + mu202 -  mu121 + mu122 + mu101 - mu102);   //kte
-         //(D.f[ BW  ])[kbw ] =  c1o4*rho*(-mu221  - mu222 + mu201 + mu202 -  mu121 - mu122 + mu101 + mu102);   
-         //(D.f[ BE  ])[kb  ] =  c1o4*rho*(-mu221  - mu222 + mu201 + mu202 +  mu121 + mu122 - mu101 - mu102);   //kbe
-         //(D.f[ TW  ])[kw  ] =  c1o4*rho*( mu221  - mu222 - mu201 + mu202 +  mu121 - mu122 - mu101 + mu102);   //ktw
-         //(D.f[ TN  ])[k   ] =  c1o4*rho*( mu221  - mu222 - mu211 + mu212 -  mu021 + mu022 + mu011 - mu012);   //ktn
-         //(D.f[ BS  ])[kbs ] =  c1o4*rho*(-mu221  - mu222 - mu211 - mu212 +  mu021 + mu022 + mu011 + mu012);   
-         //(D.f[ BN  ])[kb  ] =  c1o4*rho*(-mu221  - mu222 + mu211 + mu212 +  mu021 + mu022 - mu011 - mu012);   //kbn
-         //(D.f[ TS  ])[ks  ] =  c1o4*rho*( mu221  - mu222 + mu211 - mu212 -  mu021 + mu022 - mu011 + mu012);   //kts
-         //(D.f[ REST])[k   ] =       rho*(-mu200  + mu220 - mu222 + mu202 -  mu020 + mu022 - mu002        )+rho0;   //kzero
-         //(D.f[ TNE ])[k   ] = c1o8*rho*(-mu221  + mu222 + mu211 - mu212 +  mu121 - mu122 - mu111 + mu112);   //ktne
-         //(D.f[ TSE ])[ks  ] = c1o8*rho*(-mu221  + mu222 - mu211 + mu212 +  mu121 - mu122 + mu111 - mu112);   //ktse
-         //(D.f[ BNE ])[kb  ] = c1o8*rho*( mu221  + mu222 - mu211 - mu212 -  mu121 - mu122 + mu111 + mu112);   //kbne
-         //(D.f[ BSE ])[kbs ] = c1o8*rho*( mu221  + mu222 + mu211 + mu212 -  mu121 - mu122 - mu111 - mu112);   //kbse
-         //(D.f[ TNW ])[kw  ] = c1o8*rho*(-mu221  + mu222 + mu211 - mu212 -  mu121 + mu122 + mu111 - mu112);   //ktnw
-         //(D.f[ TSW ])[ksw ] = c1o8*rho*(-mu221  + mu222 - mu211 + mu212 -  mu121 + mu122 - mu111 + mu112);   //ktsw
-         //(D.f[ BNW ])[kbw ] = c1o8*rho*( mu221  + mu222 - mu211 - mu212 +  mu121 + mu122 - mu111 - mu112);   //kbnw
-         //(D.f[ BSW ])[kbsw] = c1o8*rho*( mu221  + mu222 + mu211 + mu212 +  mu121 + mu122 + mu111 + mu112);   
-         (D.f[ E   ])[k   ] =   c1o2*rho*(+ mu222 + (                          - mu220 - mu202        ) + (                         + mu200                ) + (                 - mu122) + (                                          + mu102 + mu120) + ( - vx            ) );   //ke
-         (D.f[ W   ])[kw  ] =   c1o2*rho*(+ mu222 + (                          - mu220 - mu202        ) + (                         + mu200                ) + (                 + mu122) + (                                          - mu102 - mu120) + ( + vx            ) );   
-         (D.f[ N   ])[k   ] =   c1o2*rho*(+ mu222 + (                          - mu220         - mu022) + (                                 + mu020        ) + (         - mu212        ) + (                          + mu012 + mu210                ) + (      - vy       ) );   //kn
-         (D.f[ S   ])[ks  ] =   c1o2*rho*(+ mu222 + (                          - mu220         - mu022) + (                                 + mu020        ) + (         + mu212        ) + (                          - mu012 - mu210                ) + (      + vy       ) );   
-         (D.f[ T   ])[k   ] =   c1o2*rho*(+ mu222 + (                                  - mu202 - mu022) + (                                         + mu002) + ( - mu221                ) + (         + mu201 +  mu021                                ) + (           - vz  ) );   //kt
-         (D.f[ B   ])[kb  ] =   c1o2*rho*(+ mu222 + (                                  - mu202 - mu022) + (                                         + mu002) + ( + mu221                ) + (         - mu201 -  mu021                                ) + (           + vz  ) );   
-         (D.f[ NE  ])[k   ] =  c1o4*rho*(- mu222 + (                  - mu112 + mu220                ) + (+  mu110                                        ) + (         + mu212 + mu122) + (                                  - mu210         - mu120)                       );   //kne
-         (D.f[ SW  ])[ksw ] =  c1o4*rho*(- mu222 + (                  - mu112 + mu220                ) + (+  mu110                                        ) + (         - mu212 - mu122) + (                                  + mu210         + mu120)                       );   
-         (D.f[ SE  ])[ks  ] =  c1o4*rho*(- mu222 + (                  + mu112 + mu220                ) + (-  mu110                                        ) + (         - mu212 + mu122) + (                                  + mu210         - mu120)                       );   //kse
-         (D.f[ NW  ])[kw  ] =  c1o4*rho*(- mu222 + (                  + mu112 + mu220                ) + (-  mu110                                        ) + (         + mu212 - mu122) + (                                  - mu210         + mu120)                       );   //knw
-         (D.f[ TE  ])[k   ] =  c1o4*rho*(- mu222 + (        -  mu121                  + mu202        ) + (         + mu101                                ) + ( + mu221         + mu122) + (         - mu201                          - mu102        )                       );   //kte
-         (D.f[ BW  ])[kbw ] =  c1o4*rho*(- mu222 + (        -  mu121                  + mu202        ) + (         + mu101                                ) + ( - mu221         - mu122) + (         + mu201                          + mu102        )                       );   
-         (D.f[ BE  ])[kb  ] =  c1o4*rho*(- mu222 + (        +  mu121                  + mu202        ) + (         - mu101                                ) + ( - mu221         + mu122) + (         + mu201                          - mu102        )                       );   //kbe
-         (D.f[ TW  ])[kw  ] =  c1o4*rho*(- mu222 + (        +  mu121                  + mu202        ) + (         - mu101                                ) + ( + mu221         - mu122) + (         - mu201                          + mu102        )                       );   //ktw
-         (D.f[ TN  ])[k   ] =  c1o4*rho*(- mu222 + (- mu211                                   + mu022) + (                 + mu011                        ) + ( + mu221 + mu212        ) + (                 -  mu021 - mu012                        )                       );   //ktn
-         (D.f[ BS  ])[kbs ] =  c1o4*rho*(- mu222 + (- mu211                                   + mu022) + (                 + mu011                        ) + ( - mu221 - mu212        ) + (                 +  mu021 + mu012                        )                       );   
-         (D.f[ BN  ])[kb  ] =  c1o4*rho*(- mu222 + (+ mu211                                   + mu022) + (                 - mu011                        ) + ( - mu221 + mu212        ) + (                 +  mu021 - mu012                        )                       );   //kbn
-         (D.f[ TS  ])[ks  ] =  c1o4*rho*(- mu222 + (+ mu211                                   + mu022) + (                 - mu011                        ) + ( + mu221 - mu212        ) + (                 -  mu021 + mu012                        )                       );   //kts
-         (D.f[ REST])[k   ] =       rho*(- mu222 + (                          + mu220 + mu202 + mu022) + (                         - mu200 - mu020 - mu002)                                                                                                                  )+rho0;   //kzero
-         (D.f[ TNE ])[k   ] = c1o8*rho*(+ mu222 + (+ mu211 +  mu121 + mu112                         )                                                      + ( - mu221 - mu212 - mu122) + ( - mu111                                                 )                       );   //ktne
-         (D.f[ TSE ])[ks  ] = c1o8*rho*(+ mu222 + (- mu211 +  mu121 - mu112                         )                                                      + ( - mu221 + mu212 - mu122) + ( + mu111                                                 )                       );   //ktse
-         (D.f[ BNE ])[kb  ] = c1o8*rho*(+ mu222 + (- mu211 -  mu121 + mu112                         )                                                      + ( + mu221 - mu212 - mu122) + ( + mu111                                                 )                       );   //kbne
-         (D.f[ BSE ])[kbs ] = c1o8*rho*(+ mu222 + (+ mu211 -  mu121 - mu112                         )                                                      + ( + mu221 + mu212 - mu122) + ( - mu111                                                 )                       );   //kbse
-         (D.f[ TNW ])[kw  ] = c1o8*rho*(+ mu222 + (+ mu211 -  mu121 - mu112                         )                                                      + ( - mu221 - mu212 + mu122) + ( + mu111                                                 )                       );   //ktnw
-         (D.f[ TSW ])[ksw ] = c1o8*rho*(+ mu222 + (- mu211 -  mu121 + mu112                         )                                                      + ( - mu221 + mu212 + mu122) + ( - mu111                                                 )                       );   //ktsw
-         (D.f[ BNW ])[kbw ] = c1o8*rho*(+ mu222 + (- mu211 +  mu121 - mu112                         )                                                      + ( + mu221 - mu212 + mu122) + ( - mu111                                                 )                       );   //kbnw
-         (D.f[ BSW ])[kbsw] = c1o8*rho*(+ mu222 + (+ mu211 +  mu121 + mu112                         )                                                      + ( + mu221 + mu212 + mu122) + ( + mu111                                                 )                       );   
+         //(D.f[ DIR_P00   ])[k   ] =   c1o2*rho*( mu200  - mu220 + mu222 - mu202 +  mu120 - mu122 + mu102 - vx   );   //ke
+         //(D.f[ DIR_M00   ])[kw  ] =   c1o2*rho*( mu200  - mu220 + mu222 - mu202 -  mu120 + mu122 - mu102 + vx   );   
+         //(D.f[ DIR_0P0   ])[k   ] =   c1o2*rho*( mu210  - mu220 + mu222 - mu212 +  mu020 - mu022 + mu012 - vy   );   //kn
+         //(D.f[ DIR_0M0   ])[ks  ] =   c1o2*rho*(-mu210  - mu220 + mu222 + mu212 +  mu020 - mu022 - mu012 + vy   );   
+         //(D.f[ DIR_00P   ])[k   ] =   c1o2*rho*(-mu221  + mu222 + mu201 - mu202 +  mu021 - mu022 + mu002 - vz   );   //kt
+         //(D.f[ DIR_00M   ])[kb  ] =   c1o2*rho*( mu221  + mu222 - mu201 - mu202 -  mu021 - mu022 + mu002 + vz   );   
+         //(D.f[ DIR_PP0  ])[k   ] =  c1o4*rho*(-mu210  + mu220 - mu222 + mu212 +  mu110 - mu120 + mu122 - mu112);   //kne
+         //(D.f[ DIR_MM0  ])[ksw ] =  c1o4*rho*( mu210  + mu220 - mu222 - mu212 +  mu110 + mu120 - mu122 - mu112);   
+         //(D.f[ DIR_PM0  ])[ks  ] =  c1o4*rho*( mu210  + mu220 - mu222 - mu212 -  mu110 - mu120 + mu122 + mu112);   //kse
+         //(D.f[ DIR_MP0  ])[kw  ] =  c1o4*rho*(-mu210  + mu220 - mu222 + mu212 -  mu110 + mu120 - mu122 + mu112);   //knw
+         //(D.f[ DIR_P0P  ])[k   ] =  c1o4*rho*( mu221  - mu222 - mu201 + mu202 -  mu121 + mu122 + mu101 - mu102);   //kte
+         //(D.f[ DIR_M0M  ])[kbw ] =  c1o4*rho*(-mu221  - mu222 + mu201 + mu202 -  mu121 - mu122 + mu101 + mu102);   
+         //(D.f[ DIR_P0M  ])[kb  ] =  c1o4*rho*(-mu221  - mu222 + mu201 + mu202 +  mu121 + mu122 - mu101 - mu102);   //kbe
+         //(D.f[ DIR_M0P  ])[kw  ] =  c1o4*rho*( mu221  - mu222 - mu201 + mu202 +  mu121 - mu122 - mu101 + mu102);   //ktw
+         //(D.f[ DIR_0PP  ])[k   ] =  c1o4*rho*( mu221  - mu222 - mu211 + mu212 -  mu021 + mu022 + mu011 - mu012);   //ktn
+         //(D.f[ DIR_0MM  ])[kbs ] =  c1o4*rho*(-mu221  - mu222 - mu211 - mu212 +  mu021 + mu022 + mu011 + mu012);   
+         //(D.f[ DIR_0PM  ])[kb  ] =  c1o4*rho*(-mu221  - mu222 + mu211 + mu212 +  mu021 + mu022 - mu011 - mu012);   //kbn
+         //(D.f[ DIR_0MP  ])[ks  ] =  c1o4*rho*( mu221  - mu222 + mu211 - mu212 -  mu021 + mu022 - mu011 + mu012);   //kts
+         //(D.f[ DIR_000])[k   ] =       rho*(-mu200  + mu220 - mu222 + mu202 -  mu020 + mu022 - mu002        )+rho0;   //kzero
+         //(D.f[ DIR_PPP ])[k   ] = c1o8*rho*(-mu221  + mu222 + mu211 - mu212 +  mu121 - mu122 - mu111 + mu112);   //ktne
+         //(D.f[ DIR_PMP ])[ks  ] = c1o8*rho*(-mu221  + mu222 - mu211 + mu212 +  mu121 - mu122 + mu111 - mu112);   //ktse
+         //(D.f[ DIR_PPM ])[kb  ] = c1o8*rho*( mu221  + mu222 - mu211 - mu212 -  mu121 - mu122 + mu111 + mu112);   //kbne
+         //(D.f[ DIR_PMM ])[kbs ] = c1o8*rho*( mu221  + mu222 + mu211 + mu212 -  mu121 - mu122 - mu111 - mu112);   //kbse
+         //(D.f[ DIR_MPP ])[kw  ] = c1o8*rho*(-mu221  + mu222 + mu211 - mu212 -  mu121 + mu122 + mu111 - mu112);   //ktnw
+         //(D.f[ DIR_MMP ])[ksw ] = c1o8*rho*(-mu221  + mu222 - mu211 + mu212 -  mu121 + mu122 - mu111 + mu112);   //ktsw
+         //(D.f[ DIR_MPM ])[kbw ] = c1o8*rho*( mu221  + mu222 - mu211 - mu212 +  mu121 + mu122 - mu111 - mu112);   //kbnw
+         //(D.f[ DIR_MMM ])[kbsw] = c1o8*rho*( mu221  + mu222 + mu211 + mu212 +  mu121 + mu122 + mu111 + mu112);   
+         (D.f[ DIR_P00   ])[k   ] =   c1o2*rho*(+ mu222 + (                          - mu220 - mu202        ) + (                         + mu200                ) + (                 - mu122) + (                                          + mu102 + mu120) + ( - vx            ) );   //ke
+         (D.f[ DIR_M00   ])[kw  ] =   c1o2*rho*(+ mu222 + (                          - mu220 - mu202        ) + (                         + mu200                ) + (                 + mu122) + (                                          - mu102 - mu120) + ( + vx            ) );   
+         (D.f[ DIR_0P0   ])[k   ] =   c1o2*rho*(+ mu222 + (                          - mu220         - mu022) + (                                 + mu020        ) + (         - mu212        ) + (                          + mu012 + mu210                ) + (      - vy       ) );   //kn
+         (D.f[ DIR_0M0   ])[ks  ] =   c1o2*rho*(+ mu222 + (                          - mu220         - mu022) + (                                 + mu020        ) + (         + mu212        ) + (                          - mu012 - mu210                ) + (      + vy       ) );   
+         (D.f[ DIR_00P   ])[k   ] =   c1o2*rho*(+ mu222 + (                                  - mu202 - mu022) + (                                         + mu002) + ( - mu221                ) + (         + mu201 +  mu021                                ) + (           - vz  ) );   //kt
+         (D.f[ DIR_00M   ])[kb  ] =   c1o2*rho*(+ mu222 + (                                  - mu202 - mu022) + (                                         + mu002) + ( + mu221                ) + (         - mu201 -  mu021                                ) + (           + vz  ) );   
+         (D.f[ DIR_PP0  ])[k   ] =  c1o4*rho*(- mu222 + (                  - mu112 + mu220                ) + (+  mu110                                        ) + (         + mu212 + mu122) + (                                  - mu210         - mu120)                       );   //kne
+         (D.f[ DIR_MM0  ])[ksw ] =  c1o4*rho*(- mu222 + (                  - mu112 + mu220                ) + (+  mu110                                        ) + (         - mu212 - mu122) + (                                  + mu210         + mu120)                       );   
+         (D.f[ DIR_PM0  ])[ks  ] =  c1o4*rho*(- mu222 + (                  + mu112 + mu220                ) + (-  mu110                                        ) + (         - mu212 + mu122) + (                                  + mu210         - mu120)                       );   //kse
+         (D.f[ DIR_MP0  ])[kw  ] =  c1o4*rho*(- mu222 + (                  + mu112 + mu220                ) + (-  mu110                                        ) + (         + mu212 - mu122) + (                                  - mu210         + mu120)                       );   //knw
+         (D.f[ DIR_P0P  ])[k   ] =  c1o4*rho*(- mu222 + (        -  mu121                  + mu202        ) + (         + mu101                                ) + ( + mu221         + mu122) + (         - mu201                          - mu102        )                       );   //kte
+         (D.f[ DIR_M0M  ])[kbw ] =  c1o4*rho*(- mu222 + (        -  mu121                  + mu202        ) + (         + mu101                                ) + ( - mu221         - mu122) + (         + mu201                          + mu102        )                       );   
+         (D.f[ DIR_P0M  ])[kb  ] =  c1o4*rho*(- mu222 + (        +  mu121                  + mu202        ) + (         - mu101                                ) + ( - mu221         + mu122) + (         + mu201                          - mu102        )                       );   //kbe
+         (D.f[ DIR_M0P  ])[kw  ] =  c1o4*rho*(- mu222 + (        +  mu121                  + mu202        ) + (         - mu101                                ) + ( + mu221         - mu122) + (         - mu201                          + mu102        )                       );   //ktw
+         (D.f[ DIR_0PP  ])[k   ] =  c1o4*rho*(- mu222 + (- mu211                                   + mu022) + (                 + mu011                        ) + ( + mu221 + mu212        ) + (                 -  mu021 - mu012                        )                       );   //ktn
+         (D.f[ DIR_0MM  ])[kbs ] =  c1o4*rho*(- mu222 + (- mu211                                   + mu022) + (                 + mu011                        ) + ( - mu221 - mu212        ) + (                 +  mu021 + mu012                        )                       );   
+         (D.f[ DIR_0PM  ])[kb  ] =  c1o4*rho*(- mu222 + (+ mu211                                   + mu022) + (                 - mu011                        ) + ( - mu221 + mu212        ) + (                 +  mu021 - mu012                        )                       );   //kbn
+         (D.f[ DIR_0MP  ])[ks  ] =  c1o4*rho*(- mu222 + (+ mu211                                   + mu022) + (                 - mu011                        ) + ( + mu221 - mu212        ) + (                 -  mu021 + mu012                        )                       );   //kts
+         (D.f[ DIR_000])[k   ] =       rho*(- mu222 + (                          + mu220 + mu202 + mu022) + (                         - mu200 - mu020 - mu002)                                                                                                                  )+rho0;   //kzero
+         (D.f[ DIR_PPP ])[k   ] = c1o8*rho*(+ mu222 + (+ mu211 +  mu121 + mu112                         )                                                      + ( - mu221 - mu212 - mu122) + ( - mu111                                                 )                       );   //ktne
+         (D.f[ DIR_PMP ])[ks  ] = c1o8*rho*(+ mu222 + (- mu211 +  mu121 - mu112                         )                                                      + ( - mu221 + mu212 - mu122) + ( + mu111                                                 )                       );   //ktse
+         (D.f[ DIR_PPM ])[kb  ] = c1o8*rho*(+ mu222 + (- mu211 -  mu121 + mu112                         )                                                      + ( + mu221 - mu212 - mu122) + ( + mu111                                                 )                       );   //kbne
+         (D.f[ DIR_PMM ])[kbs ] = c1o8*rho*(+ mu222 + (+ mu211 -  mu121 - mu112                         )                                                      + ( + mu221 + mu212 - mu122) + ( - mu111                                                 )                       );   //kbse
+         (D.f[ DIR_MPP ])[kw  ] = c1o8*rho*(+ mu222 + (+ mu211 -  mu121 - mu112                         )                                                      + ( - mu221 - mu212 + mu122) + ( + mu111                                                 )                       );   //ktnw
+         (D.f[ DIR_MMP ])[ksw ] = c1o8*rho*(+ mu222 + (- mu211 -  mu121 + mu112                         )                                                      + ( - mu221 + mu212 + mu122) + ( - mu111                                                 )                       );   //ktsw
+         (D.f[ DIR_MPM ])[kbw ] = c1o8*rho*(+ mu222 + (- mu211 +  mu121 - mu112                         )                                                      + ( + mu221 - mu212 + mu122) + ( - mu111                                                 )                       );   //kbnw
+         (D.f[ DIR_MMM ])[kbsw] = c1o8*rho*(+ mu222 + (+ mu211 +  mu121 + mu112                         )                                                      + ( + mu221 + mu212 + mu122) + ( + mu111                                                 )                       );   
                                                                                                                                                                                                                                                                 
                                                                                                                                                                                                                                                                 
          //////////////////////////////////////////////////////////////////////////                                                                                                                                                                             
          //BGK                                                                                                 
          //////////////////////////////////////////////////////////////////////////                            
-         //(D.f[ E   ])[k   ] = fW    ;                                                                     
-         //(D.f[ W   ])[kw  ] = fE    ;                                                                     
-         //(D.f[ N   ])[k   ] = fS    ;
-         //(D.f[ S   ])[ks  ] = fN    ;
-         //(D.f[ T   ])[k   ] = fB    ;
-         //(D.f[ B   ])[kb  ] = fT    ;
-         //(D.f[ NE  ])[k   ] = fSW   ;
-         //(D.f[ SW  ])[ksw ] = fNE   ;
-         //(D.f[ SE  ])[ks  ] = fNW   ;
-         //(D.f[ NW  ])[kw  ] = fSE   ;
-         //(D.f[ TE  ])[k   ] = fBW   ;
-         //(D.f[ BW  ])[kbw ] = fTE   ;
-         //(D.f[ BE  ])[kb  ] = fTW   ;
-         //(D.f[ TW  ])[kw  ] = fBE   ;
-         //(D.f[ TN  ])[k   ] = fBS   ;
-         //(D.f[ BS  ])[kbs ] = fTN   ;
-         //(D.f[ BN  ])[kb  ] = fTS   ;
-         //(D.f[ TS  ])[ks  ] = fBN   ;
-         //(D.f[ REST])[k   ] = fZERO ;
-         //(D.f[ TNE ])[k   ] = fBSW  ;
-         //(D.f[ TSE ])[ks  ] = fBNW  ;
-         //(D.f[ BNE ])[kb  ] = fTSW  ;
-         //(D.f[ BSE ])[kbs ] = fTNW  ;
-         //(D.f[ TNW ])[kw  ] = fBSE  ;
-         //(D.f[ TSW ])[ksw ] = fBNE  ;
-         //(D.f[ BNW ])[kbw ] = fTSE  ;
-         //(D.f[ BSW ])[kbsw] = fTNE  ;
+         //(D.f[ DIR_P00   ])[k   ] = fW    ;                                                                     
+         //(D.f[ DIR_M00   ])[kw  ] = fE    ;                                                                     
+         //(D.f[ DIR_0P0   ])[k   ] = fS    ;
+         //(D.f[ DIR_0M0   ])[ks  ] = fN    ;
+         //(D.f[ DIR_00P   ])[k   ] = fB    ;
+         //(D.f[ DIR_00M   ])[kb  ] = fT    ;
+         //(D.f[ DIR_PP0  ])[k   ] = fSW   ;
+         //(D.f[ DIR_MM0  ])[ksw ] = fNE   ;
+         //(D.f[ DIR_PM0  ])[ks  ] = fNW   ;
+         //(D.f[ DIR_MP0  ])[kw  ] = fSE   ;
+         //(D.f[ DIR_P0P  ])[k   ] = fBW   ;
+         //(D.f[ DIR_M0M  ])[kbw ] = fTE   ;
+         //(D.f[ DIR_P0M  ])[kb  ] = fTW   ;
+         //(D.f[ DIR_M0P  ])[kw  ] = fBE   ;
+         //(D.f[ DIR_0PP  ])[k   ] = fBS   ;
+         //(D.f[ DIR_0MM  ])[kbs ] = fTN   ;
+         //(D.f[ DIR_0PM  ])[kb  ] = fTS   ;
+         //(D.f[ DIR_0MP  ])[ks  ] = fBN   ;
+         //(D.f[ DIR_000])[k   ] = fZERO ;
+         //(D.f[ DIR_PPP ])[k   ] = fBSW  ;
+         //(D.f[ DIR_PMP ])[ks  ] = fBNW  ;
+         //(D.f[ DIR_PPM ])[kb  ] = fTSW  ;
+         //(D.f[ DIR_PMM ])[kbs ] = fTNW  ;
+         //(D.f[ DIR_MPP ])[kw  ] = fBSE  ;
+         //(D.f[ DIR_MMP ])[ksw ] = fBNE  ;
+         //(D.f[ DIR_MPM ])[kbw ] = fTSE  ;
+         //(D.f[ DIR_MMM ])[kbsw] = fTNE  ;
       }                                                                                                                    
    }
 }
@@ -2315,7 +2315,7 @@ extern "C" __global__ void LB_Kernel_Casc_SP_MS_OHM_27(  real omega,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LB_Kernel_Casc_SP_MS_27(   real omega,
+__global__ void LB_Kernel_Casc_SP_MS_27(   real omega,
                                                       unsigned int* bcMatD,
                                                       unsigned int* neighborX,
                                                       unsigned int* neighborY,
@@ -2346,63 +2346,63 @@ extern "C" __global__ void LB_Kernel_Casc_SP_MS_27(   real omega,
          Distributions27 D;
          if (EvenOrOdd==true)
          {
-            D.f[E   ] = &DDStart[E   *size_Mat];
-            D.f[W   ] = &DDStart[W   *size_Mat];
-            D.f[N   ] = &DDStart[N   *size_Mat];
-            D.f[S   ] = &DDStart[S   *size_Mat];
-            D.f[T   ] = &DDStart[T   *size_Mat];
-            D.f[B   ] = &DDStart[B   *size_Mat];
-            D.f[NE  ] = &DDStart[NE  *size_Mat];
-            D.f[SW  ] = &DDStart[SW  *size_Mat];
-            D.f[SE  ] = &DDStart[SE  *size_Mat];
-            D.f[NW  ] = &DDStart[NW  *size_Mat];
-            D.f[TE  ] = &DDStart[TE  *size_Mat];
-            D.f[BW  ] = &DDStart[BW  *size_Mat];
-            D.f[BE  ] = &DDStart[BE  *size_Mat];
-            D.f[TW  ] = &DDStart[TW  *size_Mat];
-            D.f[TN  ] = &DDStart[TN  *size_Mat];
-            D.f[BS  ] = &DDStart[BS  *size_Mat];
-            D.f[BN  ] = &DDStart[BN  *size_Mat];
-            D.f[TS  ] = &DDStart[TS  *size_Mat];
-            D.f[REST] = &DDStart[REST*size_Mat];
-            D.f[TNE ] = &DDStart[TNE *size_Mat];
-            D.f[TSW ] = &DDStart[TSW *size_Mat];
-            D.f[TSE ] = &DDStart[TSE *size_Mat];
-            D.f[TNW ] = &DDStart[TNW *size_Mat];
-            D.f[BNE ] = &DDStart[BNE *size_Mat];
-            D.f[BSW ] = &DDStart[BSW *size_Mat];
-            D.f[BSE ] = &DDStart[BSE *size_Mat];
-            D.f[BNW ] = &DDStart[BNW *size_Mat];
+            D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
+            D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
+            D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
+            D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
+            D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
+            D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
+            D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
+            D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
+            D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
+            D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
+            D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
+            D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
+            D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
+            D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
+            D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
+            D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
+            D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
+            D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
+            D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+            D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
+            D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
+            D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
+            D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
+            D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
+            D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
+            D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
+            D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
          }
          else
          {
-            D.f[W   ] = &DDStart[E   *size_Mat];
-            D.f[E   ] = &DDStart[W   *size_Mat];
-            D.f[S   ] = &DDStart[N   *size_Mat];
-            D.f[N   ] = &DDStart[S   *size_Mat];
-            D.f[B   ] = &DDStart[T   *size_Mat];
-            D.f[T   ] = &DDStart[B   *size_Mat];
-            D.f[SW  ] = &DDStart[NE  *size_Mat];
-            D.f[NE  ] = &DDStart[SW  *size_Mat];
-            D.f[NW  ] = &DDStart[SE  *size_Mat];
-            D.f[SE  ] = &DDStart[NW  *size_Mat];
-            D.f[BW  ] = &DDStart[TE  *size_Mat];
-            D.f[TE  ] = &DDStart[BW  *size_Mat];
-            D.f[TW  ] = &DDStart[BE  *size_Mat];
-            D.f[BE  ] = &DDStart[TW  *size_Mat];
-            D.f[BS  ] = &DDStart[TN  *size_Mat];
-            D.f[TN  ] = &DDStart[BS  *size_Mat];
-            D.f[TS  ] = &DDStart[BN  *size_Mat];
-            D.f[BN  ] = &DDStart[TS  *size_Mat];
-            D.f[REST] = &DDStart[REST*size_Mat];
-            D.f[BSW ] = &DDStart[TNE *size_Mat];
-            D.f[BNE ] = &DDStart[TSW *size_Mat];
-            D.f[BNW ] = &DDStart[TSE *size_Mat];
-            D.f[BSE ] = &DDStart[TNW *size_Mat];
-            D.f[TSW ] = &DDStart[BNE *size_Mat];
-            D.f[TNE ] = &DDStart[BSW *size_Mat];
-            D.f[TNW ] = &DDStart[BSE *size_Mat];
-            D.f[TSE ] = &DDStart[BNW *size_Mat];
+            D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
+            D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
+            D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
+            D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
+            D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
+            D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
+            D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
+            D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
+            D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
+            D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
+            D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
+            D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
+            D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
+            D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
+            D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
+            D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
+            D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
+            D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
+            D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+            D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
+            D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
+            D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
+            D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
+            D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
+            D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
+            D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
+            D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
          }
 
          ////////////////////////////////////////////////////////////////////////////////
@@ -2435,33 +2435,33 @@ extern "C" __global__ void LB_Kernel_Casc_SP_MS_27(   real omega,
          //unsigned int ktne = k;
          unsigned int kbsw = neighborZ[ksw];
          //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-         real fE    =  (D.f[E   ])[k  ];//ke
-         real fW    =  (D.f[W   ])[kw ];
-         real fN    =  (D.f[N   ])[k  ];//kn
-         real fS    =  (D.f[S   ])[ks ];
-         real fT    =  (D.f[T   ])[k  ];//kt
-         real fB    =  (D.f[B   ])[kb ];
-         real fNE   =  (D.f[NE  ])[k  ];//kne
-         real fSW   =  (D.f[SW  ])[ksw];
-         real fSE   =  (D.f[SE  ])[ks ];//kse
-         real fNW   =  (D.f[NW  ])[kw ];//knw
-         real fTE   =  (D.f[TE  ])[k  ];//kte
-         real fBW   =  (D.f[BW  ])[kbw];
-         real fBE   =  (D.f[BE  ])[kb ];//kbe
-         real fTW   =  (D.f[TW  ])[kw ];//ktw
-         real fTN   =  (D.f[TN  ])[k  ];//ktn
-         real fBS   =  (D.f[BS  ])[kbs];
-         real fBN   =  (D.f[BN  ])[kb ];//kbn
-         real fTS   =  (D.f[TS  ])[ks ];//kts
-         real fZERO =  (D.f[REST])[k  ];//kzero
-         real fTNE   = (D.f[TNE ])[k  ];//ktne
-         real fTSW   = (D.f[TSW ])[ksw];//ktsw
-         real fTSE   = (D.f[TSE ])[ks ];//ktse
-         real fTNW   = (D.f[TNW ])[kw ];//ktnw
-         real fBNE   = (D.f[BNE ])[kb ];//kbne
-         real fBSW   = (D.f[BSW ])[kbsw];
-         real fBSE   = (D.f[BSE ])[kbs];//kbse
-         real fBNW   = (D.f[BNW ])[kbw];//kbnw
+         real fE    =  (D.f[DIR_P00   ])[k  ];//ke
+         real fW    =  (D.f[DIR_M00   ])[kw ];
+         real fN    =  (D.f[DIR_0P0   ])[k  ];//kn
+         real fS    =  (D.f[DIR_0M0   ])[ks ];
+         real fT    =  (D.f[DIR_00P   ])[k  ];//kt
+         real fB    =  (D.f[DIR_00M   ])[kb ];
+         real fNE   =  (D.f[DIR_PP0  ])[k  ];//kne
+         real fSW   =  (D.f[DIR_MM0  ])[ksw];
+         real fSE   =  (D.f[DIR_PM0  ])[ks ];//kse
+         real fNW   =  (D.f[DIR_MP0  ])[kw ];//knw
+         real fTE   =  (D.f[DIR_P0P  ])[k  ];//kte
+         real fBW   =  (D.f[DIR_M0M  ])[kbw];
+         real fBE   =  (D.f[DIR_P0M  ])[kb ];//kbe
+         real fTW   =  (D.f[DIR_M0P  ])[kw ];//ktw
+         real fTN   =  (D.f[DIR_0PP  ])[k  ];//ktn
+         real fBS   =  (D.f[DIR_0MM  ])[kbs];
+         real fBN   =  (D.f[DIR_0PM  ])[kb ];//kbn
+         real fTS   =  (D.f[DIR_0MP  ])[ks ];//kts
+         real fZERO =  (D.f[DIR_000])[k  ];//kzero
+         real fTNE   = (D.f[DIR_PPP ])[k  ];//ktne
+         real fTSW   = (D.f[DIR_MMP ])[ksw];//ktsw
+         real fTSE   = (D.f[DIR_PMP ])[ks ];//ktse
+         real fTNW   = (D.f[DIR_MPP ])[kw ];//ktnw
+         real fBNE   = (D.f[DIR_PPM ])[kb ];//kbne
+         real fBSW   = (D.f[DIR_MMM ])[kbsw];
+         real fBSE   = (D.f[DIR_PMM ])[kbs];//kbse
+         real fBNW   = (D.f[DIR_MPM ])[kbw];//kbnw
          ////////////////////////////////////////////////////////////////////////////////
          real rho0   =  fZERO+fE+fW+fN+fS+fT+fB+fNE+fSW+fSE+fNW+fTE+fBW+fBE+fTW+fTN+fBS+fBN+fTS+fTNE+fTSW+fTSE+fTNW+fBNE+fBSW+fBSE+fBNW;
          real rho    =  rho0 + c1o1;
@@ -2737,65 +2737,65 @@ extern "C" __global__ void LB_Kernel_Casc_SP_MS_27(   real omega,
                   c2o1*(vx2y*MzYZZ +  vx*MzXYYZZ + vz*MzXXYYZ + vyz2*MzXXY + vx2z*MzYYZ + vxy2*MzXZZ + vxz2*MzXYY + vy*MzXXYZZ + vy2z*MzXXZ)+ 
                   c4o1*(vxy2z*MzXZ + vx2yz*MzYZ + vxyz2*MzXY + vxy*MzXYZZ + vxz*MzXYYZ + vyz*MzXXYZ);
 
-         (D.f[ E   ])[k   ] =   c1o2*rho*( mu200  - mu220 + mu222 - mu202 +  mu120 - mu122 + mu102 - vx   );   //ke
-         (D.f[ W   ])[kw  ] =   c1o2*rho*( mu200  - mu220 + mu222 - mu202 -  mu120 + mu122 - mu102 + vx   );   
-         (D.f[ N   ])[k   ] =   c1o2*rho*( mu210  - mu220 + mu222 - mu212 +  mu020 - mu022 + mu012 - vy   );   //kn
-         (D.f[ S   ])[ks  ] =   c1o2*rho*(-mu210  - mu220 + mu222 + mu212 +  mu020 - mu022 - mu012 + vy   );   
-         (D.f[ T   ])[k   ] =   c1o2*rho*(-mu221  + mu222 + mu201 - mu202 +  mu021 - mu022 + mu002 - vz   );   //kt
-         (D.f[ B   ])[kb  ] =   c1o2*rho*( mu221  + mu222 - mu201 - mu202 -  mu021 - mu022 + mu002 + vz   );   
-         (D.f[ NE  ])[k   ] =  c1o4*rho*(-mu210  + mu220 - mu222 + mu212 +  mu110 - mu120 + mu122 - mu112);   //kne
-         (D.f[ SW  ])[ksw ] =  c1o4*rho*( mu210  + mu220 - mu222 - mu212 +  mu110 + mu120 - mu122 - mu112);   
-         (D.f[ SE  ])[ks  ] =  c1o4*rho*( mu210  + mu220 - mu222 - mu212 -  mu110 - mu120 + mu122 + mu112);   //kse
-         (D.f[ NW  ])[kw  ] =  c1o4*rho*(-mu210  + mu220 - mu222 + mu212 -  mu110 + mu120 - mu122 + mu112);   //knw
-         (D.f[ TE  ])[k   ] =  c1o4*rho*( mu221  - mu222 - mu201 + mu202 -  mu121 + mu122 + mu101 - mu102);   //kte
-         (D.f[ BW  ])[kbw ] =  c1o4*rho*(-mu221  - mu222 + mu201 + mu202 -  mu121 - mu122 + mu101 + mu102);   
-         (D.f[ BE  ])[kb  ] =  c1o4*rho*(-mu221  - mu222 + mu201 + mu202 +  mu121 + mu122 - mu101 - mu102);   //kbe
-         (D.f[ TW  ])[kw  ] =  c1o4*rho*( mu221  - mu222 - mu201 + mu202 +  mu121 - mu122 - mu101 + mu102);   //ktw
-         (D.f[ TN  ])[k   ] =  c1o4*rho*( mu221  - mu222 - mu211 + mu212 -  mu021 + mu022 + mu011 - mu012);   //ktn
-         (D.f[ BS  ])[kbs ] =  c1o4*rho*(-mu221  - mu222 - mu211 - mu212 +  mu021 + mu022 + mu011 + mu012);   
-         (D.f[ BN  ])[kb  ] =  c1o4*rho*(-mu221  - mu222 + mu211 + mu212 +  mu021 + mu022 - mu011 - mu012);   //kbn
-         (D.f[ TS  ])[ks  ] =  c1o4*rho*( mu221  - mu222 + mu211 - mu212 -  mu021 + mu022 - mu011 + mu012);   //kts
-         (D.f[ REST])[k   ] =       rho*(-mu200  + mu220 - mu222 + mu202 -  mu020 + mu022 - mu002        )+rho0;   //kzero
-         (D.f[ TNE ])[k   ] = c1o8*rho*(-mu221  + mu222 + mu211 - mu212 +  mu121 - mu122 - mu111 + mu112);   //ktne
-         (D.f[ TSE ])[ks  ] = c1o8*rho*(-mu221  + mu222 - mu211 + mu212 +  mu121 - mu122 + mu111 - mu112);   //ktse
-         (D.f[ BNE ])[kb  ] = c1o8*rho*( mu221  + mu222 - mu211 - mu212 -  mu121 - mu122 + mu111 + mu112);   //kbne
-         (D.f[ BSE ])[kbs ] = c1o8*rho*( mu221  + mu222 + mu211 + mu212 -  mu121 - mu122 - mu111 - mu112);   //kbse
-         (D.f[ TNW ])[kw  ] = c1o8*rho*(-mu221  + mu222 + mu211 - mu212 -  mu121 + mu122 + mu111 - mu112);   //ktnw
-         (D.f[ TSW ])[ksw ] = c1o8*rho*(-mu221  + mu222 - mu211 + mu212 -  mu121 + mu122 - mu111 + mu112);   //ktsw
-         (D.f[ BNW ])[kbw ] = c1o8*rho*( mu221  + mu222 - mu211 - mu212 +  mu121 + mu122 - mu111 - mu112);   //kbnw
-         (D.f[ BSW ])[kbsw] = c1o8*rho*( mu221  + mu222 + mu211 + mu212 +  mu121 + mu122 + mu111 + mu112);   
+         (D.f[ DIR_P00   ])[k   ] =   c1o2*rho*( mu200  - mu220 + mu222 - mu202 +  mu120 - mu122 + mu102 - vx   );   //ke
+         (D.f[ DIR_M00   ])[kw  ] =   c1o2*rho*( mu200  - mu220 + mu222 - mu202 -  mu120 + mu122 - mu102 + vx   );   
+         (D.f[ DIR_0P0   ])[k   ] =   c1o2*rho*( mu210  - mu220 + mu222 - mu212 +  mu020 - mu022 + mu012 - vy   );   //kn
+         (D.f[ DIR_0M0   ])[ks  ] =   c1o2*rho*(-mu210  - mu220 + mu222 + mu212 +  mu020 - mu022 - mu012 + vy   );   
+         (D.f[ DIR_00P   ])[k   ] =   c1o2*rho*(-mu221  + mu222 + mu201 - mu202 +  mu021 - mu022 + mu002 - vz   );   //kt
+         (D.f[ DIR_00M   ])[kb  ] =   c1o2*rho*( mu221  + mu222 - mu201 - mu202 -  mu021 - mu022 + mu002 + vz   );   
+         (D.f[ DIR_PP0  ])[k   ] =  c1o4*rho*(-mu210  + mu220 - mu222 + mu212 +  mu110 - mu120 + mu122 - mu112);   //kne
+         (D.f[ DIR_MM0  ])[ksw ] =  c1o4*rho*( mu210  + mu220 - mu222 - mu212 +  mu110 + mu120 - mu122 - mu112);   
+         (D.f[ DIR_PM0  ])[ks  ] =  c1o4*rho*( mu210  + mu220 - mu222 - mu212 -  mu110 - mu120 + mu122 + mu112);   //kse
+         (D.f[ DIR_MP0  ])[kw  ] =  c1o4*rho*(-mu210  + mu220 - mu222 + mu212 -  mu110 + mu120 - mu122 + mu112);   //knw
+         (D.f[ DIR_P0P  ])[k   ] =  c1o4*rho*( mu221  - mu222 - mu201 + mu202 -  mu121 + mu122 + mu101 - mu102);   //kte
+         (D.f[ DIR_M0M  ])[kbw ] =  c1o4*rho*(-mu221  - mu222 + mu201 + mu202 -  mu121 - mu122 + mu101 + mu102);   
+         (D.f[ DIR_P0M  ])[kb  ] =  c1o4*rho*(-mu221  - mu222 + mu201 + mu202 +  mu121 + mu122 - mu101 - mu102);   //kbe
+         (D.f[ DIR_M0P  ])[kw  ] =  c1o4*rho*( mu221  - mu222 - mu201 + mu202 +  mu121 - mu122 - mu101 + mu102);   //ktw
+         (D.f[ DIR_0PP  ])[k   ] =  c1o4*rho*( mu221  - mu222 - mu211 + mu212 -  mu021 + mu022 + mu011 - mu012);   //ktn
+         (D.f[ DIR_0MM  ])[kbs ] =  c1o4*rho*(-mu221  - mu222 - mu211 - mu212 +  mu021 + mu022 + mu011 + mu012);   
+         (D.f[ DIR_0PM  ])[kb  ] =  c1o4*rho*(-mu221  - mu222 + mu211 + mu212 +  mu021 + mu022 - mu011 - mu012);   //kbn
+         (D.f[ DIR_0MP  ])[ks  ] =  c1o4*rho*( mu221  - mu222 + mu211 - mu212 -  mu021 + mu022 - mu011 + mu012);   //kts
+         (D.f[ DIR_000])[k   ] =       rho*(-mu200  + mu220 - mu222 + mu202 -  mu020 + mu022 - mu002        )+rho0;   //kzero
+         (D.f[ DIR_PPP ])[k   ] = c1o8*rho*(-mu221  + mu222 + mu211 - mu212 +  mu121 - mu122 - mu111 + mu112);   //ktne
+         (D.f[ DIR_PMP ])[ks  ] = c1o8*rho*(-mu221  + mu222 - mu211 + mu212 +  mu121 - mu122 + mu111 - mu112);   //ktse
+         (D.f[ DIR_PPM ])[kb  ] = c1o8*rho*( mu221  + mu222 - mu211 - mu212 -  mu121 - mu122 + mu111 + mu112);   //kbne
+         (D.f[ DIR_PMM ])[kbs ] = c1o8*rho*( mu221  + mu222 + mu211 + mu212 -  mu121 - mu122 - mu111 - mu112);   //kbse
+         (D.f[ DIR_MPP ])[kw  ] = c1o8*rho*(-mu221  + mu222 + mu211 - mu212 -  mu121 + mu122 + mu111 - mu112);   //ktnw
+         (D.f[ DIR_MMP ])[ksw ] = c1o8*rho*(-mu221  + mu222 - mu211 + mu212 -  mu121 + mu122 - mu111 + mu112);   //ktsw
+         (D.f[ DIR_MPM ])[kbw ] = c1o8*rho*( mu221  + mu222 - mu211 - mu212 +  mu121 + mu122 - mu111 - mu112);   //kbnw
+         (D.f[ DIR_MMM ])[kbsw] = c1o8*rho*( mu221  + mu222 + mu211 + mu212 +  mu121 + mu122 + mu111 + mu112);   
 
 
          //////////////////////////////////////////////////////////////////////////
          //BGK
          //////////////////////////////////////////////////////////////////////////
-         //(D.f[ E   ])[k   ] = fW    ;
-         //(D.f[ W   ])[kw  ] = fE    ;
-         //(D.f[ N   ])[k   ] = fS    ;
-         //(D.f[ S   ])[ks  ] = fN    ;
-         //(D.f[ T   ])[k   ] = fB    ;
-         //(D.f[ B   ])[kb  ] = fT    ;
-         //(D.f[ NE  ])[k   ] = fSW   ;
-         //(D.f[ SW  ])[ksw ] = fNE   ;
-         //(D.f[ SE  ])[ks  ] = fNW   ;
-         //(D.f[ NW  ])[kw  ] = fSE   ;
-         //(D.f[ TE  ])[k   ] = fBW   ;
-         //(D.f[ BW  ])[kbw ] = fTE   ;
-         //(D.f[ BE  ])[kb  ] = fTW   ;
-         //(D.f[ TW  ])[kw  ] = fBE   ;
-         //(D.f[ TN  ])[k   ] = fBS   ;
-         //(D.f[ BS  ])[kbs ] = fTN   ;
-         //(D.f[ BN  ])[kb  ] = fTS   ;
-         //(D.f[ TS  ])[ks  ] = fBN   ;
-         //(D.f[ REST])[k   ] = fZERO ;
-         //(D.f[ TNE ])[k   ] = fBSW  ;
-         //(D.f[ TSE ])[ks  ] = fBNW  ;
-         //(D.f[ BNE ])[kb  ] = fTSW  ;
-         //(D.f[ BSE ])[kbs ] = fTNW  ;
-         //(D.f[ TNW ])[kw  ] = fBSE  ;
-         //(D.f[ TSW ])[ksw ] = fBNE  ;
-         //(D.f[ BNW ])[kbw ] = fTSE  ;
-         //(D.f[ BSW ])[kbsw] = fTNE  ;
+         //(D.f[ DIR_P00   ])[k   ] = fW    ;
+         //(D.f[ DIR_M00   ])[kw  ] = fE    ;
+         //(D.f[ DIR_0P0   ])[k   ] = fS    ;
+         //(D.f[ DIR_0M0   ])[ks  ] = fN    ;
+         //(D.f[ DIR_00P   ])[k   ] = fB    ;
+         //(D.f[ DIR_00M   ])[kb  ] = fT    ;
+         //(D.f[ DIR_PP0  ])[k   ] = fSW   ;
+         //(D.f[ DIR_MM0  ])[ksw ] = fNE   ;
+         //(D.f[ DIR_PM0  ])[ks  ] = fNW   ;
+         //(D.f[ DIR_MP0  ])[kw  ] = fSE   ;
+         //(D.f[ DIR_P0P  ])[k   ] = fBW   ;
+         //(D.f[ DIR_M0M  ])[kbw ] = fTE   ;
+         //(D.f[ DIR_P0M  ])[kb  ] = fTW   ;
+         //(D.f[ DIR_M0P  ])[kw  ] = fBE   ;
+         //(D.f[ DIR_0PP  ])[k   ] = fBS   ;
+         //(D.f[ DIR_0MM  ])[kbs ] = fTN   ;
+         //(D.f[ DIR_0PM  ])[kb  ] = fTS   ;
+         //(D.f[ DIR_0MP  ])[ks  ] = fBN   ;
+         //(D.f[ DIR_000])[k   ] = fZERO ;
+         //(D.f[ DIR_PPP ])[k   ] = fBSW  ;
+         //(D.f[ DIR_PMP ])[ks  ] = fBNW  ;
+         //(D.f[ DIR_PPM ])[kb  ] = fTSW  ;
+         //(D.f[ DIR_PMM ])[kbs ] = fTNW  ;
+         //(D.f[ DIR_MPP ])[kw  ] = fBSE  ;
+         //(D.f[ DIR_MMP ])[ksw ] = fBNE  ;
+         //(D.f[ DIR_MPM ])[kbw ] = fTSE  ;
+         //(D.f[ DIR_MMM ])[kbsw] = fTNE  ;
       }                                                                                                                    
    }
 }
@@ -2840,7 +2840,7 @@ extern "C" __global__ void LB_Kernel_Casc_SP_MS_27(   real omega,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LB_Kernel_Casc_SP_MS_Diff_27(real omega,
+__global__ void LB_Kernel_Casc_SP_MS_Diff_27(real omega,
                                                         unsigned int* bcMatD,
                                                         unsigned int* neighborX,
                                                         unsigned int* neighborY,
@@ -2871,63 +2871,63 @@ extern "C" __global__ void LB_Kernel_Casc_SP_MS_Diff_27(real omega,
          Distributions27 D;
          if (EvenOrOdd==true)
          {
-            D.f[E   ] = &DDStart[E   *size_Mat];
-            D.f[W   ] = &DDStart[W   *size_Mat];
-            D.f[N   ] = &DDStart[N   *size_Mat];
-            D.f[S   ] = &DDStart[S   *size_Mat];
-            D.f[T   ] = &DDStart[T   *size_Mat];
-            D.f[B   ] = &DDStart[B   *size_Mat];
-            D.f[NE  ] = &DDStart[NE  *size_Mat];
-            D.f[SW  ] = &DDStart[SW  *size_Mat];
-            D.f[SE  ] = &DDStart[SE  *size_Mat];
-            D.f[NW  ] = &DDStart[NW  *size_Mat];
-            D.f[TE  ] = &DDStart[TE  *size_Mat];
-            D.f[BW  ] = &DDStart[BW  *size_Mat];
-            D.f[BE  ] = &DDStart[BE  *size_Mat];
-            D.f[TW  ] = &DDStart[TW  *size_Mat];
-            D.f[TN  ] = &DDStart[TN  *size_Mat];
-            D.f[BS  ] = &DDStart[BS  *size_Mat];
-            D.f[BN  ] = &DDStart[BN  *size_Mat];
-            D.f[TS  ] = &DDStart[TS  *size_Mat];
-            D.f[REST] = &DDStart[REST*size_Mat];
-            D.f[TNE ] = &DDStart[TNE *size_Mat];
-            D.f[TSW ] = &DDStart[TSW *size_Mat];
-            D.f[TSE ] = &DDStart[TSE *size_Mat];
-            D.f[TNW ] = &DDStart[TNW *size_Mat];
-            D.f[BNE ] = &DDStart[BNE *size_Mat];
-            D.f[BSW ] = &DDStart[BSW *size_Mat];
-            D.f[BSE ] = &DDStart[BSE *size_Mat];
-            D.f[BNW ] = &DDStart[BNW *size_Mat];
+            D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
+            D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
+            D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
+            D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
+            D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
+            D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
+            D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
+            D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
+            D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
+            D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
+            D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
+            D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
+            D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
+            D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
+            D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
+            D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
+            D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
+            D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
+            D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+            D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
+            D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
+            D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
+            D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
+            D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
+            D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
+            D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
+            D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
          }
          else
          {
-            D.f[W   ] = &DDStart[E   *size_Mat];
-            D.f[E   ] = &DDStart[W   *size_Mat];
-            D.f[S   ] = &DDStart[N   *size_Mat];
-            D.f[N   ] = &DDStart[S   *size_Mat];
-            D.f[B   ] = &DDStart[T   *size_Mat];
-            D.f[T   ] = &DDStart[B   *size_Mat];
-            D.f[SW  ] = &DDStart[NE  *size_Mat];
-            D.f[NE  ] = &DDStart[SW  *size_Mat];
-            D.f[NW  ] = &DDStart[SE  *size_Mat];
-            D.f[SE  ] = &DDStart[NW  *size_Mat];
-            D.f[BW  ] = &DDStart[TE  *size_Mat];
-            D.f[TE  ] = &DDStart[BW  *size_Mat];
-            D.f[TW  ] = &DDStart[BE  *size_Mat];
-            D.f[BE  ] = &DDStart[TW  *size_Mat];
-            D.f[BS  ] = &DDStart[TN  *size_Mat];
-            D.f[TN  ] = &DDStart[BS  *size_Mat];
-            D.f[TS  ] = &DDStart[BN  *size_Mat];
-            D.f[BN  ] = &DDStart[TS  *size_Mat];
-            D.f[REST] = &DDStart[REST*size_Mat];
-            D.f[BSW ] = &DDStart[TNE *size_Mat];
-            D.f[BNE ] = &DDStart[TSW *size_Mat];
-            D.f[BNW ] = &DDStart[TSE *size_Mat];
-            D.f[BSE ] = &DDStart[TNW *size_Mat];
-            D.f[TSW ] = &DDStart[BNE *size_Mat];
-            D.f[TNE ] = &DDStart[BSW *size_Mat];
-            D.f[TNW ] = &DDStart[BSE *size_Mat];
-            D.f[TSE ] = &DDStart[BNW *size_Mat];
+            D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
+            D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
+            D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
+            D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
+            D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
+            D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
+            D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
+            D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
+            D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
+            D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
+            D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
+            D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
+            D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
+            D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
+            D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
+            D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
+            D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
+            D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
+            D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+            D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
+            D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
+            D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
+            D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
+            D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
+            D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
+            D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
+            D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
          }
 
          ////////////////////////////////////////////////////////////////////////////////
@@ -2960,33 +2960,33 @@ extern "C" __global__ void LB_Kernel_Casc_SP_MS_Diff_27(real omega,
          //unsigned int ktne = k;
          unsigned int kbsw = neighborZ[ksw];
          //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-         real fE    =  (D.f[E   ])[k  ];//ke
-         real fW    =  (D.f[W   ])[kw ];
-         real fN    =  (D.f[N   ])[k  ];//kn
-         real fS    =  (D.f[S   ])[ks ];
-         real fT    =  (D.f[T   ])[k  ];//kt
-         real fB    =  (D.f[B   ])[kb ];
-         real fNE   =  (D.f[NE  ])[k  ];//kne
-         real fSW   =  (D.f[SW  ])[ksw];
-         real fSE   =  (D.f[SE  ])[ks ];//kse
-         real fNW   =  (D.f[NW  ])[kw ];//knw
-         real fTE   =  (D.f[TE  ])[k  ];//kte
-         real fBW   =  (D.f[BW  ])[kbw];
-         real fBE   =  (D.f[BE  ])[kb ];//kbe
-         real fTW   =  (D.f[TW  ])[kw ];//ktw
-         real fTN   =  (D.f[TN  ])[k  ];//ktn
-         real fBS   =  (D.f[BS  ])[kbs];
-         real fBN   =  (D.f[BN  ])[kb ];//kbn
-         real fTS   =  (D.f[TS  ])[ks ];//kts
-         real fZERO =  (D.f[REST])[k  ];//kzero
-         real fTNE   = (D.f[TNE ])[k  ];//ktne
-         real fTSW   = (D.f[TSW ])[ksw];//ktsw
-         real fTSE   = (D.f[TSE ])[ks ];//ktse
-         real fTNW   = (D.f[TNW ])[kw ];//ktnw
-         real fBNE   = (D.f[BNE ])[kb ];//kbne
-         real fBSW   = (D.f[BSW ])[kbsw];
-         real fBSE   = (D.f[BSE ])[kbs];//kbse
-         real fBNW   = (D.f[BNW ])[kbw];//kbnw
+         real fE    =  (D.f[DIR_P00   ])[k  ];//ke
+         real fW    =  (D.f[DIR_M00   ])[kw ];
+         real fN    =  (D.f[DIR_0P0   ])[k  ];//kn
+         real fS    =  (D.f[DIR_0M0   ])[ks ];
+         real fT    =  (D.f[DIR_00P   ])[k  ];//kt
+         real fB    =  (D.f[DIR_00M   ])[kb ];
+         real fNE   =  (D.f[DIR_PP0  ])[k  ];//kne
+         real fSW   =  (D.f[DIR_MM0  ])[ksw];
+         real fSE   =  (D.f[DIR_PM0  ])[ks ];//kse
+         real fNW   =  (D.f[DIR_MP0  ])[kw ];//knw
+         real fTE   =  (D.f[DIR_P0P  ])[k  ];//kte
+         real fBW   =  (D.f[DIR_M0M  ])[kbw];
+         real fBE   =  (D.f[DIR_P0M  ])[kb ];//kbe
+         real fTW   =  (D.f[DIR_M0P  ])[kw ];//ktw
+         real fTN   =  (D.f[DIR_0PP  ])[k  ];//ktn
+         real fBS   =  (D.f[DIR_0MM  ])[kbs];
+         real fBN   =  (D.f[DIR_0PM  ])[kb ];//kbn
+         real fTS   =  (D.f[DIR_0MP  ])[ks ];//kts
+         real fZERO =  (D.f[DIR_000])[k  ];//kzero
+         real fTNE   = (D.f[DIR_PPP ])[k  ];//ktne
+         real fTSW   = (D.f[DIR_MMP ])[ksw];//ktsw
+         real fTSE   = (D.f[DIR_PMP ])[ks ];//ktse
+         real fTNW   = (D.f[DIR_MPP ])[kw ];//ktnw
+         real fBNE   = (D.f[DIR_PPM ])[kb ];//kbne
+         real fBSW   = (D.f[DIR_MMM ])[kbsw];
+         real fBSE   = (D.f[DIR_PMM ])[kbs];//kbse
+         real fBNW   = (D.f[DIR_MPM ])[kbw];//kbnw
          ////////////////////////////////////////////////////////////////////////////////
          real rho0   =  fZERO+fE+fW+fN+fS+fT+fB+fNE+fSW+fSE+fNW+fTE+fBW+fBE+fTW+fTN+fBS+fBN+fTS+fTNE+fTSW+fTSE+fTNW+fBNE+fBSW+fBSE+fBNW;
          real rho    =  rho0 + c1o1;
@@ -3259,65 +3259,65 @@ extern "C" __global__ void LB_Kernel_Casc_SP_MS_Diff_27(real omega,
             c2o1*(vx2y*MzYZZ +  vx*MzXYYZZ + vz*MzXXYYZ + vyz2*MzXXY + vx2z*MzYYZ + vxy2*MzXZZ + vxz2*MzXYY + vy*MzXXYZZ + vy2z*MzXXZ)+ 
             c4o1*(vxy2z*MzXZ + vx2yz*MzYZ + vxyz2*MzXY + vxy*MzXYZZ + vxz*MzXYYZ + vyz*MzXXYZ);
 
-         (D.f[ E   ])[k   ] = fW    -   c1o2*rho*( mu200  - mu220 + mu222 - mu202 +  mu120 - mu122 + mu102        );   //ke
-         (D.f[ W   ])[kw  ] = fE    -   c1o2*rho*( mu200  - mu220 + mu222 - mu202 -  mu120 + mu122 - mu102        );   
-         (D.f[ N   ])[k   ] = fS    -   c1o2*rho*( mu210  - mu220 + mu222 - mu212 +  mu020 - mu022 + mu012        );   //kn
-         (D.f[ S   ])[ks  ] = fN    -   c1o2*rho*(-mu210  - mu220 + mu222 + mu212 +  mu020 - mu022 - mu012        );   
-         (D.f[ T   ])[k   ] = fB    -   c1o2*rho*(-mu221  + mu222 + mu201 - mu202 +  mu021 - mu022 + mu002        );   //kt
-         (D.f[ B   ])[kb  ] = fT    -   c1o2*rho*( mu221  + mu222 - mu201 - mu202 -  mu021 - mu022 + mu002        );   
-         (D.f[ NE  ])[k   ] = fSW   -  c1o4*rho*(-mu210  + mu220 - mu222 + mu212 +  mu110 - mu120 + mu122 - mu112);   //kne
-         (D.f[ SW  ])[ksw ] = fNE   -  c1o4*rho*( mu210  + mu220 - mu222 - mu212 +  mu110 + mu120 - mu122 - mu112);   
-         (D.f[ SE  ])[ks  ] = fNW   -  c1o4*rho*( mu210  + mu220 - mu222 - mu212 -  mu110 - mu120 + mu122 + mu112);   //kse
-         (D.f[ NW  ])[kw  ] = fSE   -  c1o4*rho*(-mu210  + mu220 - mu222 + mu212 -  mu110 + mu120 - mu122 + mu112);   //knw
-         (D.f[ TE  ])[k   ] = fBW   -  c1o4*rho*( mu221  - mu222 - mu201 + mu202 -  mu121 + mu122 + mu101 - mu102);   //kte
-         (D.f[ BW  ])[kbw ] = fTE   -  c1o4*rho*(-mu221  - mu222 + mu201 + mu202 -  mu121 - mu122 + mu101 + mu102);   
-         (D.f[ BE  ])[kb  ] = fTW   -  c1o4*rho*(-mu221  - mu222 + mu201 + mu202 +  mu121 + mu122 - mu101 - mu102);   //kbe
-         (D.f[ TW  ])[kw  ] = fBE   -  c1o4*rho*( mu221  - mu222 - mu201 + mu202 +  mu121 - mu122 - mu101 + mu102);   //ktw
-         (D.f[ TN  ])[k   ] = fBS   -  c1o4*rho*( mu221  - mu222 - mu211 + mu212 -  mu021 + mu022 + mu011 - mu012);   //ktn
-         (D.f[ BS  ])[kbs ] = fTN   -  c1o4*rho*(-mu221  - mu222 - mu211 - mu212 +  mu021 + mu022 + mu011 + mu012);   
-         (D.f[ BN  ])[kb  ] = fTS   -  c1o4*rho*(-mu221  - mu222 + mu211 + mu212 +  mu021 + mu022 - mu011 - mu012);   //kbn
-         (D.f[ TS  ])[ks  ] = fBN   -  c1o4*rho*( mu221  - mu222 + mu211 - mu212 -  mu021 + mu022 - mu011 + mu012);   //kts
-         (D.f[ REST])[k   ] = fZERO -       rho*(-mu200  + mu220 - mu222 + mu202 -  mu020 + mu022 - mu002        );   //kzero
-         (D.f[ TNE ])[k   ] = fBSW  - c1o8*rho*(-mu221  + mu222 + mu211 - mu212 +  mu121 - mu122 - mu111 + mu112);   //ktne
-         (D.f[ TSE ])[ks  ] = fBNW  - c1o8*rho*(-mu221  + mu222 - mu211 + mu212 +  mu121 - mu122 + mu111 - mu112);   //ktse
-         (D.f[ BNE ])[kb  ] = fTSW  - c1o8*rho*( mu221  + mu222 - mu211 - mu212 -  mu121 - mu122 + mu111 + mu112);   //kbne
-         (D.f[ BSE ])[kbs ] = fTNW  - c1o8*rho*( mu221  + mu222 + mu211 + mu212 -  mu121 - mu122 - mu111 - mu112);   //kbse
-         (D.f[ TNW ])[kw  ] = fBSE  - c1o8*rho*(-mu221  + mu222 + mu211 - mu212 -  mu121 + mu122 + mu111 - mu112);   //ktnw
-         (D.f[ TSW ])[ksw ] = fBNE  - c1o8*rho*(-mu221  + mu222 - mu211 + mu212 -  mu121 + mu122 - mu111 + mu112);   //ktsw
-         (D.f[ BNW ])[kbw ] = fTSE  - c1o8*rho*( mu221  + mu222 - mu211 - mu212 +  mu121 + mu122 - mu111 - mu112);   //kbnw
-         (D.f[ BSW ])[kbsw] = fTNE  - c1o8*rho*( mu221  + mu222 + mu211 + mu212 +  mu121 + mu122 + mu111 + mu112);   
+         (D.f[ DIR_P00   ])[k   ] = fW    -   c1o2*rho*( mu200  - mu220 + mu222 - mu202 +  mu120 - mu122 + mu102        );   //ke
+         (D.f[ DIR_M00   ])[kw  ] = fE    -   c1o2*rho*( mu200  - mu220 + mu222 - mu202 -  mu120 + mu122 - mu102        );   
+         (D.f[ DIR_0P0   ])[k   ] = fS    -   c1o2*rho*( mu210  - mu220 + mu222 - mu212 +  mu020 - mu022 + mu012        );   //kn
+         (D.f[ DIR_0M0   ])[ks  ] = fN    -   c1o2*rho*(-mu210  - mu220 + mu222 + mu212 +  mu020 - mu022 - mu012        );   
+         (D.f[ DIR_00P   ])[k   ] = fB    -   c1o2*rho*(-mu221  + mu222 + mu201 - mu202 +  mu021 - mu022 + mu002        );   //kt
+         (D.f[ DIR_00M   ])[kb  ] = fT    -   c1o2*rho*( mu221  + mu222 - mu201 - mu202 -  mu021 - mu022 + mu002        );   
+         (D.f[ DIR_PP0  ])[k   ] = fSW   -  c1o4*rho*(-mu210  + mu220 - mu222 + mu212 +  mu110 - mu120 + mu122 - mu112);   //kne
+         (D.f[ DIR_MM0  ])[ksw ] = fNE   -  c1o4*rho*( mu210  + mu220 - mu222 - mu212 +  mu110 + mu120 - mu122 - mu112);   
+         (D.f[ DIR_PM0  ])[ks  ] = fNW   -  c1o4*rho*( mu210  + mu220 - mu222 - mu212 -  mu110 - mu120 + mu122 + mu112);   //kse
+         (D.f[ DIR_MP0  ])[kw  ] = fSE   -  c1o4*rho*(-mu210  + mu220 - mu222 + mu212 -  mu110 + mu120 - mu122 + mu112);   //knw
+         (D.f[ DIR_P0P  ])[k   ] = fBW   -  c1o4*rho*( mu221  - mu222 - mu201 + mu202 -  mu121 + mu122 + mu101 - mu102);   //kte
+         (D.f[ DIR_M0M  ])[kbw ] = fTE   -  c1o4*rho*(-mu221  - mu222 + mu201 + mu202 -  mu121 - mu122 + mu101 + mu102);   
+         (D.f[ DIR_P0M  ])[kb  ] = fTW   -  c1o4*rho*(-mu221  - mu222 + mu201 + mu202 +  mu121 + mu122 - mu101 - mu102);   //kbe
+         (D.f[ DIR_M0P  ])[kw  ] = fBE   -  c1o4*rho*( mu221  - mu222 - mu201 + mu202 +  mu121 - mu122 - mu101 + mu102);   //ktw
+         (D.f[ DIR_0PP  ])[k   ] = fBS   -  c1o4*rho*( mu221  - mu222 - mu211 + mu212 -  mu021 + mu022 + mu011 - mu012);   //ktn
+         (D.f[ DIR_0MM  ])[kbs ] = fTN   -  c1o4*rho*(-mu221  - mu222 - mu211 - mu212 +  mu021 + mu022 + mu011 + mu012);   
+         (D.f[ DIR_0PM  ])[kb  ] = fTS   -  c1o4*rho*(-mu221  - mu222 + mu211 + mu212 +  mu021 + mu022 - mu011 - mu012);   //kbn
+         (D.f[ DIR_0MP  ])[ks  ] = fBN   -  c1o4*rho*( mu221  - mu222 + mu211 - mu212 -  mu021 + mu022 - mu011 + mu012);   //kts
+         (D.f[ DIR_000])[k   ] = fZERO -       rho*(-mu200  + mu220 - mu222 + mu202 -  mu020 + mu022 - mu002        );   //kzero
+         (D.f[ DIR_PPP ])[k   ] = fBSW  - c1o8*rho*(-mu221  + mu222 + mu211 - mu212 +  mu121 - mu122 - mu111 + mu112);   //ktne
+         (D.f[ DIR_PMP ])[ks  ] = fBNW  - c1o8*rho*(-mu221  + mu222 - mu211 + mu212 +  mu121 - mu122 + mu111 - mu112);   //ktse
+         (D.f[ DIR_PPM ])[kb  ] = fTSW  - c1o8*rho*( mu221  + mu222 - mu211 - mu212 -  mu121 - mu122 + mu111 + mu112);   //kbne
+         (D.f[ DIR_PMM ])[kbs ] = fTNW  - c1o8*rho*( mu221  + mu222 + mu211 + mu212 -  mu121 - mu122 - mu111 - mu112);   //kbse
+         (D.f[ DIR_MPP ])[kw  ] = fBSE  - c1o8*rho*(-mu221  + mu222 + mu211 - mu212 -  mu121 + mu122 + mu111 - mu112);   //ktnw
+         (D.f[ DIR_MMP ])[ksw ] = fBNE  - c1o8*rho*(-mu221  + mu222 - mu211 + mu212 -  mu121 + mu122 - mu111 + mu112);   //ktsw
+         (D.f[ DIR_MPM ])[kbw ] = fTSE  - c1o8*rho*( mu221  + mu222 - mu211 - mu212 +  mu121 + mu122 - mu111 - mu112);   //kbnw
+         (D.f[ DIR_MMM ])[kbsw] = fTNE  - c1o8*rho*( mu221  + mu222 + mu211 + mu212 +  mu121 + mu122 + mu111 + mu112);   
 
 
          //////////////////////////////////////////////////////////////////////////
          //BGK
          //////////////////////////////////////////////////////////////////////////
-         //(D.f[ E   ])[k   ] = fW    ;
-         //(D.f[ W   ])[kw  ] = fE    ;
-         //(D.f[ N   ])[k   ] = fS    ;
-         //(D.f[ S   ])[ks  ] = fN    ;
-         //(D.f[ T   ])[k   ] = fB    ;
-         //(D.f[ B   ])[kb  ] = fT    ;
-         //(D.f[ NE  ])[k   ] = fSW   ;
-         //(D.f[ SW  ])[ksw ] = fNE   ;
-         //(D.f[ SE  ])[ks  ] = fNW   ;
-         //(D.f[ NW  ])[kw  ] = fSE   ;
-         //(D.f[ TE  ])[k   ] = fBW   ;
-         //(D.f[ BW  ])[kbw ] = fTE   ;
-         //(D.f[ BE  ])[kb  ] = fTW   ;
-         //(D.f[ TW  ])[kw  ] = fBE   ;
-         //(D.f[ TN  ])[k   ] = fBS   ;
-         //(D.f[ BS  ])[kbs ] = fTN   ;
-         //(D.f[ BN  ])[kb  ] = fTS   ;
-         //(D.f[ TS  ])[ks  ] = fBN   ;
-         //(D.f[ REST])[k   ] = fZERO ;
-         //(D.f[ TNE ])[k   ] = fBSW  ;
-         //(D.f[ TSE ])[ks  ] = fBNW  ;
-         //(D.f[ BNE ])[kb  ] = fTSW  ;
-         //(D.f[ BSE ])[kbs ] = fTNW  ;
-         //(D.f[ TNW ])[kw  ] = fBSE  ;
-         //(D.f[ TSW ])[ksw ] = fBNE  ;
-         //(D.f[ BNW ])[kbw ] = fTSE  ;
-         //(D.f[ BSW ])[kbsw] = fTNE  ;
+         //(D.f[ DIR_P00   ])[k   ] = fW    ;
+         //(D.f[ DIR_M00   ])[kw  ] = fE    ;
+         //(D.f[ DIR_0P0   ])[k   ] = fS    ;
+         //(D.f[ DIR_0M0   ])[ks  ] = fN    ;
+         //(D.f[ DIR_00P   ])[k   ] = fB    ;
+         //(D.f[ DIR_00M   ])[kb  ] = fT    ;
+         //(D.f[ DIR_PP0  ])[k   ] = fSW   ;
+         //(D.f[ DIR_MM0  ])[ksw ] = fNE   ;
+         //(D.f[ DIR_PM0  ])[ks  ] = fNW   ;
+         //(D.f[ DIR_MP0  ])[kw  ] = fSE   ;
+         //(D.f[ DIR_P0P  ])[k   ] = fBW   ;
+         //(D.f[ DIR_M0M  ])[kbw ] = fTE   ;
+         //(D.f[ DIR_P0M  ])[kb  ] = fTW   ;
+         //(D.f[ DIR_M0P  ])[kw  ] = fBE   ;
+         //(D.f[ DIR_0PP  ])[k   ] = fBS   ;
+         //(D.f[ DIR_0MM  ])[kbs ] = fTN   ;
+         //(D.f[ DIR_0PM  ])[kb  ] = fTS   ;
+         //(D.f[ DIR_0MP  ])[ks  ] = fBN   ;
+         //(D.f[ DIR_000])[k   ] = fZERO ;
+         //(D.f[ DIR_PPP ])[k   ] = fBSW  ;
+         //(D.f[ DIR_PMP ])[ks  ] = fBNW  ;
+         //(D.f[ DIR_PPM ])[kb  ] = fTSW  ;
+         //(D.f[ DIR_PMM ])[kbs ] = fTNW  ;
+         //(D.f[ DIR_MPP ])[kw  ] = fBSE  ;
+         //(D.f[ DIR_MMP ])[ksw ] = fBNE  ;
+         //(D.f[ DIR_MPM ])[kbw ] = fTSE  ;
+         //(D.f[ DIR_MMM ])[kbsw] = fTNE  ;
       }                                                                                                                    
    }
 }
@@ -3362,7 +3362,7 @@ extern "C" __global__ void LB_Kernel_Casc_SP_MS_Diff_27(real omega,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LB_Kernel_Casc_SP_27(  real omega,
+__global__ void LB_Kernel_Casc_SP_27(  real omega,
                                                   unsigned int* bcMatD,
                                                   unsigned int* neighborX,
                                                   unsigned int* neighborY,
@@ -3393,63 +3393,63 @@ extern "C" __global__ void LB_Kernel_Casc_SP_27(  real omega,
        Distributions27 D;
        if (EvenOrOdd==true)
        {
-          D.f[E   ] = &DDStart[E   *size_Mat];
-          D.f[W   ] = &DDStart[W   *size_Mat];
-          D.f[N   ] = &DDStart[N   *size_Mat];
-          D.f[S   ] = &DDStart[S   *size_Mat];
-          D.f[T   ] = &DDStart[T   *size_Mat];
-          D.f[B   ] = &DDStart[B   *size_Mat];
-          D.f[NE  ] = &DDStart[NE  *size_Mat];
-          D.f[SW  ] = &DDStart[SW  *size_Mat];
-          D.f[SE  ] = &DDStart[SE  *size_Mat];
-          D.f[NW  ] = &DDStart[NW  *size_Mat];
-          D.f[TE  ] = &DDStart[TE  *size_Mat];
-          D.f[BW  ] = &DDStart[BW  *size_Mat];
-          D.f[BE  ] = &DDStart[BE  *size_Mat];
-          D.f[TW  ] = &DDStart[TW  *size_Mat];
-          D.f[TN  ] = &DDStart[TN  *size_Mat];
-          D.f[BS  ] = &DDStart[BS  *size_Mat];
-          D.f[BN  ] = &DDStart[BN  *size_Mat];
-          D.f[TS  ] = &DDStart[TS  *size_Mat];
-          D.f[REST] = &DDStart[REST*size_Mat];
-          D.f[TNE ] = &DDStart[TNE *size_Mat];
-          D.f[TSW ] = &DDStart[TSW *size_Mat];
-          D.f[TSE ] = &DDStart[TSE *size_Mat];
-          D.f[TNW ] = &DDStart[TNW *size_Mat];
-          D.f[BNE ] = &DDStart[BNE *size_Mat];
-          D.f[BSW ] = &DDStart[BSW *size_Mat];
-          D.f[BSE ] = &DDStart[BSE *size_Mat];
-          D.f[BNW ] = &DDStart[BNW *size_Mat];
+          D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
+          D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
+          D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
+          D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
+          D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
+          D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
+          D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
+          D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
+          D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
+          D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
+          D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
+          D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
+          D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
+          D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
+          D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
+          D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
+          D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
+          D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
+          D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+          D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
+          D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
+          D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
+          D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
+          D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
+          D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
+          D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
+          D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
        }
        else
        {
-          D.f[W   ] = &DDStart[E   *size_Mat];
-          D.f[E   ] = &DDStart[W   *size_Mat];
-          D.f[S   ] = &DDStart[N   *size_Mat];
-          D.f[N   ] = &DDStart[S   *size_Mat];
-          D.f[B   ] = &DDStart[T   *size_Mat];
-          D.f[T   ] = &DDStart[B   *size_Mat];
-          D.f[SW  ] = &DDStart[NE  *size_Mat];
-          D.f[NE  ] = &DDStart[SW  *size_Mat];
-          D.f[NW  ] = &DDStart[SE  *size_Mat];
-          D.f[SE  ] = &DDStart[NW  *size_Mat];
-          D.f[BW  ] = &DDStart[TE  *size_Mat];
-          D.f[TE  ] = &DDStart[BW  *size_Mat];
-          D.f[TW  ] = &DDStart[BE  *size_Mat];
-          D.f[BE  ] = &DDStart[TW  *size_Mat];
-          D.f[BS  ] = &DDStart[TN  *size_Mat];
-          D.f[TN  ] = &DDStart[BS  *size_Mat];
-          D.f[TS  ] = &DDStart[BN  *size_Mat];
-          D.f[BN  ] = &DDStart[TS  *size_Mat];
-          D.f[REST] = &DDStart[REST*size_Mat];
-          D.f[BSW ] = &DDStart[TNE *size_Mat];
-          D.f[BNE ] = &DDStart[TSW *size_Mat];
-          D.f[BNW ] = &DDStart[TSE *size_Mat];
-          D.f[BSE ] = &DDStart[TNW *size_Mat];
-          D.f[TSW ] = &DDStart[BNE *size_Mat];
-          D.f[TNE ] = &DDStart[BSW *size_Mat];
-          D.f[TNW ] = &DDStart[BSE *size_Mat];
-          D.f[TSE ] = &DDStart[BNW *size_Mat];
+          D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
+          D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
+          D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
+          D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
+          D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
+          D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
+          D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
+          D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
+          D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
+          D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
+          D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
+          D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
+          D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
+          D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
+          D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
+          D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
+          D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
+          D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
+          D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+          D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
+          D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
+          D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
+          D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
+          D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
+          D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
+          D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
+          D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
        }
 
        ////////////////////////////////////////////////////////////////////////////////
@@ -3512,33 +3512,33 @@ extern "C" __global__ void LB_Kernel_Casc_SP_27(  real omega,
        //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
        real f_E,f_W,f_N,f_S,f_T,f_B,f_NE,f_SW,f_SE,f_NW,f_TE,f_BW,f_BE,f_TW,f_TN,f_BS,f_BN,f_TS,f_ZERO, f_TNE,f_TNW,f_TSE,f_TSW, f_BNE,f_BNW,f_BSE,f_BSW;
        //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-       f_E    =  (D.f[E   ])[ke]+c2o27;
-       f_W    =  (D.f[W   ])[kw]+c2o27;
-       f_N    =  (D.f[N   ])[kn]+c2o27;
-       f_S    =  (D.f[S   ])[ks]+c2o27;
-       f_T    =  (D.f[T   ])[kt]+c2o27;
-       f_B    =  (D.f[B   ])[kb]+c2o27;
-       f_NE   =  (D.f[NE  ])[kne]+c1o54;
-       f_SW   =  (D.f[SW  ])[ksw]+c1o54;
-       f_SE   =  (D.f[SE  ])[kse]+c1o54;
-       f_NW   =  (D.f[NW  ])[knw]+c1o54;
-       f_TE   =  (D.f[TE  ])[kte]+c1o54;
-       f_BW   =  (D.f[BW  ])[kbw]+c1o54;
-       f_BE   =  (D.f[BE  ])[kbe]+c1o54;
-       f_TW   =  (D.f[TW  ])[ktw]+c1o54;
-       f_TN   =  (D.f[TN  ])[ktn]+c1o54;
-       f_BS   =  (D.f[BS  ])[kbs]+c1o54;
-       f_BN   =  (D.f[BN  ])[kbn]+c1o54;
-       f_TS   =  (D.f[TS  ])[kts]+c1o54;
-       f_ZERO =  (D.f[REST])[kzero]+c8o27;
-       f_TNE   = (D.f[TNE ])[ktne]+c1o216;
-       f_TSW   = (D.f[TSW ])[ktsw]+c1o216;
-       f_TSE   = (D.f[TSE ])[ktse]+c1o216;
-       f_TNW   = (D.f[TNW ])[ktnw]+c1o216;
-       f_BNE   = (D.f[BNE ])[kbne]+c1o216;
-       f_BSW   = (D.f[BSW ])[kbsw]+c1o216;
-       f_BSE   = (D.f[BSE ])[kbse]+c1o216;
-       f_BNW   = (D.f[BNW ])[kbnw]+c1o216;
+       f_E    =  (D.f[DIR_P00   ])[ke]+c2o27;
+       f_W    =  (D.f[DIR_M00   ])[kw]+c2o27;
+       f_N    =  (D.f[DIR_0P0   ])[kn]+c2o27;
+       f_S    =  (D.f[DIR_0M0   ])[ks]+c2o27;
+       f_T    =  (D.f[DIR_00P   ])[kt]+c2o27;
+       f_B    =  (D.f[DIR_00M   ])[kb]+c2o27;
+       f_NE   =  (D.f[DIR_PP0  ])[kne]+c1o54;
+       f_SW   =  (D.f[DIR_MM0  ])[ksw]+c1o54;
+       f_SE   =  (D.f[DIR_PM0  ])[kse]+c1o54;
+       f_NW   =  (D.f[DIR_MP0  ])[knw]+c1o54;
+       f_TE   =  (D.f[DIR_P0P  ])[kte]+c1o54;
+       f_BW   =  (D.f[DIR_M0M  ])[kbw]+c1o54;
+       f_BE   =  (D.f[DIR_P0M  ])[kbe]+c1o54;
+       f_TW   =  (D.f[DIR_M0P  ])[ktw]+c1o54;
+       f_TN   =  (D.f[DIR_0PP  ])[ktn]+c1o54;
+       f_BS   =  (D.f[DIR_0MM  ])[kbs]+c1o54;
+       f_BN   =  (D.f[DIR_0PM  ])[kbn]+c1o54;
+       f_TS   =  (D.f[DIR_0MP  ])[kts]+c1o54;
+       f_ZERO =  (D.f[DIR_000])[kzero]+c8o27;
+       f_TNE   = (D.f[DIR_PPP ])[ktne]+c1o216;
+       f_TSW   = (D.f[DIR_MMP ])[ktsw]+c1o216;
+       f_TSE   = (D.f[DIR_PMP ])[ktse]+c1o216;
+       f_TNW   = (D.f[DIR_MPP ])[ktnw]+c1o216;
+       f_BNE   = (D.f[DIR_PPM ])[kbne]+c1o216;
+       f_BSW   = (D.f[DIR_MMM ])[kbsw]+c1o216;
+       f_BSE   = (D.f[DIR_PMM ])[kbse]+c1o216;
+       f_BNW   = (D.f[DIR_MPM ])[kbnw]+c1o216;
        ////////////////////////////////////////////////////////////////////////////////
 
        if( BC == GEO_FLUID || BC == GEO_VELO)
@@ -3977,38 +3977,38 @@ extern "C" __global__ void LB_Kernel_Casc_SP_27(  real omega,
           }
         }
 
-       (D.f[ E  ])[ke ] = f_W-c2o27;
-       (D.f[ W  ])[kw ] = f_E-c2o27;
-
-       (D.f[ N  ])[kn ] = f_S-c2o27;
-       (D.f[ S  ])[ks ] = f_N-c2o27;
-       (D.f[ T  ])[kt ] = f_B-c2o27;
-       (D.f[ B  ])[kb ] = f_T-c2o27;
-
-       (D.f[ NE ])[kne] = f_SW-c1o54;
-       (D.f[ SW ])[ksw] = f_NE-c1o54;
-       (D.f[ SE ])[kse] = f_NW-c1o54;
-       (D.f[ NW ])[knw] = f_SE-c1o54;
-       (D.f[ TE ])[kte] = f_BW-c1o54;
-       (D.f[ BW ])[kbw] = f_TE-c1o54;
-       (D.f[ BE ])[kbe] = f_TW-c1o54;
-       (D.f[ TW ])[ktw] = f_BE-c1o54;
-
-       (D.f[ TN ])[ktn] = f_BS-c1o54;
-       (D.f[ BS ])[kbs] = f_TN-c1o54;
-       (D.f[ BN ])[kbn] = f_TS-c1o54;
-       (D.f[ TS ])[kts] = f_BN-c1o54;
-
-       (D.f[ REST])[k] = f_ZERO-c8o27;
-
-       (D.f[ TNE ])[ktne] = f_BSW-c1o216;
-       (D.f[ TSE ])[ktse] = f_BNW-c1o216;
-       (D.f[ BNE ])[kbne] = f_TSW-c1o216;
-       (D.f[ BSE ])[kbse] = f_TNW-c1o216;
-       (D.f[ TNW ])[ktnw] = f_BSE-c1o216;
-       (D.f[ TSW ])[ktsw] = f_BNE-c1o216;
-       (D.f[ BNW ])[kbnw] = f_TSE-c1o216;
-       (D.f[ BSW ])[kbsw] = f_TNE-c1o216;
+       (D.f[ DIR_P00  ])[ke ] = f_W-c2o27;
+       (D.f[ DIR_M00  ])[kw ] = f_E-c2o27;
+
+       (D.f[ DIR_0P0  ])[kn ] = f_S-c2o27;
+       (D.f[ DIR_0M0  ])[ks ] = f_N-c2o27;
+       (D.f[ DIR_00P  ])[kt ] = f_B-c2o27;
+       (D.f[ DIR_00M  ])[kb ] = f_T-c2o27;
+
+       (D.f[ DIR_PP0 ])[kne] = f_SW-c1o54;
+       (D.f[ DIR_MM0 ])[ksw] = f_NE-c1o54;
+       (D.f[ DIR_PM0 ])[kse] = f_NW-c1o54;
+       (D.f[ DIR_MP0 ])[knw] = f_SE-c1o54;
+       (D.f[ DIR_P0P ])[kte] = f_BW-c1o54;
+       (D.f[ DIR_M0M ])[kbw] = f_TE-c1o54;
+       (D.f[ DIR_P0M ])[kbe] = f_TW-c1o54;
+       (D.f[ DIR_M0P ])[ktw] = f_BE-c1o54;
+
+       (D.f[ DIR_0PP ])[ktn] = f_BS-c1o54;
+       (D.f[ DIR_0MM ])[kbs] = f_TN-c1o54;
+       (D.f[ DIR_0PM ])[kbn] = f_TS-c1o54;
+       (D.f[ DIR_0MP ])[kts] = f_BN-c1o54;
+
+       (D.f[ DIR_000])[k] = f_ZERO-c8o27;
+
+       (D.f[ DIR_PPP ])[ktne] = f_BSW-c1o216;
+       (D.f[ DIR_PMP ])[ktse] = f_BNW-c1o216;
+       (D.f[ DIR_PPM ])[kbne] = f_TSW-c1o216;
+       (D.f[ DIR_PMM ])[kbse] = f_TNW-c1o216;
+       (D.f[ DIR_MPP ])[ktnw] = f_BSE-c1o216;
+       (D.f[ DIR_MMP ])[ktsw] = f_BNE-c1o216;
+       (D.f[ DIR_MPM ])[kbnw] = f_TSE-c1o216;
+       (D.f[ DIR_MMM ])[kbsw] = f_TNE-c1o216;
       }
      __syncthreads();
      }
@@ -4054,7 +4054,7 @@ extern "C" __global__ void LB_Kernel_Casc_SP_27(  real omega,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LB_Kernel_Casc27(real omega,
+__global__ void LB_Kernel_Casc27(real omega,
                                             unsigned int* bcMatD,
                                             unsigned int* neighborX,
                                             unsigned int* neighborY,
@@ -4089,63 +4089,63 @@ extern "C" __global__ void LB_Kernel_Casc27(real omega,
       Distributions27 D;
       if (EvenOrOdd==true)
       {
-         D.f[E   ] = &DDStart[E   *size_Mat];
-         D.f[W   ] = &DDStart[W   *size_Mat];
-         D.f[N   ] = &DDStart[N   *size_Mat];
-         D.f[S   ] = &DDStart[S   *size_Mat];
-         D.f[T   ] = &DDStart[T   *size_Mat];
-         D.f[B   ] = &DDStart[B   *size_Mat];
-         D.f[NE  ] = &DDStart[NE  *size_Mat];
-         D.f[SW  ] = &DDStart[SW  *size_Mat];
-         D.f[SE  ] = &DDStart[SE  *size_Mat];
-         D.f[NW  ] = &DDStart[NW  *size_Mat];
-         D.f[TE  ] = &DDStart[TE  *size_Mat];
-         D.f[BW  ] = &DDStart[BW  *size_Mat];
-         D.f[BE  ] = &DDStart[BE  *size_Mat];
-         D.f[TW  ] = &DDStart[TW  *size_Mat];
-         D.f[TN  ] = &DDStart[TN  *size_Mat];
-         D.f[BS  ] = &DDStart[BS  *size_Mat];
-         D.f[BN  ] = &DDStart[BN  *size_Mat];
-         D.f[TS  ] = &DDStart[TS  *size_Mat];
-         D.f[REST] = &DDStart[REST*size_Mat];
-         D.f[TNE ] = &DDStart[TNE *size_Mat];
-         D.f[TSW ] = &DDStart[TSW *size_Mat];
-         D.f[TSE ] = &DDStart[TSE *size_Mat];
-         D.f[TNW ] = &DDStart[TNW *size_Mat];
-         D.f[BNE ] = &DDStart[BNE *size_Mat];
-         D.f[BSW ] = &DDStart[BSW *size_Mat];
-         D.f[BSE ] = &DDStart[BSE *size_Mat];
-         D.f[BNW ] = &DDStart[BNW *size_Mat];
+         D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
+         D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
+         D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
+         D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
+         D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
+         D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
+         D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
+         D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
+         D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
+         D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
+         D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
+         D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
+         D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
+         D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
+         D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
+         D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
+         D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
+         D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
+         D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
+         D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
+         D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
       }
       else
       {
-         D.f[W   ] = &DDStart[E   *size_Mat];
-         D.f[E   ] = &DDStart[W   *size_Mat];
-         D.f[S   ] = &DDStart[N   *size_Mat];
-         D.f[N   ] = &DDStart[S   *size_Mat];
-         D.f[B   ] = &DDStart[T   *size_Mat];
-         D.f[T   ] = &DDStart[B   *size_Mat];
-         D.f[SW  ] = &DDStart[NE  *size_Mat];
-         D.f[NE  ] = &DDStart[SW  *size_Mat];
-         D.f[NW  ] = &DDStart[SE  *size_Mat];
-         D.f[SE  ] = &DDStart[NW  *size_Mat];
-         D.f[BW  ] = &DDStart[TE  *size_Mat];
-         D.f[TE  ] = &DDStart[BW  *size_Mat];
-         D.f[TW  ] = &DDStart[BE  *size_Mat];
-         D.f[BE  ] = &DDStart[TW  *size_Mat];
-         D.f[BS  ] = &DDStart[TN  *size_Mat];
-         D.f[TN  ] = &DDStart[BS  *size_Mat];
-         D.f[TS  ] = &DDStart[BN  *size_Mat];
-         D.f[BN  ] = &DDStart[TS  *size_Mat];
-         D.f[REST] = &DDStart[REST*size_Mat];
-         D.f[BSW ] = &DDStart[TNE *size_Mat];
-         D.f[BNE ] = &DDStart[TSW *size_Mat];
-         D.f[BNW ] = &DDStart[TSE *size_Mat];
-         D.f[BSE ] = &DDStart[TNW *size_Mat];
-         D.f[TSW ] = &DDStart[BNE *size_Mat];
-         D.f[TNE ] = &DDStart[BSW *size_Mat];
-         D.f[TNW ] = &DDStart[BSE *size_Mat];
-         D.f[TSE ] = &DDStart[BNW *size_Mat];
+         D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
+         D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
+         D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
+         D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
+         D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
+         D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
+         D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
+         D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
+         D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
+         D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
+         D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
+         D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
+         D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
+         D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+         D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
+         D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
+         D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
+         D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
+         D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
+         D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
+         D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
+         D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
       }
 
       ////////////////////////////////////////////////////////////////////////////////
@@ -4208,33 +4208,33 @@ extern "C" __global__ void LB_Kernel_Casc27(real omega,
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       real f_E,f_W,f_N,f_S,f_T,f_B,f_NE,f_SW,f_SE,f_NW,f_TE,f_BW,f_BE,f_TW,f_TN,f_BS,f_BN,f_TS,f_ZERO, f_TNE,f_TNW,f_TSE,f_TSW, f_BNE,f_BNW,f_BSE,f_BSW;
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      f_E    =  (D.f[E   ])[ke]+c2o27;
-      f_W    =  (D.f[W   ])[kw]+c2o27;
-      f_N    =  (D.f[N   ])[kn]+c2o27;
-      f_S    =  (D.f[S   ])[ks]+c2o27;
-      f_T    =  (D.f[T   ])[kt]+c2o27;
-      f_B    =  (D.f[B   ])[kb]+c2o27;
-      f_NE   =  (D.f[NE  ])[kne]+c1o54;
-      f_SW   =  (D.f[SW  ])[ksw]+c1o54;
-      f_SE   =  (D.f[SE  ])[kse]+c1o54;
-      f_NW   =  (D.f[NW  ])[knw]+c1o54;
-      f_TE   =  (D.f[TE  ])[kte]+c1o54;
-      f_BW   =  (D.f[BW  ])[kbw]+c1o54;
-      f_BE   =  (D.f[BE  ])[kbe]+c1o54;
-      f_TW   =  (D.f[TW  ])[ktw]+c1o54;
-      f_TN   =  (D.f[TN  ])[ktn]+c1o54;
-      f_BS   =  (D.f[BS  ])[kbs]+c1o54;
-      f_BN   =  (D.f[BN  ])[kbn]+c1o54;
-      f_TS   =  (D.f[TS  ])[kts]+c1o54;
-      f_ZERO =  (D.f[REST])[kzero]+c8o27;
-      f_TNE   = (D.f[TNE ])[ktne]+c1o216;
-      f_TSW   = (D.f[TSW ])[ktsw]+c1o216;
-      f_TSE   = (D.f[TSE ])[ktse]+c1o216;
-      f_TNW   = (D.f[TNW ])[ktnw]+c1o216;
-      f_BNE   = (D.f[BNE ])[kbne]+c1o216;
-      f_BSW   = (D.f[BSW ])[kbsw]+c1o216;
-      f_BSE   = (D.f[BSE ])[kbse]+c1o216;
-      f_BNW   = (D.f[BNW ])[kbnw]+c1o216;
+      f_E    =  (D.f[DIR_P00   ])[ke]+c2o27;
+      f_W    =  (D.f[DIR_M00   ])[kw]+c2o27;
+      f_N    =  (D.f[DIR_0P0   ])[kn]+c2o27;
+      f_S    =  (D.f[DIR_0M0   ])[ks]+c2o27;
+      f_T    =  (D.f[DIR_00P   ])[kt]+c2o27;
+      f_B    =  (D.f[DIR_00M   ])[kb]+c2o27;
+      f_NE   =  (D.f[DIR_PP0  ])[kne]+c1o54;
+      f_SW   =  (D.f[DIR_MM0  ])[ksw]+c1o54;
+      f_SE   =  (D.f[DIR_PM0  ])[kse]+c1o54;
+      f_NW   =  (D.f[DIR_MP0  ])[knw]+c1o54;
+      f_TE   =  (D.f[DIR_P0P  ])[kte]+c1o54;
+      f_BW   =  (D.f[DIR_M0M  ])[kbw]+c1o54;
+      f_BE   =  (D.f[DIR_P0M  ])[kbe]+c1o54;
+      f_TW   =  (D.f[DIR_M0P  ])[ktw]+c1o54;
+      f_TN   =  (D.f[DIR_0PP  ])[ktn]+c1o54;
+      f_BS   =  (D.f[DIR_0MM  ])[kbs]+c1o54;
+      f_BN   =  (D.f[DIR_0PM  ])[kbn]+c1o54;
+      f_TS   =  (D.f[DIR_0MP  ])[kts]+c1o54;
+      f_ZERO =  (D.f[DIR_000])[kzero]+c8o27;
+      f_TNE   = (D.f[DIR_PPP ])[ktne]+c1o216;
+      f_TSW   = (D.f[DIR_MMP ])[ktsw]+c1o216;
+      f_TSE   = (D.f[DIR_PMP ])[ktse]+c1o216;
+      f_TNW   = (D.f[DIR_MPP ])[ktnw]+c1o216;
+      f_BNE   = (D.f[DIR_PPM ])[kbne]+c1o216;
+      f_BSW   = (D.f[DIR_MMM ])[kbsw]+c1o216;
+      f_BSE   = (D.f[DIR_PMM ])[kbse]+c1o216;
+      f_BNW   = (D.f[DIR_MPM ])[kbnw]+c1o216;
       ////////////////////////////////////////////////////////////////////////////////
 
       if( BC == GEO_FLUID || BC == GEO_VELO)
@@ -4673,38 +4673,38 @@ extern "C" __global__ void LB_Kernel_Casc27(real omega,
          }
       }
 
-      (D.f[ E  ])[ke ] = f_W-c2o27;
-      (D.f[ W  ])[kw ] = f_E-c2o27;
-
-      (D.f[ N  ])[kn ] = f_S-c2o27;
-      (D.f[ S  ])[ks ] = f_N-c2o27;
-      (D.f[ T  ])[kt ] = f_B-c2o27;
-      (D.f[ B  ])[kb ] = f_T-c2o27;
-
-      (D.f[ NE ])[kne] = f_SW-c1o54;
-      (D.f[ SW ])[ksw] = f_NE-c1o54;
-      (D.f[ SE ])[kse] = f_NW-c1o54;
-      (D.f[ NW ])[knw] = f_SE-c1o54;
-      (D.f[ TE ])[kte] = f_BW-c1o54;
-      (D.f[ BW ])[kbw] = f_TE-c1o54;
-      (D.f[ BE ])[kbe] = f_TW-c1o54;
-      (D.f[ TW ])[ktw] = f_BE-c1o54;
-
-      (D.f[ TN ])[ktn] = f_BS-c1o54;
-      (D.f[ BS ])[kbs] = f_TN-c1o54;
-      (D.f[ BN ])[kbn] = f_TS-c1o54;
-      (D.f[ TS ])[kts] = f_BN-c1o54;
-
-      (D.f[ REST])[k] = f_ZERO-c8o27;
-
-      (D.f[ TNE ])[ktne] = f_BSW-c1o216;
-      (D.f[ TSE ])[ktse] = f_BNW-c1o216;
-      (D.f[ BNE ])[kbne] = f_TSW-c1o216;
-      (D.f[ BSE ])[kbse] = f_TNW-c1o216;
-      (D.f[ TNW ])[ktnw] = f_BSE-c1o216;
-      (D.f[ TSW ])[ktsw] = f_BNE-c1o216;
-      (D.f[ BNW ])[kbnw] = f_TSE-c1o216;
-      (D.f[ BSW ])[kbsw] = f_TNE-c1o216;
+      (D.f[ DIR_P00  ])[ke ] = f_W-c2o27;
+      (D.f[ DIR_M00  ])[kw ] = f_E-c2o27;
+
+      (D.f[ DIR_0P0  ])[kn ] = f_S-c2o27;
+      (D.f[ DIR_0M0  ])[ks ] = f_N-c2o27;
+      (D.f[ DIR_00P  ])[kt ] = f_B-c2o27;
+      (D.f[ DIR_00M  ])[kb ] = f_T-c2o27;
+
+      (D.f[ DIR_PP0 ])[kne] = f_SW-c1o54;
+      (D.f[ DIR_MM0 ])[ksw] = f_NE-c1o54;
+      (D.f[ DIR_PM0 ])[kse] = f_NW-c1o54;
+      (D.f[ DIR_MP0 ])[knw] = f_SE-c1o54;
+      (D.f[ DIR_P0P ])[kte] = f_BW-c1o54;
+      (D.f[ DIR_M0M ])[kbw] = f_TE-c1o54;
+      (D.f[ DIR_P0M ])[kbe] = f_TW-c1o54;
+      (D.f[ DIR_M0P ])[ktw] = f_BE-c1o54;
+
+      (D.f[ DIR_0PP ])[ktn] = f_BS-c1o54;
+      (D.f[ DIR_0MM ])[kbs] = f_TN-c1o54;
+      (D.f[ DIR_0PM ])[kbn] = f_TS-c1o54;
+      (D.f[ DIR_0MP ])[kts] = f_BN-c1o54;
+
+      (D.f[ DIR_000])[k] = f_ZERO-c8o27;
+
+      (D.f[ DIR_PPP ])[ktne] = f_BSW-c1o216;
+      (D.f[ DIR_PMP ])[ktse] = f_BNW-c1o216;
+      (D.f[ DIR_PPM ])[kbne] = f_TSW-c1o216;
+      (D.f[ DIR_PMM ])[kbse] = f_TNW-c1o216;
+      (D.f[ DIR_MPP ])[ktnw] = f_BSE-c1o216;
+      (D.f[ DIR_MMP ])[ktsw] = f_BNE-c1o216;
+      (D.f[ DIR_MPM ])[kbnw] = f_TSE-c1o216;
+      (D.f[ DIR_MMM ])[kbsw] = f_TNE-c1o216;
    }
    __syncthreads();
 }
diff --git a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp
index 158889da63508f112e135983beedfc00debfba15..6bdef532b49e10a9eece94b3348630b2f60c842c 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp
+++ b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.cpp
@@ -32,6 +32,18 @@ void CudaMemoryManager::cudaCopyPrint(int lev)
     checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->velocityZ   , parameter->getParD(lev)->velocityZ   , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
     checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->rho  , parameter->getParD(lev)->rho  , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
     checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->pressure, parameter->getParD(lev)->pressure, parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
+
+    if(parameter->getIsBodyForce())
+    {
+        checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->forceX_SP   , parameter->getParD(lev)->forceX_SP   , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
+        checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->forceY_SP   , parameter->getParD(lev)->forceY_SP   , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
+        checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->forceZ_SP   , parameter->getParD(lev)->forceZ_SP   , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
+    }
+
+    if(parameter->getUseTurbulentViscosity())
+    {
+        checkCudaErrors( cudaMemcpy(parameter->getParH(lev)->turbViscosity   , parameter->getParD(lev)->turbViscosity   , parameter->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost));
+    }
 }
 void CudaMemoryManager::cudaCopyMedianPrint(int lev)
 {
@@ -278,8 +290,8 @@ void CudaMemoryManager::cudaFreeOutflowBC(int lev)
 	checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->outflowBC.kN     ));
 	checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->outflowBC.RhoBC  ));
 }
-//Wall
-void CudaMemoryManager::cudaAllocWallBC(int lev)
+//No-Slip
+void CudaMemoryManager::cudaAllocNoSlipBC(int lev)
 {
 	unsigned int mem_size_Q_k      = sizeof(int)*parameter->getParH(lev)->noSlipBC.numberOfBCnodes;
 	unsigned int mem_size_Q_q      = sizeof(real)*parameter->getParH(lev)->noSlipBC.numberOfBCnodes;
@@ -300,7 +312,7 @@ void CudaMemoryManager::cudaAllocWallBC(int lev)
 	double tmp = (double)mem_size_Q_k + (double)parameter->getD3Qxx()*(double)mem_size_Q_q;
 	setMemsizeGPU(tmp, false);
 }
-void CudaMemoryManager::cudaCopyWallBC(int lev)
+void CudaMemoryManager::cudaCopyNoSlipBC(int lev)
 {
 	unsigned int mem_size_Q_k = sizeof(int)*parameter->getParH(lev)->noSlipBC.numberOfBCnodes;
 	unsigned int mem_size_Q_q = sizeof(real)*parameter->getParH(lev)->noSlipBC.numberOfBCnodes;
@@ -308,7 +320,7 @@ void CudaMemoryManager::cudaCopyWallBC(int lev)
 	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->noSlipBC.q27[0], parameter->getParH(lev)->noSlipBC.q27[0], parameter->getD3Qxx()* mem_size_Q_q,       cudaMemcpyHostToDevice));
 	checkCudaErrors( cudaMemcpy(parameter->getParD(lev)->noSlipBC.k,      parameter->getParH(lev)->noSlipBC.k,                  mem_size_Q_k,       cudaMemcpyHostToDevice));
 }
-void CudaMemoryManager::cudaFreeWallBC(int lev)
+void CudaMemoryManager::cudaFreeNoSlipBC(int lev)
 {
 	checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->noSlipBC.q27[0]));
 	checkCudaErrors( cudaFreeHost(parameter->getParH(lev)->noSlipBC.k));
@@ -3082,7 +3094,7 @@ void CudaMemoryManager::cudaFreeFluidNodeIndices(int lev) {
 }
 
 void CudaMemoryManager::cudaAllocFluidNodeIndicesBorder(int lev) {
-    uint mem_size_fluid_nodes_border = sizeof(uint) * parameter->getParH(lev)->numberOffluidNodesBorder;
+    uint mem_size_fluid_nodes_border = sizeof(uint) * parameter->getParH(lev)->numberOfFluidNodesBorder;
     // Host
     checkCudaErrors(
         cudaMallocHost((void **)&(parameter->getParH(lev)->fluidNodeIndicesBorder), mem_size_fluid_nodes_border));
@@ -3094,7 +3106,7 @@ void CudaMemoryManager::cudaAllocFluidNodeIndicesBorder(int lev) {
 }
 
 void CudaMemoryManager::cudaCopyFluidNodeIndicesBorder(int lev) {
-    uint mem_size_fluid_nodes_border = sizeof(uint) * parameter->getParH(lev)->numberOffluidNodesBorder;
+    uint mem_size_fluid_nodes_border = sizeof(uint) * parameter->getParH(lev)->numberOfFluidNodesBorder;
     checkCudaErrors(cudaMemcpy(parameter->getParD(lev)->fluidNodeIndicesBorder,
                                parameter->getParH(lev)->fluidNodeIndicesBorder,
                                mem_size_fluid_nodes_border, cudaMemcpyHostToDevice));
diff --git a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h
index 6e7084d1bfeb5ad13c4ea9d6bc012b6a287de7b7..e47c87208b0c9e62136fc7fe8fd75c638d4345ac 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h
+++ b/src/gpu/VirtualFluids_GPU/GPU/CudaMemoryManager.h
@@ -26,10 +26,10 @@ class PrecursorWriter;
 class VIRTUALFLUIDS_GPU_EXPORT CudaMemoryManager
 {
 public:
-	CudaMemoryManager(std::shared_ptr<Parameter> parameter);
+    CudaMemoryManager(std::shared_ptr<Parameter> parameter);
 
-	void setMemsizeGPU(double admem, bool reset);
-	double getMemsizeGPU();
+    void setMemsizeGPU(double admem, bool reset);
+    double getMemsizeGPU();
 
     void cudaAllocFull(int lev);
     void cudaFreeFull(int lev);
@@ -37,109 +37,109 @@ public:
     void cudaCopyPrint(int lev);
     void cudaCopyMedianPrint(int lev);
 
-	void cudaAllocCoord(int lev);
-	void cudaCopyCoord(int lev);
-	void cudaFreeCoord(int lev);
+    void cudaAllocCoord(int lev);
+    void cudaCopyCoord(int lev);
+    void cudaFreeCoord(int lev);
 
-	void cudaAllocBodyForce(int lev);
+    void cudaAllocBodyForce(int lev);
     void cudaCopyBodyForce(int lev);
     void cudaFreeBodyForce(int lev);
 
     void cudaCopyDataToHost(int lev);
 
-	void cudaAllocSP(int lev);
-	void cudaCopySP(int lev);
-	void cudaFreeSP(int lev);
+    void cudaAllocSP(int lev);
+    void cudaCopySP(int lev);
+    void cudaFreeSP(int lev);
 
     void cudaAllocF3SP(int lev);
 
-	void cudaAllocNeighborWSB(int lev);
-	void cudaCopyNeighborWSB(int lev);
-	void cudaFreeNeighborWSB(int lev);
+    void cudaAllocNeighborWSB(int lev);
+    void cudaCopyNeighborWSB(int lev);
+    void cudaFreeNeighborWSB(int lev);
 
-	void cudaAllocVeloBC(int lev);
-	void cudaCopyVeloBC(int lev);
-	void cudaFreeVeloBC(int lev);
+    void cudaAllocVeloBC(int lev);
+    void cudaCopyVeloBC(int lev);
+    void cudaFreeVeloBC(int lev);
 
-	void cudaAllocOutflowBC(int lev);
-	void cudaCopyOutflowBC(int lev);
-	void cudaFreeOutflowBC(int lev);
+    void cudaAllocOutflowBC(int lev);
+    void cudaCopyOutflowBC(int lev);
+    void cudaFreeOutflowBC(int lev);
 
-	void cudaAllocWallBC(int lev);
-	void cudaCopyWallBC(int lev);
-	void cudaFreeWallBC(int lev);
+    void cudaAllocNoSlipBC(int lev);
+    void cudaCopyNoSlipBC(int lev);
+    void cudaFreeNoSlipBC(int lev);
 
-	void cudaAllocGeomBC(int lev);
-	void cudaCopyGeomBC(int lev);
-	void cudaFreeGeomBC(int lev);
+    void cudaAllocGeomBC(int lev);
+    void cudaCopyGeomBC(int lev);
+    void cudaFreeGeomBC(int lev);
 
-	void cudaAllocPress(int lev);
-	void cudaCopyPress(int lev);
-	void cudaFreePress(int lev);
+    void cudaAllocPress(int lev);
+    void cudaCopyPress(int lev);
+    void cudaFreePress(int lev);
 
-	void cudaAllocForcing();
-	void cudaCopyForcingToDevice();
-	void cudaCopyForcingToHost();
-	void cudaFreeForcing();
+    void cudaAllocForcing();
+    void cudaCopyForcingToDevice();
+    void cudaCopyForcingToHost();
+    void cudaFreeForcing();
 
     void cudaAllocLevelForcing(int level);
-	void cudaCopyLevelForcingToDevice(int level);
-	void cudaFreeLevelForcing(int level);
+    void cudaCopyLevelForcingToDevice(int level);
+    void cudaFreeLevelForcing(int level);
 
-	void cudaAllocQuadricLimiters();
-	void cudaCopyQuadricLimitersToDevice();
-	void cudaFreeQuadricLimiters();
+    void cudaAllocQuadricLimiters();
+    void cudaCopyQuadricLimitersToDevice();
+    void cudaFreeQuadricLimiters();
 
-	//////////////////////////////////////////////////////////////////////////
-	//3D domain decomposition
-	void cudaAllocProcessNeighborX(int lev, unsigned int processNeighbor);
+    //////////////////////////////////////////////////////////////////////////
+    //3D domain decomposition
+    void cudaAllocProcessNeighborX(int lev, unsigned int processNeighbor);
     void cudaCopyProcessNeighborXFsHD(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsRecv,
                                       int streamIndex);
     void cudaCopyProcessNeighborXFsDH(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsSend,
                                       int streamIndex);
-	void cudaCopyProcessNeighborXIndex(int lev, unsigned int processNeighbor);
-	void cudaFreeProcessNeighborX(int lev, unsigned int processNeighbor);
-	//
-	void cudaAllocProcessNeighborY(int lev, unsigned int processNeighbor);
+    void cudaCopyProcessNeighborXIndex(int lev, unsigned int processNeighbor);
+    void cudaFreeProcessNeighborX(int lev, unsigned int processNeighbor);
+    //
+    void cudaAllocProcessNeighborY(int lev, unsigned int processNeighbor);
     void cudaCopyProcessNeighborYFsHD(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsRecv,
                                       int streamIndex);
     void cudaCopyProcessNeighborYFsDH(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsSend,
                                       int streamIndex);
-	void cudaCopyProcessNeighborYIndex(int lev, unsigned int processNeighbor);
+    void cudaCopyProcessNeighborYIndex(int lev, unsigned int processNeighbor);
     void cudaFreeProcessNeighborY(int lev, unsigned int processNeighbor);
-	//
-	void cudaAllocProcessNeighborZ(int lev, unsigned int processNeighbor);
+    //
+    void cudaAllocProcessNeighborZ(int lev, unsigned int processNeighbor);
     void cudaCopyProcessNeighborZFsHD(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsRecv,
                                       int streamIndex);
     void cudaCopyProcessNeighborZFsDH(int lev, unsigned int processNeighbor, const unsigned int &memsizeFsSend,
                                       int streamIndex);
-	void cudaCopyProcessNeighborZIndex(int lev, unsigned int processNeighbor);
-	void cudaFreeProcessNeighborZ(int lev, unsigned int processNeighbor);
-
-	//////////////////////////////////////////////////////////////////////////
-
-	//////////////////////////////////////////////////////////////////////////
-	//3D domain decomposition F3
-	void cudaAllocProcessNeighborF3X(int lev, unsigned int processNeighbor);
-	void cudaCopyProcessNeighborF3XFsHD(int lev, unsigned int processNeighbor);
-	void cudaCopyProcessNeighborF3XFsDH(int lev, unsigned int processNeighbor);
-	void cudaCopyProcessNeighborF3XIndex(int lev, unsigned int processNeighbor);
-	void cudaFreeProcessNeighborF3X(int lev, unsigned int processNeighbor);
-	//
-	void cudaAllocProcessNeighborF3Y(int lev, unsigned int processNeighbor);
-	void cudaCopyProcessNeighborF3YFsHD(int lev, unsigned int processNeighbor);
-	void cudaCopyProcessNeighborF3YFsDH(int lev, unsigned int processNeighbor);
-	void cudaCopyProcessNeighborF3YIndex(int lev, unsigned int processNeighbor);
-	void cudaFreeProcessNeighborF3Y(int lev, unsigned int processNeighbor);
-	//
-	void cudaAllocProcessNeighborF3Z(int lev, unsigned int processNeighbor);
-	void cudaCopyProcessNeighborF3ZFsHD(int lev, unsigned int processNeighbor);
-	void cudaCopyProcessNeighborF3ZFsDH(int lev, unsigned int processNeighbor);
-	void cudaCopyProcessNeighborF3ZIndex(int lev, unsigned int processNeighbor);
-	void cudaFreeProcessNeighborF3Z(int lev, unsigned int processNeighbor);
-	//////////////////////////////////////////////////////////////////////////
-
-	void cudaAllocTurbulentViscosity(int lev);
+    void cudaCopyProcessNeighborZIndex(int lev, unsigned int processNeighbor);
+    void cudaFreeProcessNeighborZ(int lev, unsigned int processNeighbor);
+
+    //////////////////////////////////////////////////////////////////////////
+
+    //////////////////////////////////////////////////////////////////////////
+    //3D domain decomposition F3
+    void cudaAllocProcessNeighborF3X(int lev, unsigned int processNeighbor);
+    void cudaCopyProcessNeighborF3XFsHD(int lev, unsigned int processNeighbor);
+    void cudaCopyProcessNeighborF3XFsDH(int lev, unsigned int processNeighbor);
+    void cudaCopyProcessNeighborF3XIndex(int lev, unsigned int processNeighbor);
+    void cudaFreeProcessNeighborF3X(int lev, unsigned int processNeighbor);
+    //
+    void cudaAllocProcessNeighborF3Y(int lev, unsigned int processNeighbor);
+    void cudaCopyProcessNeighborF3YFsHD(int lev, unsigned int processNeighbor);
+    void cudaCopyProcessNeighborF3YFsDH(int lev, unsigned int processNeighbor);
+    void cudaCopyProcessNeighborF3YIndex(int lev, unsigned int processNeighbor);
+    void cudaFreeProcessNeighborF3Y(int lev, unsigned int processNeighbor);
+    //
+    void cudaAllocProcessNeighborF3Z(int lev, unsigned int processNeighbor);
+    void cudaCopyProcessNeighborF3ZFsHD(int lev, unsigned int processNeighbor);
+    void cudaCopyProcessNeighborF3ZFsDH(int lev, unsigned int processNeighbor);
+    void cudaCopyProcessNeighborF3ZIndex(int lev, unsigned int processNeighbor);
+    void cudaFreeProcessNeighborF3Z(int lev, unsigned int processNeighbor);
+    //////////////////////////////////////////////////////////////////////////
+
+    void cudaAllocTurbulentViscosity(int lev);
     void cudaCopyTurbulentViscosityHD(int lev);
     void cudaCopyTurbulentViscosityDH(int lev);
     void cudaFreeTurbulentViscosity(int lev);
@@ -419,7 +419,7 @@ public:
 
 private:
     std::shared_ptr<Parameter> parameter;
-    double memsizeGPU = 0.;
+    double memsizeGPU = 0.0;
 
 };
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/GPU/Cumulant27.cu b/src/gpu/VirtualFluids_GPU/GPU/Cumulant27.cu
index 7a12d97d8dc5091a2479d84930b610a023aa6b17..bbce8181d814fc8b9dbb086764becb73a86c0eda 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/Cumulant27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/Cumulant27.cu
@@ -15,7 +15,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LB_Kernel_Kum_AA2016_Comp_Bulk_SP_27(real omega,
+__global__ void LB_Kernel_Kum_AA2016_Comp_Bulk_SP_27(real omega,
 																unsigned int* bcMatD,
 																unsigned int* neighborX,
 																unsigned int* neighborY,
@@ -48,63 +48,63 @@ extern "C" __global__ void LB_Kernel_Kum_AA2016_Comp_Bulk_SP_27(real omega,
 			Distributions27 D;
 			if (EvenOrOdd==true)
 			{
-				D.f[E   ] = &DDStart[E   *size_Mat];
-				D.f[W   ] = &DDStart[W   *size_Mat];
-				D.f[N   ] = &DDStart[N   *size_Mat];
-				D.f[S   ] = &DDStart[S   *size_Mat];
-				D.f[T   ] = &DDStart[T   *size_Mat];
-				D.f[B   ] = &DDStart[B   *size_Mat];
-				D.f[NE  ] = &DDStart[NE  *size_Mat];
-				D.f[SW  ] = &DDStart[SW  *size_Mat];
-				D.f[SE  ] = &DDStart[SE  *size_Mat];
-				D.f[NW  ] = &DDStart[NW  *size_Mat];
-				D.f[TE  ] = &DDStart[TE  *size_Mat];
-				D.f[BW  ] = &DDStart[BW  *size_Mat];
-				D.f[BE  ] = &DDStart[BE  *size_Mat];
-				D.f[TW  ] = &DDStart[TW  *size_Mat];
-				D.f[TN  ] = &DDStart[TN  *size_Mat];
-				D.f[BS  ] = &DDStart[BS  *size_Mat];
-				D.f[BN  ] = &DDStart[BN  *size_Mat];
-				D.f[TS  ] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[TNE ] = &DDStart[TNE *size_Mat];
-				D.f[TSW ] = &DDStart[TSW *size_Mat];
-				D.f[TSE ] = &DDStart[TSE *size_Mat];
-				D.f[TNW ] = &DDStart[TNW *size_Mat];
-				D.f[BNE ] = &DDStart[BNE *size_Mat];
-				D.f[BSW ] = &DDStart[BSW *size_Mat];
-				D.f[BSE ] = &DDStart[BSE *size_Mat];
-				D.f[BNW ] = &DDStart[BNW *size_Mat];
+				D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
 			}
 			else
 			{
-				D.f[W   ] = &DDStart[E   *size_Mat];
-				D.f[E   ] = &DDStart[W   *size_Mat];
-				D.f[S   ] = &DDStart[N   *size_Mat];
-				D.f[N   ] = &DDStart[S   *size_Mat];
-				D.f[B   ] = &DDStart[T   *size_Mat];
-				D.f[T   ] = &DDStart[B   *size_Mat];
-				D.f[SW  ] = &DDStart[NE  *size_Mat];
-				D.f[NE  ] = &DDStart[SW  *size_Mat];
-				D.f[NW  ] = &DDStart[SE  *size_Mat];
-				D.f[SE  ] = &DDStart[NW  *size_Mat];
-				D.f[BW  ] = &DDStart[TE  *size_Mat];
-				D.f[TE  ] = &DDStart[BW  *size_Mat];
-				D.f[TW  ] = &DDStart[BE  *size_Mat];
-				D.f[BE  ] = &DDStart[TW  *size_Mat];
-				D.f[BS  ] = &DDStart[TN  *size_Mat];
-				D.f[TN  ] = &DDStart[BS  *size_Mat];
-				D.f[TS  ] = &DDStart[BN  *size_Mat];
-				D.f[BN  ] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[BSW ] = &DDStart[TNE *size_Mat];
-				D.f[BNE ] = &DDStart[TSW *size_Mat];
-				D.f[BNW ] = &DDStart[TSE *size_Mat];
-				D.f[BSE ] = &DDStart[TNW *size_Mat];
-				D.f[TSW ] = &DDStart[BNE *size_Mat];
-				D.f[TNE ] = &DDStart[BSW *size_Mat];
-				D.f[TNW ] = &DDStart[BSE *size_Mat];
-				D.f[TSE ] = &DDStart[BNW *size_Mat];
+				D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -137,33 +137,33 @@ extern "C" __global__ void LB_Kernel_Kum_AA2016_Comp_Bulk_SP_27(real omega,
 			//unsigned int ktne = k;
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[E   ])[k  ];//[ke   ];// +  c2over27 ;(D.f[E   ])[k  ];//ke
-			real mfabb = (D.f[W   ])[kw ];//[kw   ];// +  c2over27 ;(D.f[W   ])[kw ];
-			real mfbcb = (D.f[N   ])[k  ];//[kn   ];// +  c2over27 ;(D.f[N   ])[k  ];//kn
-			real mfbab = (D.f[S   ])[ks ];//[ks   ];// +  c2over27 ;(D.f[S   ])[ks ];
-			real mfbbc = (D.f[T   ])[k  ];//[kt   ];// +  c2over27 ;(D.f[T   ])[k  ];//kt
-			real mfbba = (D.f[B   ])[kb ];//[kb   ];// +  c2over27 ;(D.f[B   ])[kb ];
-			real mfccb = (D.f[NE  ])[k  ];//[kne  ];// +  c1over54 ;(D.f[NE  ])[k  ];//kne
-			real mfaab = (D.f[SW  ])[ksw];//[ksw  ];// +  c1over54 ;(D.f[SW  ])[ksw];
-			real mfcab = (D.f[SE  ])[ks ];//[kse  ];// +  c1over54 ;(D.f[SE  ])[ks ];//kse
-			real mfacb = (D.f[NW  ])[kw ];//[knw  ];// +  c1over54 ;(D.f[NW  ])[kw ];//knw
-			real mfcbc = (D.f[TE  ])[k  ];//[kte  ];// +  c1over54 ;(D.f[TE  ])[k  ];//kte
-			real mfaba = (D.f[BW  ])[kbw];//[kbw  ];// +  c1over54 ;(D.f[BW  ])[kbw];
-			real mfcba = (D.f[BE  ])[kb ];//[kbe  ];// +  c1over54 ;(D.f[BE  ])[kb ];//kbe
-			real mfabc = (D.f[TW  ])[kw ];//[ktw  ];// +  c1over54 ;(D.f[TW  ])[kw ];//ktw
-			real mfbcc = (D.f[TN  ])[k  ];//[ktn  ];// +  c1over54 ;(D.f[TN  ])[k  ];//ktn
-			real mfbaa = (D.f[BS  ])[kbs];//[kbs  ];// +  c1over54 ;(D.f[BS  ])[kbs];
-			real mfbca = (D.f[BN  ])[kb ];//[kbn  ];// +  c1over54 ;(D.f[BN  ])[kb ];//kbn
-			real mfbac = (D.f[TS  ])[ks ];//[kts  ];// +  c1over54 ;(D.f[TS  ])[ks ];//kts
-			real mfbbb = (D.f[REST])[k  ];//[kzero];// +  c8over27 ;(D.f[REST])[k  ];//kzero
-			real mfccc = (D.f[TNE ])[k  ];//[ktne ];// +  c1over216;(D.f[TNE ])[k  ];//ktne
-			real mfaac = (D.f[TSW ])[ksw];//[ktsw ];// +  c1over216;(D.f[TSW ])[ksw];//ktsw
-			real mfcac = (D.f[TSE ])[ks ];//[ktse ];// +  c1over216;(D.f[TSE ])[ks ];//ktse
-			real mfacc = (D.f[TNW ])[kw ];//[ktnw ];// +  c1over216;(D.f[TNW ])[kw ];//ktnw
-			real mfcca = (D.f[BNE ])[kb ];//[kbne ];// +  c1over216;(D.f[BNE ])[kb ];//kbne
-			real mfaaa = (D.f[BSW ])[kbsw];//[kbsw ];// +  c1over216;(D.f[BSW ])[kbsw];
-			real mfcaa = (D.f[BSE ])[kbs];//[kbse ];// +  c1over216;(D.f[BSE ])[kbs];//kbse
-			real mfaca = (D.f[BNW ])[kbw];//[kbnw ];// +  c1over216;(D.f[BNW ])[kbw];//kbnw
+			real mfcbb = (D.f[DIR_P00   ])[k  ];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
+			real mfabb = (D.f[DIR_M00   ])[kw ];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
+			real mfbcb = (D.f[DIR_0P0   ])[k  ];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0   ])[ks ];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
+			real mfbbc = (D.f[DIR_00P   ])[k  ];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
+			real mfbba = (D.f[DIR_00M   ])[kb ];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
+			real mfccb = (D.f[DIR_PP0  ])[k  ];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0  ])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
+			real mfcab = (D.f[DIR_PM0  ])[ks ];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0  ])[kw ];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P  ])[k  ];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M  ])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
+			real mfcba = (D.f[DIR_P0M  ])[kb ];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P  ])[kw ];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP  ])[k  ];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM  ])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
+			real mfbca = (D.f[DIR_0PM  ])[kb ];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP  ])[ks ];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfbbb = (D.f[DIR_000])[k  ];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
+			real mfccc = (D.f[DIR_PPP ])[k  ];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP ])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP ])[ks ];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP ])[kw ];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM ])[kb ];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM ])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
+			real mfcaa = (D.f[DIR_PMM ])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM ])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
 			////////////////////////////////////////////////////////////////////////////////////
 			real drho = ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
 							(((mfbac+mfbca) + (mfbaa+mfbcc)) + ((mfabc+mfcba) + (mfaba+mfcbc)) + ((mfacb+mfcab) + (mfaab+mfccb))) +
@@ -894,33 +894,33 @@ extern "C" __global__ void LB_Kernel_Kum_AA2016_Comp_Bulk_SP_27(real omega,
 			////////////////////////////////////////////////////////////////////////////////////
 
 			////////////////////////////////////////////////////////////////////////////////////
-			(D.f[ E   ])[k   ] = mfabb;//(D.f[ E   ])[ke   ] = mfabb;// -  c2over27 ;  (D.f[ E   ])[k   ]                                                                     
-			(D.f[ W   ])[kw  ] = mfcbb;//(D.f[ W   ])[kw   ] = mfcbb;// -  c2over27 ;  (D.f[ W   ])[kw  ]                                                                   
-			(D.f[ N   ])[k   ] = mfbab;//(D.f[ N   ])[kn   ] = mfbab;// -  c2over27 ;	 (D.f[ N   ])[k   ]
-			(D.f[ S   ])[ks  ] = mfbcb;//(D.f[ S   ])[ks   ] = mfbcb;// -  c2over27 ;	 (D.f[ S   ])[ks  ]
-			(D.f[ T   ])[k   ] = mfbba;//(D.f[ T   ])[kt   ] = mfbba;// -  c2over27 ;	 (D.f[ T   ])[k   ]
-			(D.f[ B   ])[kb  ] = mfbbc;//(D.f[ B   ])[kb   ] = mfbbc;// -  c2over27 ;	 (D.f[ B   ])[kb  ]
-			(D.f[ NE  ])[k   ] = mfaab;//(D.f[ NE  ])[kne  ] = mfaab;// -  c1over54 ;	 (D.f[ NE  ])[k   ]
-			(D.f[ SW  ])[ksw ] = mfccb;//(D.f[ SW  ])[ksw  ] = mfccb;// -  c1over54 ;	 (D.f[ SW  ])[ksw ]
-			(D.f[ SE  ])[ks  ] = mfacb;//(D.f[ SE  ])[kse  ] = mfacb;// -  c1over54 ;	 (D.f[ SE  ])[ks  ]
-			(D.f[ NW  ])[kw  ] = mfcab;//(D.f[ NW  ])[knw  ] = mfcab;// -  c1over54 ;	 (D.f[ NW  ])[kw  ]
-			(D.f[ TE  ])[k   ] = mfaba;//(D.f[ TE  ])[kte  ] = mfaba;// -  c1over54 ;	 (D.f[ TE  ])[k   ]
-			(D.f[ BW  ])[kbw ] = mfcbc;//(D.f[ BW  ])[kbw  ] = mfcbc;// -  c1over54 ;	 (D.f[ BW  ])[kbw ]
-			(D.f[ BE  ])[kb  ] = mfabc;//(D.f[ BE  ])[kbe  ] = mfabc;// -  c1over54 ;	 (D.f[ BE  ])[kb  ]
-			(D.f[ TW  ])[kw  ] = mfcba;//(D.f[ TW  ])[ktw  ] = mfcba;// -  c1over54 ;	 (D.f[ TW  ])[kw  ]
-			(D.f[ TN  ])[k   ] = mfbaa;//(D.f[ TN  ])[ktn  ] = mfbaa;// -  c1over54 ;	 (D.f[ TN  ])[k   ]
-			(D.f[ BS  ])[kbs ] = mfbcc;//(D.f[ BS  ])[kbs  ] = mfbcc;// -  c1over54 ;	 (D.f[ BS  ])[kbs ]
-			(D.f[ BN  ])[kb  ] = mfbac;//(D.f[ BN  ])[kbn  ] = mfbac;// -  c1over54 ;	 (D.f[ BN  ])[kb  ]
-			(D.f[ TS  ])[ks  ] = mfbca;//(D.f[ TS  ])[kts  ] = mfbca;// -  c1over54 ;	 (D.f[ TS  ])[ks  ]
-			(D.f[ REST])[k   ] = mfbbb;//(D.f[ REST])[kzero] = mfbbb;// -  c8over27 ;	 (D.f[ REST])[k   ]
-			(D.f[ TNE ])[k   ] = mfaaa;//(D.f[ TNE ])[ktne ] = mfaaa;// -  c1over216;	 (D.f[ TNE ])[k   ]
-			(D.f[ TSE ])[ks  ] = mfaca;//(D.f[ TSE ])[ktse ] = mfaca;// -  c1over216;	 (D.f[ TSE ])[ks  ]
-			(D.f[ BNE ])[kb  ] = mfaac;//(D.f[ BNE ])[kbne ] = mfaac;// -  c1over216;	 (D.f[ BNE ])[kb  ]
-			(D.f[ BSE ])[kbs ] = mfacc;//(D.f[ BSE ])[kbse ] = mfacc;// -  c1over216;	 (D.f[ BSE ])[kbs ]
-			(D.f[ TNW ])[kw  ] = mfcaa;//(D.f[ TNW ])[ktnw ] = mfcaa;// -  c1over216;	 (D.f[ TNW ])[kw  ]
-			(D.f[ TSW ])[ksw ] = mfcca;//(D.f[ TSW ])[ktsw ] = mfcca;// -  c1over216;	 (D.f[ TSW ])[ksw ]
-			(D.f[ BNW ])[kbw ] = mfcac;//(D.f[ BNW ])[kbnw ] = mfcac;// -  c1over216;	 (D.f[ BNW ])[kbw ]
-			(D.f[ BSW ])[kbsw] = mfccc;//(D.f[ BSW ])[kbsw ] = mfccc;// -  c1over216;	 (D.f[ BSW ])[kbsw]
+			(D.f[ DIR_P00   ])[k   ] = mfabb;//(D.f[ DIR_P00   ])[ke   ] = mfabb;// -  c2over27 ;  (D.f[ DIR_P00   ])[k   ]                                                                     
+			(D.f[ DIR_M00   ])[kw  ] = mfcbb;//(D.f[ DIR_M00   ])[kw   ] = mfcbb;// -  c2over27 ;  (D.f[ DIR_M00   ])[kw  ]                                                                   
+			(D.f[ DIR_0P0   ])[k   ] = mfbab;//(D.f[ DIR_0P0   ])[kn   ] = mfbab;// -  c2over27 ;	 (D.f[ DIR_0P0   ])[k   ]
+			(D.f[ DIR_0M0   ])[ks  ] = mfbcb;//(D.f[ DIR_0M0   ])[ks   ] = mfbcb;// -  c2over27 ;	 (D.f[ DIR_0M0   ])[ks  ]
+			(D.f[ DIR_00P   ])[k   ] = mfbba;//(D.f[ DIR_00P   ])[kt   ] = mfbba;// -  c2over27 ;	 (D.f[ DIR_00P   ])[k   ]
+			(D.f[ DIR_00M   ])[kb  ] = mfbbc;//(D.f[ DIR_00M   ])[kb   ] = mfbbc;// -  c2over27 ;	 (D.f[ DIR_00M   ])[kb  ]
+			(D.f[ DIR_PP0  ])[k   ] = mfaab;//(D.f[ DIR_PP0  ])[kne  ] = mfaab;// -  c1over54 ;	 (D.f[ DIR_PP0  ])[k   ]
+			(D.f[ DIR_MM0  ])[ksw ] = mfccb;//(D.f[ DIR_MM0  ])[ksw  ] = mfccb;// -  c1over54 ;	 (D.f[ DIR_MM0  ])[ksw ]
+			(D.f[ DIR_PM0  ])[ks  ] = mfacb;//(D.f[ DIR_PM0  ])[kse  ] = mfacb;// -  c1over54 ;	 (D.f[ DIR_PM0  ])[ks  ]
+			(D.f[ DIR_MP0  ])[kw  ] = mfcab;//(D.f[ DIR_MP0  ])[knw  ] = mfcab;// -  c1over54 ;	 (D.f[ DIR_MP0  ])[kw  ]
+			(D.f[ DIR_P0P  ])[k   ] = mfaba;//(D.f[ DIR_P0P  ])[kte  ] = mfaba;// -  c1over54 ;	 (D.f[ DIR_P0P  ])[k   ]
+			(D.f[ DIR_M0M  ])[kbw ] = mfcbc;//(D.f[ DIR_M0M  ])[kbw  ] = mfcbc;// -  c1over54 ;	 (D.f[ DIR_M0M  ])[kbw ]
+			(D.f[ DIR_P0M  ])[kb  ] = mfabc;//(D.f[ DIR_P0M  ])[kbe  ] = mfabc;// -  c1over54 ;	 (D.f[ DIR_P0M  ])[kb  ]
+			(D.f[ DIR_M0P  ])[kw  ] = mfcba;//(D.f[ DIR_M0P  ])[ktw  ] = mfcba;// -  c1over54 ;	 (D.f[ DIR_M0P  ])[kw  ]
+			(D.f[ DIR_0PP  ])[k   ] = mfbaa;//(D.f[ DIR_0PP  ])[ktn  ] = mfbaa;// -  c1over54 ;	 (D.f[ DIR_0PP  ])[k   ]
+			(D.f[ DIR_0MM  ])[kbs ] = mfbcc;//(D.f[ DIR_0MM  ])[kbs  ] = mfbcc;// -  c1over54 ;	 (D.f[ DIR_0MM  ])[kbs ]
+			(D.f[ DIR_0PM  ])[kb  ] = mfbac;//(D.f[ DIR_0PM  ])[kbn  ] = mfbac;// -  c1over54 ;	 (D.f[ DIR_0PM  ])[kb  ]
+			(D.f[ DIR_0MP  ])[ks  ] = mfbca;//(D.f[ DIR_0MP  ])[kts  ] = mfbca;// -  c1over54 ;	 (D.f[ DIR_0MP  ])[ks  ]
+			(D.f[ DIR_000])[k   ] = mfbbb;//(D.f[ DIR_000])[kzero] = mfbbb;// -  c8over27 ;	 (D.f[ DIR_000])[k   ]
+			(D.f[ DIR_PPP ])[k   ] = mfaaa;//(D.f[ DIR_PPP ])[ktne ] = mfaaa;// -  c1over216;	 (D.f[ DIR_PPP ])[k   ]
+			(D.f[ DIR_PMP ])[ks  ] = mfaca;//(D.f[ DIR_PMP ])[ktse ] = mfaca;// -  c1over216;	 (D.f[ DIR_PMP ])[ks  ]
+			(D.f[ DIR_PPM ])[kb  ] = mfaac;//(D.f[ DIR_PPM ])[kbne ] = mfaac;// -  c1over216;	 (D.f[ DIR_PPM ])[kb  ]
+			(D.f[ DIR_PMM ])[kbs ] = mfacc;//(D.f[ DIR_PMM ])[kbse ] = mfacc;// -  c1over216;	 (D.f[ DIR_PMM ])[kbs ]
+			(D.f[ DIR_MPP ])[kw  ] = mfcaa;//(D.f[ DIR_MPP ])[ktnw ] = mfcaa;// -  c1over216;	 (D.f[ DIR_MPP ])[kw  ]
+			(D.f[ DIR_MMP ])[ksw ] = mfcca;//(D.f[ DIR_MMP ])[ktsw ] = mfcca;// -  c1over216;	 (D.f[ DIR_MMP ])[ksw ]
+			(D.f[ DIR_MPM ])[kbw ] = mfcac;//(D.f[ DIR_MPM ])[kbnw ] = mfcac;// -  c1over216;	 (D.f[ DIR_MPM ])[kbw ]
+			(D.f[ DIR_MMM ])[kbsw] = mfccc;//(D.f[ DIR_MMM ])[kbsw ] = mfccc;// -  c1over216;	 (D.f[ DIR_MMM ])[kbsw]
 			////////////////////////////////////////////////////////////////////////////////////
 		}                                                                                                                    
 	}
@@ -966,7 +966,7 @@ extern "C" __global__ void LB_Kernel_Kum_AA2016_Comp_Bulk_SP_27(real omega,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LB_Kernel_Kum_IsoTest_SP_27( real omega,
+__global__ void LB_Kernel_Kum_IsoTest_SP_27( real omega,
 														unsigned int* bcMatD,
 														unsigned int* neighborX,
 														unsigned int* neighborY,
@@ -1000,63 +1000,63 @@ extern "C" __global__ void LB_Kernel_Kum_IsoTest_SP_27( real omega,
 			Distributions27 D;
 			if (EvenOrOdd==true)
 			{
-				D.f[E   ] = &DDStart[E   *size_Mat];
-				D.f[W   ] = &DDStart[W   *size_Mat];
-				D.f[N   ] = &DDStart[N   *size_Mat];
-				D.f[S   ] = &DDStart[S   *size_Mat];
-				D.f[T   ] = &DDStart[T   *size_Mat];
-				D.f[B   ] = &DDStart[B   *size_Mat];
-				D.f[NE  ] = &DDStart[NE  *size_Mat];
-				D.f[SW  ] = &DDStart[SW  *size_Mat];
-				D.f[SE  ] = &DDStart[SE  *size_Mat];
-				D.f[NW  ] = &DDStart[NW  *size_Mat];
-				D.f[TE  ] = &DDStart[TE  *size_Mat];
-				D.f[BW  ] = &DDStart[BW  *size_Mat];
-				D.f[BE  ] = &DDStart[BE  *size_Mat];
-				D.f[TW  ] = &DDStart[TW  *size_Mat];
-				D.f[TN  ] = &DDStart[TN  *size_Mat];
-				D.f[BS  ] = &DDStart[BS  *size_Mat];
-				D.f[BN  ] = &DDStart[BN  *size_Mat];
-				D.f[TS  ] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[TNE ] = &DDStart[TNE *size_Mat];
-				D.f[TSW ] = &DDStart[TSW *size_Mat];
-				D.f[TSE ] = &DDStart[TSE *size_Mat];
-				D.f[TNW ] = &DDStart[TNW *size_Mat];
-				D.f[BNE ] = &DDStart[BNE *size_Mat];
-				D.f[BSW ] = &DDStart[BSW *size_Mat];
-				D.f[BSE ] = &DDStart[BSE *size_Mat];
-				D.f[BNW ] = &DDStart[BNW *size_Mat];
+				D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
 			}
 			else
 			{
-				D.f[W   ] = &DDStart[E   *size_Mat];
-				D.f[E   ] = &DDStart[W   *size_Mat];
-				D.f[S   ] = &DDStart[N   *size_Mat];
-				D.f[N   ] = &DDStart[S   *size_Mat];
-				D.f[B   ] = &DDStart[T   *size_Mat];
-				D.f[T   ] = &DDStart[B   *size_Mat];
-				D.f[SW  ] = &DDStart[NE  *size_Mat];
-				D.f[NE  ] = &DDStart[SW  *size_Mat];
-				D.f[NW  ] = &DDStart[SE  *size_Mat];
-				D.f[SE  ] = &DDStart[NW  *size_Mat];
-				D.f[BW  ] = &DDStart[TE  *size_Mat];
-				D.f[TE  ] = &DDStart[BW  *size_Mat];
-				D.f[TW  ] = &DDStart[BE  *size_Mat];
-				D.f[BE  ] = &DDStart[TW  *size_Mat];
-				D.f[BS  ] = &DDStart[TN  *size_Mat];
-				D.f[TN  ] = &DDStart[BS  *size_Mat];
-				D.f[TS  ] = &DDStart[BN  *size_Mat];
-				D.f[BN  ] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[BSW ] = &DDStart[TNE *size_Mat];
-				D.f[BNE ] = &DDStart[TSW *size_Mat];
-				D.f[BNW ] = &DDStart[TSE *size_Mat];
-				D.f[BSE ] = &DDStart[TNW *size_Mat];
-				D.f[TSW ] = &DDStart[BNE *size_Mat];
-				D.f[TNE ] = &DDStart[BSW *size_Mat];
-				D.f[TNW ] = &DDStart[BSE *size_Mat];
-				D.f[TSE ] = &DDStart[BNW *size_Mat];
+				D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -1089,33 +1089,33 @@ extern "C" __global__ void LB_Kernel_Kum_IsoTest_SP_27( real omega,
 			//unsigned int ktne = k;
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[E   ])[k  ];//[ke   ];// +  c2over27 ;(D.f[E   ])[k  ];//ke
-			real mfabb = (D.f[W   ])[kw ];//[kw   ];// +  c2over27 ;(D.f[W   ])[kw ];
-			real mfbcb = (D.f[N   ])[k  ];//[kn   ];// +  c2over27 ;(D.f[N   ])[k  ];//kn
-			real mfbab = (D.f[S   ])[ks ];//[ks   ];// +  c2over27 ;(D.f[S   ])[ks ];
-			real mfbbc = (D.f[T   ])[k  ];//[kt   ];// +  c2over27 ;(D.f[T   ])[k  ];//kt
-			real mfbba = (D.f[B   ])[kb ];//[kb   ];// +  c2over27 ;(D.f[B   ])[kb ];
-			real mfccb = (D.f[NE  ])[k  ];//[kne  ];// +  c1over54 ;(D.f[NE  ])[k  ];//kne
-			real mfaab = (D.f[SW  ])[ksw];//[ksw  ];// +  c1over54 ;(D.f[SW  ])[ksw];
-			real mfcab = (D.f[SE  ])[ks ];//[kse  ];// +  c1over54 ;(D.f[SE  ])[ks ];//kse
-			real mfacb = (D.f[NW  ])[kw ];//[knw  ];// +  c1over54 ;(D.f[NW  ])[kw ];//knw
-			real mfcbc = (D.f[TE  ])[k  ];//[kte  ];// +  c1over54 ;(D.f[TE  ])[k  ];//kte
-			real mfaba = (D.f[BW  ])[kbw];//[kbw  ];// +  c1over54 ;(D.f[BW  ])[kbw];
-			real mfcba = (D.f[BE  ])[kb ];//[kbe  ];// +  c1over54 ;(D.f[BE  ])[kb ];//kbe
-			real mfabc = (D.f[TW  ])[kw ];//[ktw  ];// +  c1over54 ;(D.f[TW  ])[kw ];//ktw
-			real mfbcc = (D.f[TN  ])[k  ];//[ktn  ];// +  c1over54 ;(D.f[TN  ])[k  ];//ktn
-			real mfbaa = (D.f[BS  ])[kbs];//[kbs  ];// +  c1over54 ;(D.f[BS  ])[kbs];
-			real mfbca = (D.f[BN  ])[kb ];//[kbn  ];// +  c1over54 ;(D.f[BN  ])[kb ];//kbn
-			real mfbac = (D.f[TS  ])[ks ];//[kts  ];// +  c1over54 ;(D.f[TS  ])[ks ];//kts
-			real mfbbb = (D.f[REST])[k  ];//[kzero];// +  c8over27 ;(D.f[REST])[k  ];//kzero
-			real mfccc = (D.f[TNE ])[k  ];//[ktne ];// +  c1over216;(D.f[TNE ])[k  ];//ktne
-			real mfaac = (D.f[TSW ])[ksw];//[ktsw ];// +  c1over216;(D.f[TSW ])[ksw];//ktsw
-			real mfcac = (D.f[TSE ])[ks ];//[ktse ];// +  c1over216;(D.f[TSE ])[ks ];//ktse
-			real mfacc = (D.f[TNW ])[kw ];//[ktnw ];// +  c1over216;(D.f[TNW ])[kw ];//ktnw
-			real mfcca = (D.f[BNE ])[kb ];//[kbne ];// +  c1over216;(D.f[BNE ])[kb ];//kbne
-			real mfaaa = (D.f[BSW ])[kbsw];//[kbsw ];// +  c1over216;(D.f[BSW ])[kbsw];
-			real mfcaa = (D.f[BSE ])[kbs];//[kbse ];// +  c1over216;(D.f[BSE ])[kbs];//kbse
-			real mfaca = (D.f[BNW ])[kbw];//[kbnw ];// +  c1over216;(D.f[BNW ])[kbw];//kbnw
+			real mfcbb = (D.f[DIR_P00   ])[k  ];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
+			real mfabb = (D.f[DIR_M00   ])[kw ];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
+			real mfbcb = (D.f[DIR_0P0   ])[k  ];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0   ])[ks ];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
+			real mfbbc = (D.f[DIR_00P   ])[k  ];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
+			real mfbba = (D.f[DIR_00M   ])[kb ];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
+			real mfccb = (D.f[DIR_PP0  ])[k  ];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0  ])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
+			real mfcab = (D.f[DIR_PM0  ])[ks ];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0  ])[kw ];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P  ])[k  ];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M  ])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
+			real mfcba = (D.f[DIR_P0M  ])[kb ];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P  ])[kw ];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP  ])[k  ];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM  ])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
+			real mfbca = (D.f[DIR_0PM  ])[kb ];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP  ])[ks ];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfbbb = (D.f[DIR_000])[k  ];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
+			real mfccc = (D.f[DIR_PPP ])[k  ];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP ])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP ])[ks ];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP ])[kw ];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM ])[kb ];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM ])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
+			real mfcaa = (D.f[DIR_PMM ])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM ])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
 			////////////////////////////////////////////////////////////////////////////////////
 			//slow
 			//real oMdrho = one - ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
@@ -1933,33 +1933,33 @@ extern "C" __global__ void LB_Kernel_Kum_IsoTest_SP_27( real omega,
 
 
 			////////////////////////////////////////////////////////////////////////////////////
-			(D.f[ E   ])[k   ] = mfabb;//(D.f[ E   ])[ke   ] = mfabb;// -  c2over27 ;  (D.f[ E   ])[k   ]                                                                     
-			(D.f[ W   ])[kw  ] = mfcbb;//(D.f[ W   ])[kw   ] = mfcbb;// -  c2over27 ;  (D.f[ W   ])[kw  ]                                                                   
-			(D.f[ N   ])[k   ] = mfbab;//(D.f[ N   ])[kn   ] = mfbab;// -  c2over27 ;	 (D.f[ N   ])[k   ]
-			(D.f[ S   ])[ks  ] = mfbcb;//(D.f[ S   ])[ks   ] = mfbcb;// -  c2over27 ;	 (D.f[ S   ])[ks  ]
-			(D.f[ T   ])[k   ] = mfbba;//(D.f[ T   ])[kt   ] = mfbba;// -  c2over27 ;	 (D.f[ T   ])[k   ]
-			(D.f[ B   ])[kb  ] = mfbbc;//(D.f[ B   ])[kb   ] = mfbbc;// -  c2over27 ;	 (D.f[ B   ])[kb  ]
-			(D.f[ NE  ])[k   ] = mfaab;//(D.f[ NE  ])[kne  ] = mfaab;// -  c1over54 ;	 (D.f[ NE  ])[k   ]
-			(D.f[ SW  ])[ksw ] = mfccb;//(D.f[ SW  ])[ksw  ] = mfccb;// -  c1over54 ;	 (D.f[ SW  ])[ksw ]
-			(D.f[ SE  ])[ks  ] = mfacb;//(D.f[ SE  ])[kse  ] = mfacb;// -  c1over54 ;	 (D.f[ SE  ])[ks  ]
-			(D.f[ NW  ])[kw  ] = mfcab;//(D.f[ NW  ])[knw  ] = mfcab;// -  c1over54 ;	 (D.f[ NW  ])[kw  ]
-			(D.f[ TE  ])[k   ] = mfaba;//(D.f[ TE  ])[kte  ] = mfaba;// -  c1over54 ;	 (D.f[ TE  ])[k   ]
-			(D.f[ BW  ])[kbw ] = mfcbc;//(D.f[ BW  ])[kbw  ] = mfcbc;// -  c1over54 ;	 (D.f[ BW  ])[kbw ]
-			(D.f[ BE  ])[kb  ] = mfabc;//(D.f[ BE  ])[kbe  ] = mfabc;// -  c1over54 ;	 (D.f[ BE  ])[kb  ]
-			(D.f[ TW  ])[kw  ] = mfcba;//(D.f[ TW  ])[ktw  ] = mfcba;// -  c1over54 ;	 (D.f[ TW  ])[kw  ]
-			(D.f[ TN  ])[k   ] = mfbaa;//(D.f[ TN  ])[ktn  ] = mfbaa;// -  c1over54 ;	 (D.f[ TN  ])[k   ]
-			(D.f[ BS  ])[kbs ] = mfbcc;//(D.f[ BS  ])[kbs  ] = mfbcc;// -  c1over54 ;	 (D.f[ BS  ])[kbs ]
-			(D.f[ BN  ])[kb  ] = mfbac;//(D.f[ BN  ])[kbn  ] = mfbac;// -  c1over54 ;	 (D.f[ BN  ])[kb  ]
-			(D.f[ TS  ])[ks  ] = mfbca;//(D.f[ TS  ])[kts  ] = mfbca;// -  c1over54 ;	 (D.f[ TS  ])[ks  ]
-			(D.f[ REST])[k   ] = mfbbb;//(D.f[ REST])[kzero] = mfbbb;// -  c8over27 ;	 (D.f[ REST])[k   ]
-			(D.f[ TNE ])[k   ] = mfaaa;//(D.f[ TNE ])[ktne ] = mfaaa;// -  c1over216;	 (D.f[ TNE ])[k   ]
-			(D.f[ TSE ])[ks  ] = mfaca;//(D.f[ TSE ])[ktse ] = mfaca;// -  c1over216;	 (D.f[ TSE ])[ks  ]
-			(D.f[ BNE ])[kb  ] = mfaac;//(D.f[ BNE ])[kbne ] = mfaac;// -  c1over216;	 (D.f[ BNE ])[kb  ]
-			(D.f[ BSE ])[kbs ] = mfacc;//(D.f[ BSE ])[kbse ] = mfacc;// -  c1over216;	 (D.f[ BSE ])[kbs ]
-			(D.f[ TNW ])[kw  ] = mfcaa;//(D.f[ TNW ])[ktnw ] = mfcaa;// -  c1over216;	 (D.f[ TNW ])[kw  ]
-			(D.f[ TSW ])[ksw ] = mfcca;//(D.f[ TSW ])[ktsw ] = mfcca;// -  c1over216;	 (D.f[ TSW ])[ksw ]
-			(D.f[ BNW ])[kbw ] = mfcac;//(D.f[ BNW ])[kbnw ] = mfcac;// -  c1over216;	 (D.f[ BNW ])[kbw ]
-			(D.f[ BSW ])[kbsw] = mfccc;//(D.f[ BSW ])[kbsw ] = mfccc;// -  c1over216;	 (D.f[ BSW ])[kbsw]
+			(D.f[ DIR_P00   ])[k   ] = mfabb;//(D.f[ DIR_P00   ])[ke   ] = mfabb;// -  c2over27 ;  (D.f[ DIR_P00   ])[k   ]                                                                     
+			(D.f[ DIR_M00   ])[kw  ] = mfcbb;//(D.f[ DIR_M00   ])[kw   ] = mfcbb;// -  c2over27 ;  (D.f[ DIR_M00   ])[kw  ]                                                                   
+			(D.f[ DIR_0P0   ])[k   ] = mfbab;//(D.f[ DIR_0P0   ])[kn   ] = mfbab;// -  c2over27 ;	 (D.f[ DIR_0P0   ])[k   ]
+			(D.f[ DIR_0M0   ])[ks  ] = mfbcb;//(D.f[ DIR_0M0   ])[ks   ] = mfbcb;// -  c2over27 ;	 (D.f[ DIR_0M0   ])[ks  ]
+			(D.f[ DIR_00P   ])[k   ] = mfbba;//(D.f[ DIR_00P   ])[kt   ] = mfbba;// -  c2over27 ;	 (D.f[ DIR_00P   ])[k   ]
+			(D.f[ DIR_00M   ])[kb  ] = mfbbc;//(D.f[ DIR_00M   ])[kb   ] = mfbbc;// -  c2over27 ;	 (D.f[ DIR_00M   ])[kb  ]
+			(D.f[ DIR_PP0  ])[k   ] = mfaab;//(D.f[ DIR_PP0  ])[kne  ] = mfaab;// -  c1over54 ;	 (D.f[ DIR_PP0  ])[k   ]
+			(D.f[ DIR_MM0  ])[ksw ] = mfccb;//(D.f[ DIR_MM0  ])[ksw  ] = mfccb;// -  c1over54 ;	 (D.f[ DIR_MM0  ])[ksw ]
+			(D.f[ DIR_PM0  ])[ks  ] = mfacb;//(D.f[ DIR_PM0  ])[kse  ] = mfacb;// -  c1over54 ;	 (D.f[ DIR_PM0  ])[ks  ]
+			(D.f[ DIR_MP0  ])[kw  ] = mfcab;//(D.f[ DIR_MP0  ])[knw  ] = mfcab;// -  c1over54 ;	 (D.f[ DIR_MP0  ])[kw  ]
+			(D.f[ DIR_P0P  ])[k   ] = mfaba;//(D.f[ DIR_P0P  ])[kte  ] = mfaba;// -  c1over54 ;	 (D.f[ DIR_P0P  ])[k   ]
+			(D.f[ DIR_M0M  ])[kbw ] = mfcbc;//(D.f[ DIR_M0M  ])[kbw  ] = mfcbc;// -  c1over54 ;	 (D.f[ DIR_M0M  ])[kbw ]
+			(D.f[ DIR_P0M  ])[kb  ] = mfabc;//(D.f[ DIR_P0M  ])[kbe  ] = mfabc;// -  c1over54 ;	 (D.f[ DIR_P0M  ])[kb  ]
+			(D.f[ DIR_M0P  ])[kw  ] = mfcba;//(D.f[ DIR_M0P  ])[ktw  ] = mfcba;// -  c1over54 ;	 (D.f[ DIR_M0P  ])[kw  ]
+			(D.f[ DIR_0PP  ])[k   ] = mfbaa;//(D.f[ DIR_0PP  ])[ktn  ] = mfbaa;// -  c1over54 ;	 (D.f[ DIR_0PP  ])[k   ]
+			(D.f[ DIR_0MM  ])[kbs ] = mfbcc;//(D.f[ DIR_0MM  ])[kbs  ] = mfbcc;// -  c1over54 ;	 (D.f[ DIR_0MM  ])[kbs ]
+			(D.f[ DIR_0PM  ])[kb  ] = mfbac;//(D.f[ DIR_0PM  ])[kbn  ] = mfbac;// -  c1over54 ;	 (D.f[ DIR_0PM  ])[kb  ]
+			(D.f[ DIR_0MP  ])[ks  ] = mfbca;//(D.f[ DIR_0MP  ])[kts  ] = mfbca;// -  c1over54 ;	 (D.f[ DIR_0MP  ])[ks  ]
+			(D.f[ DIR_000])[k   ] = mfbbb;//(D.f[ DIR_000])[kzero] = mfbbb;// -  c8over27 ;	 (D.f[ DIR_000])[k   ]
+			(D.f[ DIR_PPP ])[k   ] = mfaaa;//(D.f[ DIR_PPP ])[ktne ] = mfaaa;// -  c1over216;	 (D.f[ DIR_PPP ])[k   ]
+			(D.f[ DIR_PMP ])[ks  ] = mfaca;//(D.f[ DIR_PMP ])[ktse ] = mfaca;// -  c1over216;	 (D.f[ DIR_PMP ])[ks  ]
+			(D.f[ DIR_PPM ])[kb  ] = mfaac;//(D.f[ DIR_PPM ])[kbne ] = mfaac;// -  c1over216;	 (D.f[ DIR_PPM ])[kb  ]
+			(D.f[ DIR_PMM ])[kbs ] = mfacc;//(D.f[ DIR_PMM ])[kbse ] = mfacc;// -  c1over216;	 (D.f[ DIR_PMM ])[kbs ]
+			(D.f[ DIR_MPP ])[kw  ] = mfcaa;//(D.f[ DIR_MPP ])[ktnw ] = mfcaa;// -  c1over216;	 (D.f[ DIR_MPP ])[kw  ]
+			(D.f[ DIR_MMP ])[ksw ] = mfcca;//(D.f[ DIR_MMP ])[ktsw ] = mfcca;// -  c1over216;	 (D.f[ DIR_MMP ])[ksw ]
+			(D.f[ DIR_MPM ])[kbw ] = mfcac;//(D.f[ DIR_MPM ])[kbnw ] = mfcac;// -  c1over216;	 (D.f[ DIR_MPM ])[kbw ]
+			(D.f[ DIR_MMM ])[kbsw] = mfccc;//(D.f[ DIR_MMM ])[kbsw ] = mfccc;// -  c1over216;	 (D.f[ DIR_MMM ])[kbsw]
 			////////////////////////////////////////////////////////////////////////////////////
 		}                                                                                                                    
 	}
@@ -2005,7 +2005,7 @@ extern "C" __global__ void LB_Kernel_Kum_IsoTest_SP_27( real omega,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LB_Kernel_Kum_1h_SP_27(  real omega,
+__global__ void LB_Kernel_Kum_1h_SP_27(  real omega,
 													real deltaPhi,
 													real angularVelocity,
 													unsigned int* bcMatD,
@@ -2041,63 +2041,63 @@ extern "C" __global__ void LB_Kernel_Kum_1h_SP_27(  real omega,
 			Distributions27 D;
 			if (EvenOrOdd==true)
 			{
-				D.f[E   ] = &DDStart[E   *size_Mat];
-				D.f[W   ] = &DDStart[W   *size_Mat];
-				D.f[N   ] = &DDStart[N   *size_Mat];
-				D.f[S   ] = &DDStart[S   *size_Mat];
-				D.f[T   ] = &DDStart[T   *size_Mat];
-				D.f[B   ] = &DDStart[B   *size_Mat];
-				D.f[NE  ] = &DDStart[NE  *size_Mat];
-				D.f[SW  ] = &DDStart[SW  *size_Mat];
-				D.f[SE  ] = &DDStart[SE  *size_Mat];
-				D.f[NW  ] = &DDStart[NW  *size_Mat];
-				D.f[TE  ] = &DDStart[TE  *size_Mat];
-				D.f[BW  ] = &DDStart[BW  *size_Mat];
-				D.f[BE  ] = &DDStart[BE  *size_Mat];
-				D.f[TW  ] = &DDStart[TW  *size_Mat];
-				D.f[TN  ] = &DDStart[TN  *size_Mat];
-				D.f[BS  ] = &DDStart[BS  *size_Mat];
-				D.f[BN  ] = &DDStart[BN  *size_Mat];
-				D.f[TS  ] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[TNE ] = &DDStart[TNE *size_Mat];
-				D.f[TSW ] = &DDStart[TSW *size_Mat];
-				D.f[TSE ] = &DDStart[TSE *size_Mat];
-				D.f[TNW ] = &DDStart[TNW *size_Mat];
-				D.f[BNE ] = &DDStart[BNE *size_Mat];
-				D.f[BSW ] = &DDStart[BSW *size_Mat];
-				D.f[BSE ] = &DDStart[BSE *size_Mat];
-				D.f[BNW ] = &DDStart[BNW *size_Mat];
+				D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
 			}
 			else
 			{
-				D.f[W   ] = &DDStart[E   *size_Mat];
-				D.f[E   ] = &DDStart[W   *size_Mat];
-				D.f[S   ] = &DDStart[N   *size_Mat];
-				D.f[N   ] = &DDStart[S   *size_Mat];
-				D.f[B   ] = &DDStart[T   *size_Mat];
-				D.f[T   ] = &DDStart[B   *size_Mat];
-				D.f[SW  ] = &DDStart[NE  *size_Mat];
-				D.f[NE  ] = &DDStart[SW  *size_Mat];
-				D.f[NW  ] = &DDStart[SE  *size_Mat];
-				D.f[SE  ] = &DDStart[NW  *size_Mat];
-				D.f[BW  ] = &DDStart[TE  *size_Mat];
-				D.f[TE  ] = &DDStart[BW  *size_Mat];
-				D.f[TW  ] = &DDStart[BE  *size_Mat];
-				D.f[BE  ] = &DDStart[TW  *size_Mat];
-				D.f[BS  ] = &DDStart[TN  *size_Mat];
-				D.f[TN  ] = &DDStart[BS  *size_Mat];
-				D.f[TS  ] = &DDStart[BN  *size_Mat];
-				D.f[BN  ] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[BSW ] = &DDStart[TNE *size_Mat];
-				D.f[BNE ] = &DDStart[TSW *size_Mat];
-				D.f[BNW ] = &DDStart[TSE *size_Mat];
-				D.f[BSE ] = &DDStart[TNW *size_Mat];
-				D.f[TSW ] = &DDStart[BNE *size_Mat];
-				D.f[TNE ] = &DDStart[BSW *size_Mat];
-				D.f[TNW ] = &DDStart[BSE *size_Mat];
-				D.f[TSE ] = &DDStart[BNW *size_Mat];
+				D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -2158,33 +2158,33 @@ extern "C" __global__ void LB_Kernel_Kum_1h_SP_27(  real omega,
 			//unsigned int ktne = k;
 			//unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[E   ])[k  ];//[ke   ];// +  c2over27 ;(D.f[E   ])[k  ];//ke
-			real mfabb = (D.f[W   ])[kw ];//[kw   ];// +  c2over27 ;(D.f[W   ])[kw ];
-			real mfbcb = (D.f[N   ])[k  ];//[kn   ];// +  c2over27 ;(D.f[N   ])[k  ];//kn
-			real mfbab = (D.f[S   ])[ks ];//[ks   ];// +  c2over27 ;(D.f[S   ])[ks ];
-			real mfbbc = (D.f[T   ])[k  ];//[kt   ];// +  c2over27 ;(D.f[T   ])[k  ];//kt
-			real mfbba = (D.f[B   ])[kb ];//[kb   ];// +  c2over27 ;(D.f[B   ])[kb ];
-			real mfccb = (D.f[NE  ])[k  ];//[kne  ];// +  c1over54 ;(D.f[NE  ])[k  ];//kne
-			real mfaab = (D.f[SW  ])[ksw];//[ksw  ];// +  c1over54 ;(D.f[SW  ])[ksw];
-			real mfcab = (D.f[SE  ])[ks ];//[kse  ];// +  c1over54 ;(D.f[SE  ])[ks ];//kse
-			real mfacb = (D.f[NW  ])[kw ];//[knw  ];// +  c1over54 ;(D.f[NW  ])[kw ];//knw
-			real mfcbc = (D.f[TE  ])[k  ];//[kte  ];// +  c1over54 ;(D.f[TE  ])[k  ];//kte
-			real mfaba = (D.f[BW  ])[kbw];//[kbw  ];// +  c1over54 ;(D.f[BW  ])[kbw];
-			real mfcba = (D.f[BE  ])[kb ];//[kbe  ];// +  c1over54 ;(D.f[BE  ])[kb ];//kbe
-			real mfabc = (D.f[TW  ])[kw ];//[ktw  ];// +  c1over54 ;(D.f[TW  ])[kw ];//ktw
-			real mfbcc = (D.f[TN  ])[k  ];//[ktn  ];// +  c1over54 ;(D.f[TN  ])[k  ];//ktn
-			real mfbaa = (D.f[BS  ])[kbs];//[kbs  ];// +  c1over54 ;(D.f[BS  ])[kbs];
-			real mfbca = (D.f[BN  ])[kb ];//[kbn  ];// +  c1over54 ;(D.f[BN  ])[kb ];//kbn
-			real mfbac = (D.f[TS  ])[ks ];//[kts  ];// +  c1over54 ;(D.f[TS  ])[ks ];//kts
-			real mfbbb = (D.f[REST])[k  ];//[kzero];// +  c8over27 ;(D.f[REST])[k  ];//kzero
-			real mfccc = (D.f[TNE ])[k  ];//[ktne ];// +  c1over216;(D.f[TNE ])[k  ];//ktne
-			real mfaac = (D.f[TSW ])[ksw];//[ktsw ];// +  c1over216;(D.f[TSW ])[ksw];//ktsw
-			real mfcac = (D.f[TSE ])[ks ];//[ktse ];// +  c1over216;(D.f[TSE ])[ks ];//ktse
-			real mfacc = (D.f[TNW ])[kw ];//[ktnw ];// +  c1over216;(D.f[TNW ])[kw ];//ktnw
-			real mfcca = (D.f[BNE ])[kb ];//[kbne ];// +  c1over216;(D.f[BNE ])[kb ];//kbne
-			real mfaaa = (D.f[BSW ])[kbsw];//[kbsw ];// +  c1over216;(D.f[BSW ])[kbsw];
-			real mfcaa = (D.f[BSE ])[kbs];//[kbse ];// +  c1over216;(D.f[BSE ])[kbs];//kbse
-			real mfaca = (D.f[BNW ])[kbw];//[kbnw ];// +  c1over216;(D.f[BNW ])[kbw];//kbnw
+			real mfcbb = (D.f[DIR_P00   ])[k  ];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
+			real mfabb = (D.f[DIR_M00   ])[kw ];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
+			real mfbcb = (D.f[DIR_0P0   ])[k  ];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0   ])[ks ];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
+			real mfbbc = (D.f[DIR_00P   ])[k  ];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
+			real mfbba = (D.f[DIR_00M   ])[kb ];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
+			real mfccb = (D.f[DIR_PP0  ])[k  ];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0  ])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
+			real mfcab = (D.f[DIR_PM0  ])[ks ];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0  ])[kw ];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P  ])[k  ];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M  ])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
+			real mfcba = (D.f[DIR_P0M  ])[kb ];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P  ])[kw ];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP  ])[k  ];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM  ])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
+			real mfbca = (D.f[DIR_0PM  ])[kb ];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP  ])[ks ];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfbbb = (D.f[DIR_000])[k  ];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
+			real mfccc = (D.f[DIR_PPP ])[k  ];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP ])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP ])[ks ];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP ])[kw ];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM ])[kb ];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM ])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
+			real mfcaa = (D.f[DIR_PMM ])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM ])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
 			////////////////////////////////////////////////////////////////////////////////////
 			//Ship
 			real coord0X = 281.125f;//7.5f;
@@ -3160,33 +3160,33 @@ extern "C" __global__ void LB_Kernel_Kum_1h_SP_27(  real omega,
 
 
 			////////////////////////////////////////////////////////////////////////////////////
-			(D.f[ E   ])[k   ] = mfabb;//(D.f[ E   ])[ke   ] = mfabb;// -  c2over27 ;  (D.f[ E   ])[k   ]                                                                     
-			(D.f[ W   ])[kw  ] = mfcbb;//(D.f[ W   ])[kw   ] = mfcbb;// -  c2over27 ;  (D.f[ W   ])[kw  ]                                                                   
-			(D.f[ N   ])[k   ] = mfbab;//(D.f[ N   ])[kn   ] = mfbab;// -  c2over27 ;	 (D.f[ N   ])[k   ]
-			(D.f[ S   ])[ks  ] = mfbcb;//(D.f[ S   ])[ks   ] = mfbcb;// -  c2over27 ;	 (D.f[ S   ])[ks  ]
-			(D.f[ T   ])[k   ] = mfbba;//(D.f[ T   ])[kt   ] = mfbba;// -  c2over27 ;	 (D.f[ T   ])[k   ]
-			(D.f[ B   ])[kb  ] = mfbbc;//(D.f[ B   ])[kb   ] = mfbbc;// -  c2over27 ;	 (D.f[ B   ])[kb  ]
-			(D.f[ NE  ])[k   ] = mfaab;//(D.f[ NE  ])[kne  ] = mfaab;// -  c1over54 ;	 (D.f[ NE  ])[k   ]
-			(D.f[ SW  ])[ksw ] = mfccb;//(D.f[ SW  ])[ksw  ] = mfccb;// -  c1over54 ;	 (D.f[ SW  ])[ksw ]
-			(D.f[ SE  ])[ks  ] = mfacb;//(D.f[ SE  ])[kse  ] = mfacb;// -  c1over54 ;	 (D.f[ SE  ])[ks  ]
-			(D.f[ NW  ])[kw  ] = mfcab;//(D.f[ NW  ])[knw  ] = mfcab;// -  c1over54 ;	 (D.f[ NW  ])[kw  ]
-			(D.f[ TE  ])[k   ] = mfaba;//(D.f[ TE  ])[kte  ] = mfaba;// -  c1over54 ;	 (D.f[ TE  ])[k   ]
-			(D.f[ BW  ])[kbw ] = mfcbc;//(D.f[ BW  ])[kbw  ] = mfcbc;// -  c1over54 ;	 (D.f[ BW  ])[kbw ]
-			(D.f[ BE  ])[kb  ] = mfabc;//(D.f[ BE  ])[kbe  ] = mfabc;// -  c1over54 ;	 (D.f[ BE  ])[kb  ]
-			(D.f[ TW  ])[kw  ] = mfcba;//(D.f[ TW  ])[ktw  ] = mfcba;// -  c1over54 ;	 (D.f[ TW  ])[kw  ]
-			(D.f[ TN  ])[k   ] = mfbaa;//(D.f[ TN  ])[ktn  ] = mfbaa;// -  c1over54 ;	 (D.f[ TN  ])[k   ]
-			(D.f[ BS  ])[kbs ] = mfbcc;//(D.f[ BS  ])[kbs  ] = mfbcc;// -  c1over54 ;	 (D.f[ BS  ])[kbs ]
-			(D.f[ BN  ])[kb  ] = mfbac;//(D.f[ BN  ])[kbn  ] = mfbac;// -  c1over54 ;	 (D.f[ BN  ])[kb  ]
-			(D.f[ TS  ])[ks  ] = mfbca;//(D.f[ TS  ])[kts  ] = mfbca;// -  c1over54 ;	 (D.f[ TS  ])[ks  ]
-			(D.f[ REST])[k   ] = mfbbb;//(D.f[ REST])[kzero] = mfbbb;// -  c8over27 ;	 (D.f[ REST])[k   ]
-			(D.f[ TNE ])[k   ] = mfaaa;//(D.f[ TNE ])[ktne ] = mfaaa;// -  c1over216;	 (D.f[ TNE ])[k   ]
-			(D.f[ TSE ])[ks  ] = mfaca;//(D.f[ TSE ])[ktse ] = mfaca;// -  c1over216;	 (D.f[ TSE ])[ks  ]
-			(D.f[ BNE ])[kb  ] = mfaac;//(D.f[ BNE ])[kbne ] = mfaac;// -  c1over216;	 (D.f[ BNE ])[kb  ]
-			(D.f[ BSE ])[kbs ] = mfacc;//(D.f[ BSE ])[kbse ] = mfacc;// -  c1over216;	 (D.f[ BSE ])[kbs ]
-			(D.f[ TNW ])[kw  ] = mfcaa;//(D.f[ TNW ])[ktnw ] = mfcaa;// -  c1over216;	 (D.f[ TNW ])[kw  ]
-			(D.f[ TSW ])[ksw ] = mfcca;//(D.f[ TSW ])[ktsw ] = mfcca;// -  c1over216;	 (D.f[ TSW ])[ksw ]
-			(D.f[ BNW ])[kbw ] = mfcac;//(D.f[ BNW ])[kbnw ] = mfcac;// -  c1over216;	 (D.f[ BNW ])[kbw ]
-			(D.f[ BSW ])[kbsw] = mfccc;//(D.f[ BSW ])[kbsw ] = mfccc;// -  c1over216;	 (D.f[ BSW ])[kbsw]
+			(D.f[ DIR_P00   ])[k   ] = mfabb;//(D.f[ DIR_P00   ])[ke   ] = mfabb;// -  c2over27 ;  (D.f[ DIR_P00   ])[k   ]                                                                     
+			(D.f[ DIR_M00   ])[kw  ] = mfcbb;//(D.f[ DIR_M00   ])[kw   ] = mfcbb;// -  c2over27 ;  (D.f[ DIR_M00   ])[kw  ]                                                                   
+			(D.f[ DIR_0P0   ])[k   ] = mfbab;//(D.f[ DIR_0P0   ])[kn   ] = mfbab;// -  c2over27 ;	 (D.f[ DIR_0P0   ])[k   ]
+			(D.f[ DIR_0M0   ])[ks  ] = mfbcb;//(D.f[ DIR_0M0   ])[ks   ] = mfbcb;// -  c2over27 ;	 (D.f[ DIR_0M0   ])[ks  ]
+			(D.f[ DIR_00P   ])[k   ] = mfbba;//(D.f[ DIR_00P   ])[kt   ] = mfbba;// -  c2over27 ;	 (D.f[ DIR_00P   ])[k   ]
+			(D.f[ DIR_00M   ])[kb  ] = mfbbc;//(D.f[ DIR_00M   ])[kb   ] = mfbbc;// -  c2over27 ;	 (D.f[ DIR_00M   ])[kb  ]
+			(D.f[ DIR_PP0  ])[k   ] = mfaab;//(D.f[ DIR_PP0  ])[kne  ] = mfaab;// -  c1over54 ;	 (D.f[ DIR_PP0  ])[k   ]
+			(D.f[ DIR_MM0  ])[ksw ] = mfccb;//(D.f[ DIR_MM0  ])[ksw  ] = mfccb;// -  c1over54 ;	 (D.f[ DIR_MM0  ])[ksw ]
+			(D.f[ DIR_PM0  ])[ks  ] = mfacb;//(D.f[ DIR_PM0  ])[kse  ] = mfacb;// -  c1over54 ;	 (D.f[ DIR_PM0  ])[ks  ]
+			(D.f[ DIR_MP0  ])[kw  ] = mfcab;//(D.f[ DIR_MP0  ])[knw  ] = mfcab;// -  c1over54 ;	 (D.f[ DIR_MP0  ])[kw  ]
+			(D.f[ DIR_P0P  ])[k   ] = mfaba;//(D.f[ DIR_P0P  ])[kte  ] = mfaba;// -  c1over54 ;	 (D.f[ DIR_P0P  ])[k   ]
+			(D.f[ DIR_M0M  ])[kbw ] = mfcbc;//(D.f[ DIR_M0M  ])[kbw  ] = mfcbc;// -  c1over54 ;	 (D.f[ DIR_M0M  ])[kbw ]
+			(D.f[ DIR_P0M  ])[kb  ] = mfabc;//(D.f[ DIR_P0M  ])[kbe  ] = mfabc;// -  c1over54 ;	 (D.f[ DIR_P0M  ])[kb  ]
+			(D.f[ DIR_M0P  ])[kw  ] = mfcba;//(D.f[ DIR_M0P  ])[ktw  ] = mfcba;// -  c1over54 ;	 (D.f[ DIR_M0P  ])[kw  ]
+			(D.f[ DIR_0PP  ])[k   ] = mfbaa;//(D.f[ DIR_0PP  ])[ktn  ] = mfbaa;// -  c1over54 ;	 (D.f[ DIR_0PP  ])[k   ]
+			(D.f[ DIR_0MM  ])[kbs ] = mfbcc;//(D.f[ DIR_0MM  ])[kbs  ] = mfbcc;// -  c1over54 ;	 (D.f[ DIR_0MM  ])[kbs ]
+			(D.f[ DIR_0PM  ])[kb  ] = mfbac;//(D.f[ DIR_0PM  ])[kbn  ] = mfbac;// -  c1over54 ;	 (D.f[ DIR_0PM  ])[kb  ]
+			(D.f[ DIR_0MP  ])[ks  ] = mfbca;//(D.f[ DIR_0MP  ])[kts  ] = mfbca;// -  c1over54 ;	 (D.f[ DIR_0MP  ])[ks  ]
+			(D.f[ DIR_000])[k   ] = mfbbb;//(D.f[ DIR_000])[kzero] = mfbbb;// -  c8over27 ;	 (D.f[ DIR_000])[k   ]
+			(D.f[ DIR_PPP ])[k   ] = mfaaa;//(D.f[ DIR_PPP ])[ktne ] = mfaaa;// -  c1over216;	 (D.f[ DIR_PPP ])[k   ]
+			(D.f[ DIR_PMP ])[ks  ] = mfaca;//(D.f[ DIR_PMP ])[ktse ] = mfaca;// -  c1over216;	 (D.f[ DIR_PMP ])[ks  ]
+			(D.f[ DIR_PPM ])[kb  ] = mfaac;//(D.f[ DIR_PPM ])[kbne ] = mfaac;// -  c1over216;	 (D.f[ DIR_PPM ])[kb  ]
+			(D.f[ DIR_PMM ])[kbs ] = mfacc;//(D.f[ DIR_PMM ])[kbse ] = mfacc;// -  c1over216;	 (D.f[ DIR_PMM ])[kbs ]
+			(D.f[ DIR_MPP ])[kw  ] = mfcaa;//(D.f[ DIR_MPP ])[ktnw ] = mfcaa;// -  c1over216;	 (D.f[ DIR_MPP ])[kw  ]
+			(D.f[ DIR_MMP ])[ksw ] = mfcca;//(D.f[ DIR_MMP ])[ktsw ] = mfcca;// -  c1over216;	 (D.f[ DIR_MMP ])[ksw ]
+			(D.f[ DIR_MPM ])[kbw ] = mfcac;//(D.f[ DIR_MPM ])[kbnw ] = mfcac;// -  c1over216;	 (D.f[ DIR_MPM ])[kbw ]
+			(D.f[ DIR_MMM ])[kbsw] = mfccc;//(D.f[ DIR_MMM ])[kbsw ] = mfccc;// -  c1over216;	 (D.f[ DIR_MMM ])[kbsw]
 			////////////////////////////////////////////////////////////////////////////////////
 		}                                                                                                                    
 	}
@@ -3232,7 +3232,7 @@ extern "C" __global__ void LB_Kernel_Kum_1h_SP_27(  real omega,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LB_Kernel_Kum_New_SP_27(     real omega,
+__global__ void LB_Kernel_Kum_New_SP_27(     real omega,
 														unsigned int* bcMatD,
 														unsigned int* neighborX,
 														unsigned int* neighborY,
@@ -3263,63 +3263,63 @@ extern "C" __global__ void LB_Kernel_Kum_New_SP_27(     real omega,
 			Distributions27 D;
 			if (EvenOrOdd==true)
 			{
-				D.f[E   ] = &DDStart[E   *size_Mat];
-				D.f[W   ] = &DDStart[W   *size_Mat];
-				D.f[N   ] = &DDStart[N   *size_Mat];
-				D.f[S   ] = &DDStart[S   *size_Mat];
-				D.f[T   ] = &DDStart[T   *size_Mat];
-				D.f[B   ] = &DDStart[B   *size_Mat];
-				D.f[NE  ] = &DDStart[NE  *size_Mat];
-				D.f[SW  ] = &DDStart[SW  *size_Mat];
-				D.f[SE  ] = &DDStart[SE  *size_Mat];
-				D.f[NW  ] = &DDStart[NW  *size_Mat];
-				D.f[TE  ] = &DDStart[TE  *size_Mat];
-				D.f[BW  ] = &DDStart[BW  *size_Mat];
-				D.f[BE  ] = &DDStart[BE  *size_Mat];
-				D.f[TW  ] = &DDStart[TW  *size_Mat];
-				D.f[TN  ] = &DDStart[TN  *size_Mat];
-				D.f[BS  ] = &DDStart[BS  *size_Mat];
-				D.f[BN  ] = &DDStart[BN  *size_Mat];
-				D.f[TS  ] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[TNE ] = &DDStart[TNE *size_Mat];
-				D.f[TSW ] = &DDStart[TSW *size_Mat];
-				D.f[TSE ] = &DDStart[TSE *size_Mat];
-				D.f[TNW ] = &DDStart[TNW *size_Mat];
-				D.f[BNE ] = &DDStart[BNE *size_Mat];
-				D.f[BSW ] = &DDStart[BSW *size_Mat];
-				D.f[BSE ] = &DDStart[BSE *size_Mat];
-				D.f[BNW ] = &DDStart[BNW *size_Mat];
+				D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
 			}
 			else
 			{
-				D.f[W   ] = &DDStart[E   *size_Mat];
-				D.f[E   ] = &DDStart[W   *size_Mat];
-				D.f[S   ] = &DDStart[N   *size_Mat];
-				D.f[N   ] = &DDStart[S   *size_Mat];
-				D.f[B   ] = &DDStart[T   *size_Mat];
-				D.f[T   ] = &DDStart[B   *size_Mat];
-				D.f[SW  ] = &DDStart[NE  *size_Mat];
-				D.f[NE  ] = &DDStart[SW  *size_Mat];
-				D.f[NW  ] = &DDStart[SE  *size_Mat];
-				D.f[SE  ] = &DDStart[NW  *size_Mat];
-				D.f[BW  ] = &DDStart[TE  *size_Mat];
-				D.f[TE  ] = &DDStart[BW  *size_Mat];
-				D.f[TW  ] = &DDStart[BE  *size_Mat];
-				D.f[BE  ] = &DDStart[TW  *size_Mat];
-				D.f[BS  ] = &DDStart[TN  *size_Mat];
-				D.f[TN  ] = &DDStart[BS  *size_Mat];
-				D.f[TS  ] = &DDStart[BN  *size_Mat];
-				D.f[BN  ] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[BSW ] = &DDStart[TNE *size_Mat];
-				D.f[BNE ] = &DDStart[TSW *size_Mat];
-				D.f[BNW ] = &DDStart[TSE *size_Mat];
-				D.f[BSE ] = &DDStart[TNW *size_Mat];
-				D.f[TSW ] = &DDStart[BNE *size_Mat];
-				D.f[TNE ] = &DDStart[BSW *size_Mat];
-				D.f[TNW ] = &DDStart[BSE *size_Mat];
-				D.f[TSE ] = &DDStart[BNW *size_Mat];
+				D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -3380,33 +3380,33 @@ extern "C" __global__ void LB_Kernel_Kum_New_SP_27(     real omega,
 			//unsigned int ktne = k;
 			//unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[E   ])[k  ];//[ke   ];// +  c2over27 ;(D.f[E   ])[k  ];//ke
-			real mfabb = (D.f[W   ])[kw ];//[kw   ];// +  c2over27 ;(D.f[W   ])[kw ];
-			real mfbcb = (D.f[N   ])[k  ];//[kn   ];// +  c2over27 ;(D.f[N   ])[k  ];//kn
-			real mfbab = (D.f[S   ])[ks ];//[ks   ];// +  c2over27 ;(D.f[S   ])[ks ];
-			real mfbbc = (D.f[T   ])[k  ];//[kt   ];// +  c2over27 ;(D.f[T   ])[k  ];//kt
-			real mfbba = (D.f[B   ])[kb ];//[kb   ];// +  c2over27 ;(D.f[B   ])[kb ];
-			real mfccb = (D.f[NE  ])[k  ];//[kne  ];// +  c1over54 ;(D.f[NE  ])[k  ];//kne
-			real mfaab = (D.f[SW  ])[ksw];//[ksw  ];// +  c1over54 ;(D.f[SW  ])[ksw];
-			real mfcab = (D.f[SE  ])[ks ];//[kse  ];// +  c1over54 ;(D.f[SE  ])[ks ];//kse
-			real mfacb = (D.f[NW  ])[kw ];//[knw  ];// +  c1over54 ;(D.f[NW  ])[kw ];//knw
-			real mfcbc = (D.f[TE  ])[k  ];//[kte  ];// +  c1over54 ;(D.f[TE  ])[k  ];//kte
-			real mfaba = (D.f[BW  ])[kbw];//[kbw  ];// +  c1over54 ;(D.f[BW  ])[kbw];
-			real mfcba = (D.f[BE  ])[kb ];//[kbe  ];// +  c1over54 ;(D.f[BE  ])[kb ];//kbe
-			real mfabc = (D.f[TW  ])[kw ];//[ktw  ];// +  c1over54 ;(D.f[TW  ])[kw ];//ktw
-			real mfbcc = (D.f[TN  ])[k  ];//[ktn  ];// +  c1over54 ;(D.f[TN  ])[k  ];//ktn
-			real mfbaa = (D.f[BS  ])[kbs];//[kbs  ];// +  c1over54 ;(D.f[BS  ])[kbs];
-			real mfbca = (D.f[BN  ])[kb ];//[kbn  ];// +  c1over54 ;(D.f[BN  ])[kb ];//kbn
-			real mfbac = (D.f[TS  ])[ks ];//[kts  ];// +  c1over54 ;(D.f[TS  ])[ks ];//kts
-			real mfbbb = (D.f[REST])[k  ];//[kzero];// +  c8over27 ;(D.f[REST])[k  ];//kzero
-			real mfccc = (D.f[TNE ])[k  ];//[ktne ];// +  c1over216;(D.f[TNE ])[k  ];//ktne
-			real mfaac = (D.f[TSW ])[ksw];//[ktsw ];// +  c1over216;(D.f[TSW ])[ksw];//ktsw
-			real mfcac = (D.f[TSE ])[ks ];//[ktse ];// +  c1over216;(D.f[TSE ])[ks ];//ktse
-			real mfacc = (D.f[TNW ])[kw ];//[ktnw ];// +  c1over216;(D.f[TNW ])[kw ];//ktnw
-			real mfcca = (D.f[BNE ])[kb ];//[kbne ];// +  c1over216;(D.f[BNE ])[kb ];//kbne
-			real mfaaa = (D.f[BSW ])[kbsw];//[kbsw ];// +  c1over216;(D.f[BSW ])[kbsw];
-			real mfcaa = (D.f[BSE ])[kbs];//[kbse ];// +  c1over216;(D.f[BSE ])[kbs];//kbse
-			real mfaca = (D.f[BNW ])[kbw];//[kbnw ];// +  c1over216;(D.f[BNW ])[kbw];//kbnw
+			real mfcbb = (D.f[DIR_P00   ])[k  ];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
+			real mfabb = (D.f[DIR_M00   ])[kw ];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
+			real mfbcb = (D.f[DIR_0P0   ])[k  ];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0   ])[ks ];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
+			real mfbbc = (D.f[DIR_00P   ])[k  ];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
+			real mfbba = (D.f[DIR_00M   ])[kb ];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
+			real mfccb = (D.f[DIR_PP0  ])[k  ];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0  ])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
+			real mfcab = (D.f[DIR_PM0  ])[ks ];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0  ])[kw ];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P  ])[k  ];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M  ])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
+			real mfcba = (D.f[DIR_P0M  ])[kb ];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P  ])[kw ];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP  ])[k  ];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM  ])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
+			real mfbca = (D.f[DIR_0PM  ])[kb ];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP  ])[ks ];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfbbb = (D.f[DIR_000])[k  ];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
+			real mfccc = (D.f[DIR_PPP ])[k  ];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP ])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP ])[ks ];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP ])[kw ];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM ])[kb ];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM ])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
+			real mfcaa = (D.f[DIR_PMM ])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM ])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
 			////////////////////////////////////////////////////////////////////////////////////
 			//slow
 			//real oMdrho = one - ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
@@ -4432,33 +4432,33 @@ extern "C" __global__ void LB_Kernel_Kum_New_SP_27(     real omega,
 
 
 			////////////////////////////////////////////////////////////////////////////////////
-			(D.f[ E   ])[k   ] = mfabb;//(D.f[ E   ])[ke   ] = mfabb;// -  c2over27 ;  (D.f[ E   ])[k   ]                                                                     
-			(D.f[ W   ])[kw  ] = mfcbb;//(D.f[ W   ])[kw   ] = mfcbb;// -  c2over27 ;  (D.f[ W   ])[kw  ]                                                                   
-			(D.f[ N   ])[k   ] = mfbab;//(D.f[ N   ])[kn   ] = mfbab;// -  c2over27 ;	 (D.f[ N   ])[k   ]
-			(D.f[ S   ])[ks  ] = mfbcb;//(D.f[ S   ])[ks   ] = mfbcb;// -  c2over27 ;	 (D.f[ S   ])[ks  ]
-			(D.f[ T   ])[k   ] = mfbba;//(D.f[ T   ])[kt   ] = mfbba;// -  c2over27 ;	 (D.f[ T   ])[k   ]
-			(D.f[ B   ])[kb  ] = mfbbc;//(D.f[ B   ])[kb   ] = mfbbc;// -  c2over27 ;	 (D.f[ B   ])[kb  ]
-			(D.f[ NE  ])[k   ] = mfaab;//(D.f[ NE  ])[kne  ] = mfaab;// -  c1over54 ;	 (D.f[ NE  ])[k   ]
-			(D.f[ SW  ])[ksw ] = mfccb;//(D.f[ SW  ])[ksw  ] = mfccb;// -  c1over54 ;	 (D.f[ SW  ])[ksw ]
-			(D.f[ SE  ])[ks  ] = mfacb;//(D.f[ SE  ])[kse  ] = mfacb;// -  c1over54 ;	 (D.f[ SE  ])[ks  ]
-			(D.f[ NW  ])[kw  ] = mfcab;//(D.f[ NW  ])[knw  ] = mfcab;// -  c1over54 ;	 (D.f[ NW  ])[kw  ]
-			(D.f[ TE  ])[k   ] = mfaba;//(D.f[ TE  ])[kte  ] = mfaba;// -  c1over54 ;	 (D.f[ TE  ])[k   ]
-			(D.f[ BW  ])[kbw ] = mfcbc;//(D.f[ BW  ])[kbw  ] = mfcbc;// -  c1over54 ;	 (D.f[ BW  ])[kbw ]
-			(D.f[ BE  ])[kb  ] = mfabc;//(D.f[ BE  ])[kbe  ] = mfabc;// -  c1over54 ;	 (D.f[ BE  ])[kb  ]
-			(D.f[ TW  ])[kw  ] = mfcba;//(D.f[ TW  ])[ktw  ] = mfcba;// -  c1over54 ;	 (D.f[ TW  ])[kw  ]
-			(D.f[ TN  ])[k   ] = mfbaa;//(D.f[ TN  ])[ktn  ] = mfbaa;// -  c1over54 ;	 (D.f[ TN  ])[k   ]
-			(D.f[ BS  ])[kbs ] = mfbcc;//(D.f[ BS  ])[kbs  ] = mfbcc;// -  c1over54 ;	 (D.f[ BS  ])[kbs ]
-			(D.f[ BN  ])[kb  ] = mfbac;//(D.f[ BN  ])[kbn  ] = mfbac;// -  c1over54 ;	 (D.f[ BN  ])[kb  ]
-			(D.f[ TS  ])[ks  ] = mfbca;//(D.f[ TS  ])[kts  ] = mfbca;// -  c1over54 ;	 (D.f[ TS  ])[ks  ]
-			(D.f[ REST])[k   ] = mfbbb;//(D.f[ REST])[kzero] = mfbbb;// -  c8over27 ;	 (D.f[ REST])[k   ]
-			(D.f[ TNE ])[k   ] = mfaaa;//(D.f[ TNE ])[ktne ] = mfaaa;// -  c1over216;	 (D.f[ TNE ])[k   ]
-			(D.f[ TSE ])[ks  ] = mfaca;//(D.f[ TSE ])[ktse ] = mfaca;// -  c1over216;	 (D.f[ TSE ])[ks  ]
-			(D.f[ BNE ])[kb  ] = mfaac;//(D.f[ BNE ])[kbne ] = mfaac;// -  c1over216;	 (D.f[ BNE ])[kb  ]
-			(D.f[ BSE ])[kbs ] = mfacc;//(D.f[ BSE ])[kbse ] = mfacc;// -  c1over216;	 (D.f[ BSE ])[kbs ]
-			(D.f[ TNW ])[kw  ] = mfcaa;//(D.f[ TNW ])[ktnw ] = mfcaa;// -  c1over216;	 (D.f[ TNW ])[kw  ]
-			(D.f[ TSW ])[ksw ] = mfcca;//(D.f[ TSW ])[ktsw ] = mfcca;// -  c1over216;	 (D.f[ TSW ])[ksw ]
-			(D.f[ BNW ])[kbw ] = mfcac;//(D.f[ BNW ])[kbnw ] = mfcac;// -  c1over216;	 (D.f[ BNW ])[kbw ]
-			(D.f[ BSW ])[kbsw] = mfccc;//(D.f[ BSW ])[kbsw ] = mfccc;// -  c1over216;	 (D.f[ BSW ])[kbsw]
+			(D.f[ DIR_P00   ])[k   ] = mfabb;//(D.f[ DIR_P00   ])[ke   ] = mfabb;// -  c2over27 ;  (D.f[ DIR_P00   ])[k   ]                                                                     
+			(D.f[ DIR_M00   ])[kw  ] = mfcbb;//(D.f[ DIR_M00   ])[kw   ] = mfcbb;// -  c2over27 ;  (D.f[ DIR_M00   ])[kw  ]                                                                   
+			(D.f[ DIR_0P0   ])[k   ] = mfbab;//(D.f[ DIR_0P0   ])[kn   ] = mfbab;// -  c2over27 ;	 (D.f[ DIR_0P0   ])[k   ]
+			(D.f[ DIR_0M0   ])[ks  ] = mfbcb;//(D.f[ DIR_0M0   ])[ks   ] = mfbcb;// -  c2over27 ;	 (D.f[ DIR_0M0   ])[ks  ]
+			(D.f[ DIR_00P   ])[k   ] = mfbba;//(D.f[ DIR_00P   ])[kt   ] = mfbba;// -  c2over27 ;	 (D.f[ DIR_00P   ])[k   ]
+			(D.f[ DIR_00M   ])[kb  ] = mfbbc;//(D.f[ DIR_00M   ])[kb   ] = mfbbc;// -  c2over27 ;	 (D.f[ DIR_00M   ])[kb  ]
+			(D.f[ DIR_PP0  ])[k   ] = mfaab;//(D.f[ DIR_PP0  ])[kne  ] = mfaab;// -  c1over54 ;	 (D.f[ DIR_PP0  ])[k   ]
+			(D.f[ DIR_MM0  ])[ksw ] = mfccb;//(D.f[ DIR_MM0  ])[ksw  ] = mfccb;// -  c1over54 ;	 (D.f[ DIR_MM0  ])[ksw ]
+			(D.f[ DIR_PM0  ])[ks  ] = mfacb;//(D.f[ DIR_PM0  ])[kse  ] = mfacb;// -  c1over54 ;	 (D.f[ DIR_PM0  ])[ks  ]
+			(D.f[ DIR_MP0  ])[kw  ] = mfcab;//(D.f[ DIR_MP0  ])[knw  ] = mfcab;// -  c1over54 ;	 (D.f[ DIR_MP0  ])[kw  ]
+			(D.f[ DIR_P0P  ])[k   ] = mfaba;//(D.f[ DIR_P0P  ])[kte  ] = mfaba;// -  c1over54 ;	 (D.f[ DIR_P0P  ])[k   ]
+			(D.f[ DIR_M0M  ])[kbw ] = mfcbc;//(D.f[ DIR_M0M  ])[kbw  ] = mfcbc;// -  c1over54 ;	 (D.f[ DIR_M0M  ])[kbw ]
+			(D.f[ DIR_P0M  ])[kb  ] = mfabc;//(D.f[ DIR_P0M  ])[kbe  ] = mfabc;// -  c1over54 ;	 (D.f[ DIR_P0M  ])[kb  ]
+			(D.f[ DIR_M0P  ])[kw  ] = mfcba;//(D.f[ DIR_M0P  ])[ktw  ] = mfcba;// -  c1over54 ;	 (D.f[ DIR_M0P  ])[kw  ]
+			(D.f[ DIR_0PP  ])[k   ] = mfbaa;//(D.f[ DIR_0PP  ])[ktn  ] = mfbaa;// -  c1over54 ;	 (D.f[ DIR_0PP  ])[k   ]
+			(D.f[ DIR_0MM  ])[kbs ] = mfbcc;//(D.f[ DIR_0MM  ])[kbs  ] = mfbcc;// -  c1over54 ;	 (D.f[ DIR_0MM  ])[kbs ]
+			(D.f[ DIR_0PM  ])[kb  ] = mfbac;//(D.f[ DIR_0PM  ])[kbn  ] = mfbac;// -  c1over54 ;	 (D.f[ DIR_0PM  ])[kb  ]
+			(D.f[ DIR_0MP  ])[ks  ] = mfbca;//(D.f[ DIR_0MP  ])[kts  ] = mfbca;// -  c1over54 ;	 (D.f[ DIR_0MP  ])[ks  ]
+			(D.f[ DIR_000])[k   ] = mfbbb;//(D.f[ DIR_000])[kzero] = mfbbb;// -  c8over27 ;	 (D.f[ DIR_000])[k   ]
+			(D.f[ DIR_PPP ])[k   ] = mfaaa;//(D.f[ DIR_PPP ])[ktne ] = mfaaa;// -  c1over216;	 (D.f[ DIR_PPP ])[k   ]
+			(D.f[ DIR_PMP ])[ks  ] = mfaca;//(D.f[ DIR_PMP ])[ktse ] = mfaca;// -  c1over216;	 (D.f[ DIR_PMP ])[ks  ]
+			(D.f[ DIR_PPM ])[kb  ] = mfaac;//(D.f[ DIR_PPM ])[kbne ] = mfaac;// -  c1over216;	 (D.f[ DIR_PPM ])[kb  ]
+			(D.f[ DIR_PMM ])[kbs ] = mfacc;//(D.f[ DIR_PMM ])[kbse ] = mfacc;// -  c1over216;	 (D.f[ DIR_PMM ])[kbs ]
+			(D.f[ DIR_MPP ])[kw  ] = mfcaa;//(D.f[ DIR_MPP ])[ktnw ] = mfcaa;// -  c1over216;	 (D.f[ DIR_MPP ])[kw  ]
+			(D.f[ DIR_MMP ])[ksw ] = mfcca;//(D.f[ DIR_MMP ])[ktsw ] = mfcca;// -  c1over216;	 (D.f[ DIR_MMP ])[ksw ]
+			(D.f[ DIR_MPM ])[kbw ] = mfcac;//(D.f[ DIR_MPM ])[kbnw ] = mfcac;// -  c1over216;	 (D.f[ DIR_MPM ])[kbw ]
+			(D.f[ DIR_MMM ])[kbsw] = mfccc;//(D.f[ DIR_MMM ])[kbsw ] = mfccc;// -  c1over216;	 (D.f[ DIR_MMM ])[kbsw]
 			////////////////////////////////////////////////////////////////////////////////////
 		}                                                                                                                    
 	}
@@ -4504,7 +4504,7 @@ extern "C" __global__ void LB_Kernel_Kum_New_SP_27(     real omega,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LB_Kernel_Kum_Comp_SP_27(    real omega,
+__global__ void LB_Kernel_Kum_Comp_SP_27(    real omega,
 														unsigned int* bcMatD,
 														unsigned int* neighborX,
 														unsigned int* neighborY,
@@ -4535,63 +4535,63 @@ extern "C" __global__ void LB_Kernel_Kum_Comp_SP_27(    real omega,
 			Distributions27 D;
 			if (EvenOrOdd==true)
 			{
-				D.f[E   ] = &DDStart[E   *size_Mat];
-				D.f[W   ] = &DDStart[W   *size_Mat];
-				D.f[N   ] = &DDStart[N   *size_Mat];
-				D.f[S   ] = &DDStart[S   *size_Mat];
-				D.f[T   ] = &DDStart[T   *size_Mat];
-				D.f[B   ] = &DDStart[B   *size_Mat];
-				D.f[NE  ] = &DDStart[NE  *size_Mat];
-				D.f[SW  ] = &DDStart[SW  *size_Mat];
-				D.f[SE  ] = &DDStart[SE  *size_Mat];
-				D.f[NW  ] = &DDStart[NW  *size_Mat];
-				D.f[TE  ] = &DDStart[TE  *size_Mat];
-				D.f[BW  ] = &DDStart[BW  *size_Mat];
-				D.f[BE  ] = &DDStart[BE  *size_Mat];
-				D.f[TW  ] = &DDStart[TW  *size_Mat];
-				D.f[TN  ] = &DDStart[TN  *size_Mat];
-				D.f[BS  ] = &DDStart[BS  *size_Mat];
-				D.f[BN  ] = &DDStart[BN  *size_Mat];
-				D.f[TS  ] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[TNE ] = &DDStart[TNE *size_Mat];
-				D.f[TSW ] = &DDStart[TSW *size_Mat];
-				D.f[TSE ] = &DDStart[TSE *size_Mat];
-				D.f[TNW ] = &DDStart[TNW *size_Mat];
-				D.f[BNE ] = &DDStart[BNE *size_Mat];
-				D.f[BSW ] = &DDStart[BSW *size_Mat];
-				D.f[BSE ] = &DDStart[BSE *size_Mat];
-				D.f[BNW ] = &DDStart[BNW *size_Mat];
+				D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
 			}
 			else
 			{
-				D.f[W   ] = &DDStart[E   *size_Mat];
-				D.f[E   ] = &DDStart[W   *size_Mat];
-				D.f[S   ] = &DDStart[N   *size_Mat];
-				D.f[N   ] = &DDStart[S   *size_Mat];
-				D.f[B   ] = &DDStart[T   *size_Mat];
-				D.f[T   ] = &DDStart[B   *size_Mat];
-				D.f[SW  ] = &DDStart[NE  *size_Mat];
-				D.f[NE  ] = &DDStart[SW  *size_Mat];
-				D.f[NW  ] = &DDStart[SE  *size_Mat];
-				D.f[SE  ] = &DDStart[NW  *size_Mat];
-				D.f[BW  ] = &DDStart[TE  *size_Mat];
-				D.f[TE  ] = &DDStart[BW  *size_Mat];
-				D.f[TW  ] = &DDStart[BE  *size_Mat];
-				D.f[BE  ] = &DDStart[TW  *size_Mat];
-				D.f[BS  ] = &DDStart[TN  *size_Mat];
-				D.f[TN  ] = &DDStart[BS  *size_Mat];
-				D.f[TS  ] = &DDStart[BN  *size_Mat];
-				D.f[BN  ] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[BSW ] = &DDStart[TNE *size_Mat];
-				D.f[BNE ] = &DDStart[TSW *size_Mat];
-				D.f[BNW ] = &DDStart[TSE *size_Mat];
-				D.f[BSE ] = &DDStart[TNW *size_Mat];
-				D.f[TSW ] = &DDStart[BNE *size_Mat];
-				D.f[TNE ] = &DDStart[BSW *size_Mat];
-				D.f[TNW ] = &DDStart[BSE *size_Mat];
-				D.f[TSE ] = &DDStart[BNW *size_Mat];
+				D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -4624,33 +4624,33 @@ extern "C" __global__ void LB_Kernel_Kum_Comp_SP_27(    real omega,
 			unsigned int ktne = k;
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real f_E     = (D.f[E   ])[ke   ];// +  c2over27 ;
-			real f_W     = (D.f[W   ])[kw   ];// +  c2over27 ;
-			real f_N     = (D.f[N   ])[kn   ];// +  c2over27 ;
-			real f_S     = (D.f[S   ])[ks   ];// +  c2over27 ;
-			real f_T     = (D.f[T   ])[kt   ];// +  c2over27 ;
-			real f_B     = (D.f[B   ])[kb   ];// +  c2over27 ;
-			real f_NE    = (D.f[NE  ])[kne  ];// +  c1over54 ;
-			real f_SW    = (D.f[SW  ])[ksw  ];// +  c1over54 ;
-			real f_SE    = (D.f[SE  ])[kse  ];// +  c1over54 ;
-			real f_NW    = (D.f[NW  ])[knw  ];// +  c1over54 ;
-			real f_TE    = (D.f[TE  ])[kte  ];// +  c1over54 ;
-			real f_BW    = (D.f[BW  ])[kbw  ];// +  c1over54 ;
-			real f_BE    = (D.f[BE  ])[kbe  ];// +  c1over54 ;
-			real f_TW    = (D.f[TW  ])[ktw  ];// +  c1over54 ;
-			real f_TN    = (D.f[TN  ])[ktn  ];// +  c1over54 ;
-			real f_BS    = (D.f[BS  ])[kbs  ];// +  c1over54 ;
-			real f_BN    = (D.f[BN  ])[kbn  ];// +  c1over54 ;
-			real f_TS    = (D.f[TS  ])[kts  ];// +  c1over54 ;
-			real f_R     = (D.f[REST])[kzero];// +  c8over27 ;
-			real f_TNE   = (D.f[TNE ])[ktne ];// +  c1over216;
-			real f_TSW   = (D.f[TSW ])[ktsw ];// +  c1over216;
-			real f_TSE   = (D.f[TSE ])[ktse ];// +  c1over216;
-			real f_TNW   = (D.f[TNW ])[ktnw ];// +  c1over216;
-			real f_BNE   = (D.f[BNE ])[kbne ];// +  c1over216;
-			real f_BSW   = (D.f[BSW ])[kbsw ];// +  c1over216;
-			real f_BSE   = (D.f[BSE ])[kbse ];// +  c1over216;
-			real f_BNW   = (D.f[BNW ])[kbnw ];// +  c1over216;
+			real f_E     = (D.f[DIR_P00   ])[ke   ];// +  c2over27 ;
+			real f_W     = (D.f[DIR_M00   ])[kw   ];// +  c2over27 ;
+			real f_N     = (D.f[DIR_0P0   ])[kn   ];// +  c2over27 ;
+			real f_S     = (D.f[DIR_0M0   ])[ks   ];// +  c2over27 ;
+			real f_T     = (D.f[DIR_00P   ])[kt   ];// +  c2over27 ;
+			real f_B     = (D.f[DIR_00M   ])[kb   ];// +  c2over27 ;
+			real f_NE    = (D.f[DIR_PP0  ])[kne  ];// +  c1over54 ;
+			real f_SW    = (D.f[DIR_MM0  ])[ksw  ];// +  c1over54 ;
+			real f_SE    = (D.f[DIR_PM0  ])[kse  ];// +  c1over54 ;
+			real f_NW    = (D.f[DIR_MP0  ])[knw  ];// +  c1over54 ;
+			real f_TE    = (D.f[DIR_P0P  ])[kte  ];// +  c1over54 ;
+			real f_BW    = (D.f[DIR_M0M  ])[kbw  ];// +  c1over54 ;
+			real f_BE    = (D.f[DIR_P0M  ])[kbe  ];// +  c1over54 ;
+			real f_TW    = (D.f[DIR_M0P  ])[ktw  ];// +  c1over54 ;
+			real f_TN    = (D.f[DIR_0PP  ])[ktn  ];// +  c1over54 ;
+			real f_BS    = (D.f[DIR_0MM  ])[kbs  ];// +  c1over54 ;
+			real f_BN    = (D.f[DIR_0PM  ])[kbn  ];// +  c1over54 ;
+			real f_TS    = (D.f[DIR_0MP  ])[kts  ];// +  c1over54 ;
+			real f_R     = (D.f[DIR_000])[kzero];// +  c8over27 ;
+			real f_TNE   = (D.f[DIR_PPP ])[ktne ];// +  c1over216;
+			real f_TSW   = (D.f[DIR_MMP ])[ktsw ];// +  c1over216;
+			real f_TSE   = (D.f[DIR_PMP ])[ktse ];// +  c1over216;
+			real f_TNW   = (D.f[DIR_MPP ])[ktnw ];// +  c1over216;
+			real f_BNE   = (D.f[DIR_PPM ])[kbne ];// +  c1over216;
+			real f_BSW   = (D.f[DIR_MMM ])[kbsw ];// +  c1over216;
+			real f_BSE   = (D.f[DIR_PMM ])[kbse ];// +  c1over216;
+			real f_BNW   = (D.f[DIR_MPM ])[kbnw ];// +  c1over216;
 			////////////////////////////////////////////////////////////////////////////////////
 			real fx = c0o1;
 			real fy = c0o1;
@@ -5372,33 +5372,33 @@ extern "C" __global__ void LB_Kernel_Kum_Comp_SP_27(    real omega,
 
 
 			////////////////////////////////////////////////////////////////////////////////////
-			(D.f[ E   ])[ke   ] = mfabb;// -  c2over27 ;//                                                                     
-			(D.f[ W   ])[kw   ] = mfcbb;// -  c2over27 ;                                                                     
-			(D.f[ N   ])[kn   ] = mfbab;// -  c2over27 ;
-			(D.f[ S   ])[ks   ] = mfbcb;// -  c2over27 ;
-			(D.f[ T   ])[kt   ] = mfbba;// -  c2over27 ;
-			(D.f[ B   ])[kb   ] = mfbbc;// -  c2over27 ;
-			(D.f[ NE  ])[kne  ] = mfaab;// -  c1over54 ;
-			(D.f[ SW  ])[ksw  ] = mfccb;// -  c1over54 ;
-			(D.f[ SE  ])[kse  ] = mfacb;// -  c1over54 ;
-			(D.f[ NW  ])[knw  ] = mfcab;// -  c1over54 ;
-			(D.f[ TE  ])[kte  ] = mfaba;// -  c1over54 ;
-			(D.f[ BW  ])[kbw  ] = mfcbc;// -  c1over54 ;
-			(D.f[ BE  ])[kbe  ] = mfabc;// -  c1over54 ;
-			(D.f[ TW  ])[ktw  ] = mfcba;// -  c1over54 ;
-			(D.f[ TN  ])[ktn  ] = mfbaa;// -  c1over54 ;
-			(D.f[ BS  ])[kbs  ] = mfbcc;// -  c1over54 ;
-			(D.f[ BN  ])[kbn  ] = mfbac;// -  c1over54 ;
-			(D.f[ TS  ])[kts  ] = mfbca;// -  c1over54 ;
-			(D.f[ REST])[kzero] = mfbbb;// -  c8over27 ;
-			(D.f[ TNE ])[ktne ] = mfaaa;// -  c1over216;
-			(D.f[ TSE ])[ktse ] = mfaca;// -  c1over216;
-			(D.f[ BNE ])[kbne ] = mfaac;// -  c1over216;
-			(D.f[ BSE ])[kbse ] = mfacc;// -  c1over216;
-			(D.f[ TNW ])[ktnw ] = mfcaa;// -  c1over216;
-			(D.f[ TSW ])[ktsw ] = mfcca;// -  c1over216;
-			(D.f[ BNW ])[kbnw ] = mfcac;// -  c1over216;
-			(D.f[ BSW ])[kbsw ] = mfccc;// -  c1over216;
+			(D.f[ DIR_P00   ])[ke   ] = mfabb;// -  c2over27 ;//                                                                     
+			(D.f[ DIR_M00   ])[kw   ] = mfcbb;// -  c2over27 ;                                                                     
+			(D.f[ DIR_0P0   ])[kn   ] = mfbab;// -  c2over27 ;
+			(D.f[ DIR_0M0   ])[ks   ] = mfbcb;// -  c2over27 ;
+			(D.f[ DIR_00P   ])[kt   ] = mfbba;// -  c2over27 ;
+			(D.f[ DIR_00M   ])[kb   ] = mfbbc;// -  c2over27 ;
+			(D.f[ DIR_PP0  ])[kne  ] = mfaab;// -  c1over54 ;
+			(D.f[ DIR_MM0  ])[ksw  ] = mfccb;// -  c1over54 ;
+			(D.f[ DIR_PM0  ])[kse  ] = mfacb;// -  c1over54 ;
+			(D.f[ DIR_MP0  ])[knw  ] = mfcab;// -  c1over54 ;
+			(D.f[ DIR_P0P  ])[kte  ] = mfaba;// -  c1over54 ;
+			(D.f[ DIR_M0M  ])[kbw  ] = mfcbc;// -  c1over54 ;
+			(D.f[ DIR_P0M  ])[kbe  ] = mfabc;// -  c1over54 ;
+			(D.f[ DIR_M0P  ])[ktw  ] = mfcba;// -  c1over54 ;
+			(D.f[ DIR_0PP  ])[ktn  ] = mfbaa;// -  c1over54 ;
+			(D.f[ DIR_0MM  ])[kbs  ] = mfbcc;// -  c1over54 ;
+			(D.f[ DIR_0PM  ])[kbn  ] = mfbac;// -  c1over54 ;
+			(D.f[ DIR_0MP  ])[kts  ] = mfbca;// -  c1over54 ;
+			(D.f[ DIR_000])[kzero] = mfbbb;// -  c8over27 ;
+			(D.f[ DIR_PPP ])[ktne ] = mfaaa;// -  c1over216;
+			(D.f[ DIR_PMP ])[ktse ] = mfaca;// -  c1over216;
+			(D.f[ DIR_PPM ])[kbne ] = mfaac;// -  c1over216;
+			(D.f[ DIR_PMM ])[kbse ] = mfacc;// -  c1over216;
+			(D.f[ DIR_MPP ])[ktnw ] = mfcaa;// -  c1over216;
+			(D.f[ DIR_MMP ])[ktsw ] = mfcca;// -  c1over216;
+			(D.f[ DIR_MPM ])[kbnw ] = mfcac;// -  c1over216;
+			(D.f[ DIR_MMM ])[kbsw ] = mfccc;// -  c1over216;
 			////////////////////////////////////////////////////////////////////////////////////
 		}                                                                                                                    
 	}
@@ -5444,7 +5444,7 @@ extern "C" __global__ void LB_Kernel_Kum_Comp_SP_27(    real omega,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LB_Kernel_Kum_New_Comp_SRT_SP_27(
+__global__ void LB_Kernel_Kum_New_Comp_SRT_SP_27(
 	real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
@@ -5478,63 +5478,63 @@ extern "C" __global__ void LB_Kernel_Kum_New_Comp_SRT_SP_27(
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[E] = &DDStart[E   *size_Mat];
-				D.f[W] = &DDStart[W   *size_Mat];
-				D.f[N] = &DDStart[N   *size_Mat];
-				D.f[S] = &DDStart[S   *size_Mat];
-				D.f[T] = &DDStart[T   *size_Mat];
-				D.f[B] = &DDStart[B   *size_Mat];
-				D.f[NE] = &DDStart[NE  *size_Mat];
-				D.f[SW] = &DDStart[SW  *size_Mat];
-				D.f[SE] = &DDStart[SE  *size_Mat];
-				D.f[NW] = &DDStart[NW  *size_Mat];
-				D.f[TE] = &DDStart[TE  *size_Mat];
-				D.f[BW] = &DDStart[BW  *size_Mat];
-				D.f[BE] = &DDStart[BE  *size_Mat];
-				D.f[TW] = &DDStart[TW  *size_Mat];
-				D.f[TN] = &DDStart[TN  *size_Mat];
-				D.f[BS] = &DDStart[BS  *size_Mat];
-				D.f[BN] = &DDStart[BN  *size_Mat];
-				D.f[TS] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[TNE] = &DDStart[TNE *size_Mat];
-				D.f[TSW] = &DDStart[TSW *size_Mat];
-				D.f[TSE] = &DDStart[TSE *size_Mat];
-				D.f[TNW] = &DDStart[TNW *size_Mat];
-				D.f[BNE] = &DDStart[BNE *size_Mat];
-				D.f[BSW] = &DDStart[BSW *size_Mat];
-				D.f[BSE] = &DDStart[BSE *size_Mat];
-				D.f[BNW] = &DDStart[BNW *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
 			}
 			else
 			{
-				D.f[W] = &DDStart[E   *size_Mat];
-				D.f[E] = &DDStart[W   *size_Mat];
-				D.f[S] = &DDStart[N   *size_Mat];
-				D.f[N] = &DDStart[S   *size_Mat];
-				D.f[B] = &DDStart[T   *size_Mat];
-				D.f[T] = &DDStart[B   *size_Mat];
-				D.f[SW] = &DDStart[NE  *size_Mat];
-				D.f[NE] = &DDStart[SW  *size_Mat];
-				D.f[NW] = &DDStart[SE  *size_Mat];
-				D.f[SE] = &DDStart[NW  *size_Mat];
-				D.f[BW] = &DDStart[TE  *size_Mat];
-				D.f[TE] = &DDStart[BW  *size_Mat];
-				D.f[TW] = &DDStart[BE  *size_Mat];
-				D.f[BE] = &DDStart[TW  *size_Mat];
-				D.f[BS] = &DDStart[TN  *size_Mat];
-				D.f[TN] = &DDStart[BS  *size_Mat];
-				D.f[TS] = &DDStart[BN  *size_Mat];
-				D.f[BN] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[BSW] = &DDStart[TNE *size_Mat];
-				D.f[BNE] = &DDStart[TSW *size_Mat];
-				D.f[BNW] = &DDStart[TSE *size_Mat];
-				D.f[BSE] = &DDStart[TNW *size_Mat];
-				D.f[TSW] = &DDStart[BNE *size_Mat];
-				D.f[TNE] = &DDStart[BSW *size_Mat];
-				D.f[TNW] = &DDStart[BSE *size_Mat];
-				D.f[TSE] = &DDStart[BNW *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -5568,33 +5568,33 @@ extern "C" __global__ void LB_Kernel_Kum_New_Comp_SRT_SP_27(
 			unsigned int kbsw = neighborZ[ksw];
 
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[E   ])[k   ];
-			real mfabb = (D.f[W   ])[kw  ];
-			real mfbcb = (D.f[N   ])[k   ];
-			real mfbab = (D.f[S   ])[ks  ];
-			real mfbbc = (D.f[T   ])[k   ];
-			real mfbba = (D.f[B   ])[kb  ];
-			real mfccb = (D.f[NE  ])[k   ];
-			real mfaab = (D.f[SW  ])[ksw ];
-			real mfcab = (D.f[SE  ])[ks  ];
-			real mfacb = (D.f[NW  ])[kw  ];
-			real mfcbc = (D.f[TE  ])[k   ];
-			real mfaba = (D.f[BW  ])[kbw ];
-			real mfcba = (D.f[BE  ])[kb  ];
-			real mfabc = (D.f[TW  ])[kw  ];
-			real mfbcc = (D.f[TN  ])[k   ];
-			real mfbaa = (D.f[BS  ])[kbs ];
-			real mfbca = (D.f[BN  ])[kb  ];
-			real mfbac = (D.f[TS  ])[ks  ];
-			real mfbbb = (D.f[REST])[k   ];
-			real mfccc = (D.f[TNE ])[k   ];
-			real mfaac = (D.f[TSW ])[ksw ];
-			real mfcac = (D.f[TSE ])[ks  ];
-			real mfacc = (D.f[TNW ])[kw  ];
-			real mfcca = (D.f[BNE ])[kb  ];
-			real mfaaa = (D.f[BSW ])[kbsw];
-			real mfcaa = (D.f[BSE ])[kbs ];
-			real mfaca = (D.f[BNW ])[kbw ];
+			real mfcbb = (D.f[DIR_P00   ])[k   ];
+			real mfabb = (D.f[DIR_M00   ])[kw  ];
+			real mfbcb = (D.f[DIR_0P0   ])[k   ];
+			real mfbab = (D.f[DIR_0M0   ])[ks  ];
+			real mfbbc = (D.f[DIR_00P   ])[k   ];
+			real mfbba = (D.f[DIR_00M   ])[kb  ];
+			real mfccb = (D.f[DIR_PP0  ])[k   ];
+			real mfaab = (D.f[DIR_MM0  ])[ksw ];
+			real mfcab = (D.f[DIR_PM0  ])[ks  ];
+			real mfacb = (D.f[DIR_MP0  ])[kw  ];
+			real mfcbc = (D.f[DIR_P0P  ])[k   ];
+			real mfaba = (D.f[DIR_M0M  ])[kbw ];
+			real mfcba = (D.f[DIR_P0M  ])[kb  ];
+			real mfabc = (D.f[DIR_M0P  ])[kw  ];
+			real mfbcc = (D.f[DIR_0PP  ])[k   ];
+			real mfbaa = (D.f[DIR_0MM  ])[kbs ];
+			real mfbca = (D.f[DIR_0PM  ])[kb  ];
+			real mfbac = (D.f[DIR_0MP  ])[ks  ];
+			real mfbbb = (D.f[DIR_000])[k   ];
+			real mfccc = (D.f[DIR_PPP ])[k   ];
+			real mfaac = (D.f[DIR_MMP ])[ksw ];
+			real mfcac = (D.f[DIR_PMP ])[ks  ];
+			real mfacc = (D.f[DIR_MPP ])[kw  ];
+			real mfcca = (D.f[DIR_PPM ])[kb  ];
+			real mfaaa = (D.f[DIR_MMM ])[kbsw];
+			real mfcaa = (D.f[DIR_PMM ])[kbs ];
+			real mfaca = (D.f[DIR_MPM ])[kbw ];
 			////////////////////////////////////////////////////////////////////////////////////
 			real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 				(((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
@@ -6349,33 +6349,33 @@ extern "C" __global__ void LB_Kernel_Kum_New_Comp_SRT_SP_27(
 					((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
 			mfbbb += drho - drhoPost;
 			////////////////////////////////////////////////////////////////////////////////////
-			(D.f[E   ])[k   ] = mfabb;                                                                   
-			(D.f[W   ])[kw  ] = mfcbb;                                                                 
-			(D.f[N   ])[k   ] = mfbab;
-			(D.f[S   ])[ks  ] = mfbcb;
-			(D.f[T   ])[k   ] = mfbba;
-			(D.f[B   ])[kb  ] = mfbbc;
-			(D.f[NE  ])[k   ] = mfaab;
-			(D.f[SW  ])[ksw ] = mfccb;
-			(D.f[SE  ])[ks  ] = mfacb;
-			(D.f[NW  ])[kw  ] = mfcab;
-			(D.f[TE  ])[k   ] = mfaba;
-			(D.f[BW  ])[kbw ] = mfcbc;
-			(D.f[BE  ])[kb  ] = mfabc;
-			(D.f[TW  ])[kw  ] = mfcba;
-			(D.f[TN  ])[k   ] = mfbaa;
-			(D.f[BS  ])[kbs ] = mfbcc;
-			(D.f[BN  ])[kb  ] = mfbac;
-			(D.f[TS  ])[ks  ] = mfbca;
-			(D.f[REST])[k   ] = mfbbb;
-			(D.f[TNE ])[k   ] = mfaaa;
-			(D.f[TSE ])[ks  ] = mfaca;
-			(D.f[BNE ])[kb  ] = mfaac;
-			(D.f[BSE ])[kbs ] = mfacc;
-			(D.f[TNW ])[kw  ] = mfcaa;
-			(D.f[TSW ])[ksw ] = mfcca;
-			(D.f[BNW ])[kbw ] = mfcac;
-			(D.f[BSW ])[kbsw] = mfccc;
+			(D.f[DIR_P00   ])[k   ] = mfabb;                                                                   
+			(D.f[DIR_M00   ])[kw  ] = mfcbb;                                                                 
+			(D.f[DIR_0P0   ])[k   ] = mfbab;
+			(D.f[DIR_0M0   ])[ks  ] = mfbcb;
+			(D.f[DIR_00P   ])[k   ] = mfbba;
+			(D.f[DIR_00M   ])[kb  ] = mfbbc;
+			(D.f[DIR_PP0  ])[k   ] = mfaab;
+			(D.f[DIR_MM0  ])[ksw ] = mfccb;
+			(D.f[DIR_PM0  ])[ks  ] = mfacb;
+			(D.f[DIR_MP0  ])[kw  ] = mfcab;
+			(D.f[DIR_P0P  ])[k   ] = mfaba;
+			(D.f[DIR_M0M  ])[kbw ] = mfcbc;
+			(D.f[DIR_P0M  ])[kb  ] = mfabc;
+			(D.f[DIR_M0P  ])[kw  ] = mfcba;
+			(D.f[DIR_0PP  ])[k   ] = mfbaa;
+			(D.f[DIR_0MM  ])[kbs ] = mfbcc;
+			(D.f[DIR_0PM  ])[kb  ] = mfbac;
+			(D.f[DIR_0MP  ])[ks  ] = mfbca;
+			(D.f[DIR_000])[k   ] = mfbbb;
+			(D.f[DIR_PPP ])[k   ] = mfaaa;
+			(D.f[DIR_PMP ])[ks  ] = mfaca;
+			(D.f[DIR_PPM ])[kb  ] = mfaac;
+			(D.f[DIR_PMM ])[kbs ] = mfacc;
+			(D.f[DIR_MPP ])[kw  ] = mfcaa;
+			(D.f[DIR_MMP ])[ksw ] = mfcca;
+			(D.f[DIR_MPM ])[kbw ] = mfcac;
+			(D.f[DIR_MMM ])[kbsw] = mfccc;
 		}
 	}
 }
diff --git a/src/gpu/VirtualFluids_GPU/GPU/Cumulant27chim.cu b/src/gpu/VirtualFluids_GPU/GPU/Cumulant27chim.cu
index 19683722f50f402fdc6dfdf5c7774d41b178874c..97c1aff4d26cb85deaf1dd0d145245f28affc2e3 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/Cumulant27chim.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/Cumulant27chim.cu
@@ -44,7 +44,7 @@ using namespace vf::lbm::dir;
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void Cumulant_One_preconditioned_errorDiffusion_chim_Comp_SP_27(
+__global__ void Cumulant_One_preconditioned_errorDiffusion_chim_Comp_SP_27(
     real omega,
     unsigned int* bcMatD,
     unsigned int* neighborX,
@@ -78,63 +78,63 @@ extern "C" __global__ void Cumulant_One_preconditioned_errorDiffusion_chim_Comp_
             Distributions27 D;
             if (EvenOrOdd == true)
             {
-                D.f[E] = &DDStart[E   *size_Mat];
-                D.f[W] = &DDStart[W   *size_Mat];
-                D.f[N] = &DDStart[N   *size_Mat];
-                D.f[S] = &DDStart[S   *size_Mat];
-                D.f[T] = &DDStart[T   *size_Mat];
-                D.f[B] = &DDStart[B   *size_Mat];
-                D.f[NE] = &DDStart[NE  *size_Mat];
-                D.f[SW] = &DDStart[SW  *size_Mat];
-                D.f[SE] = &DDStart[SE  *size_Mat];
-                D.f[NW] = &DDStart[NW  *size_Mat];
-                D.f[TE] = &DDStart[TE  *size_Mat];
-                D.f[BW] = &DDStart[BW  *size_Mat];
-                D.f[BE] = &DDStart[BE  *size_Mat];
-                D.f[TW] = &DDStart[TW  *size_Mat];
-                D.f[TN] = &DDStart[TN  *size_Mat];
-                D.f[BS] = &DDStart[BS  *size_Mat];
-                D.f[BN] = &DDStart[BN  *size_Mat];
-                D.f[TS] = &DDStart[TS  *size_Mat];
-                D.f[REST] = &DDStart[REST*size_Mat];
-                D.f[TNE] = &DDStart[TNE *size_Mat];
-                D.f[TSW] = &DDStart[TSW *size_Mat];
-                D.f[TSE] = &DDStart[TSE *size_Mat];
-                D.f[TNW] = &DDStart[TNW *size_Mat];
-                D.f[BNE] = &DDStart[BNE *size_Mat];
-                D.f[BSW] = &DDStart[BSW *size_Mat];
-                D.f[BSE] = &DDStart[BSE *size_Mat];
-                D.f[BNW] = &DDStart[BNW *size_Mat];
+                D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
+                D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
+                D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
+                D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
+                D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
+                D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
+                D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
+                D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
+                D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
+                D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
+                D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
+                D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
+                D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
+                D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
+                D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
+                D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
+                D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
+                D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
+                D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+                D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
+                D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
+                D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
+                D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
+                D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
+                D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
+                D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
+                D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
             }
             else
             {
-                D.f[W] = &DDStart[E   *size_Mat];
-                D.f[E] = &DDStart[W   *size_Mat];
-                D.f[S] = &DDStart[N   *size_Mat];
-                D.f[N] = &DDStart[S   *size_Mat];
-                D.f[B] = &DDStart[T   *size_Mat];
-                D.f[T] = &DDStart[B   *size_Mat];
-                D.f[SW] = &DDStart[NE  *size_Mat];
-                D.f[NE] = &DDStart[SW  *size_Mat];
-                D.f[NW] = &DDStart[SE  *size_Mat];
-                D.f[SE] = &DDStart[NW  *size_Mat];
-                D.f[BW] = &DDStart[TE  *size_Mat];
-                D.f[TE] = &DDStart[BW  *size_Mat];
-                D.f[TW] = &DDStart[BE  *size_Mat];
-                D.f[BE] = &DDStart[TW  *size_Mat];
-                D.f[BS] = &DDStart[TN  *size_Mat];
-                D.f[TN] = &DDStart[BS  *size_Mat];
-                D.f[TS] = &DDStart[BN  *size_Mat];
-                D.f[BN] = &DDStart[TS  *size_Mat];
-                D.f[REST] = &DDStart[REST*size_Mat];
-                D.f[BSW] = &DDStart[TNE *size_Mat];
-                D.f[BNE] = &DDStart[TSW *size_Mat];
-                D.f[BNW] = &DDStart[TSE *size_Mat];
-                D.f[BSE] = &DDStart[TNW *size_Mat];
-                D.f[TSW] = &DDStart[BNE *size_Mat];
-                D.f[TNE] = &DDStart[BSW *size_Mat];
-                D.f[TNW] = &DDStart[BSE *size_Mat];
-                D.f[TSE] = &DDStart[BNW *size_Mat];
+                D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
+                D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
+                D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
+                D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
+                D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
+                D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
+                D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
+                D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
+                D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
+                D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
+                D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
+                D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
+                D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
+                D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
+                D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
+                D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
+                D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
+                D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
+                D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+                D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
+                D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
+                D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
+                D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
+                D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
+                D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
+                D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
+                D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
             }
 
             ////////////////////////////////////////////////////////////////////////////////
@@ -170,33 +170,33 @@ extern "C" __global__ void Cumulant_One_preconditioned_errorDiffusion_chim_Comp_
 
 
             //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-            real mfcbb = (D.f[E])[k];//[ke   ];// +  c2over27 ;(D.f[E   ])[k  ];//ke
-            real mfabb = (D.f[W])[kw];//[kw   ];// +  c2over27 ;(D.f[W   ])[kw ];
-            real mfbcb = (D.f[N])[k];//[kn   ];// +  c2over27 ;(D.f[N   ])[k  ];//kn
-            real mfbab = (D.f[S])[ks];//[ks   ];// +  c2over27 ;(D.f[S   ])[ks ];
-            real mfbbc = (D.f[T])[k];//[kt   ];// +  c2over27 ;(D.f[T   ])[k  ];//kt
-            real mfbba = (D.f[B])[kb];//[kb   ];// +  c2over27 ;(D.f[B   ])[kb ];
-            real mfccb = (D.f[NE])[k];//[kne  ];// +  c1over54 ;(D.f[NE  ])[k  ];//kne
-            real mfaab = (D.f[SW])[ksw];//[ksw  ];// +  c1over54 ;(D.f[SW  ])[ksw];
-            real mfcab = (D.f[SE])[ks];//[kse  ];// +  c1over54 ;(D.f[SE  ])[ks ];//kse
-            real mfacb = (D.f[NW])[kw];//[knw  ];// +  c1over54 ;(D.f[NW  ])[kw ];//knw
-            real mfcbc = (D.f[TE])[k];//[kte  ];// +  c1over54 ;(D.f[TE  ])[k  ];//kte
-            real mfaba = (D.f[BW])[kbw];//[kbw  ];// +  c1over54 ;(D.f[BW  ])[kbw];
-            real mfcba = (D.f[BE])[kb];//[kbe  ];// +  c1over54 ;(D.f[BE  ])[kb ];//kbe
-            real mfabc = (D.f[TW])[kw];//[ktw  ];// +  c1over54 ;(D.f[TW  ])[kw ];//ktw
-            real mfbcc = (D.f[TN])[k];//[ktn  ];// +  c1over54 ;(D.f[TN  ])[k  ];//ktn
-            real mfbaa = (D.f[BS])[kbs];//[kbs  ];// +  c1over54 ;(D.f[BS  ])[kbs];
-            real mfbca = (D.f[BN])[kb];//[kbn  ];// +  c1over54 ;(D.f[BN  ])[kb ];//kbn
-            real mfbac = (D.f[TS])[ks];//[kts  ];// +  c1over54 ;(D.f[TS  ])[ks ];//kts
-            real mfbbb = (D.f[REST])[k];//[kzero];// +  c8over27 ;(D.f[REST])[k  ];//kzero
-            real mfccc = (D.f[TNE])[k];//[ktne ];// +  c1over216;(D.f[TNE ])[k  ];//ktne
-            real mfaac = (D.f[TSW])[ksw];//[ktsw ];// +  c1over216;(D.f[TSW ])[ksw];//ktsw
-            real mfcac = (D.f[TSE])[ks];//[ktse ];// +  c1over216;(D.f[TSE ])[ks ];//ktse
-            real mfacc = (D.f[TNW])[kw];//[ktnw ];// +  c1over216;(D.f[TNW ])[kw ];//ktnw
-            real mfcca = (D.f[BNE])[kb];//[kbne ];// +  c1over216;(D.f[BNE ])[kb ];//kbne
-            real mfaaa = (D.f[BSW])[kbsw];//[kbsw ];// +  c1over216;(D.f[BSW ])[kbsw];
-            real mfcaa = (D.f[BSE])[kbs];//[kbse ];// +  c1over216;(D.f[BSE ])[kbs];//kbse
-            real mfaca = (D.f[BNW])[kbw];//[kbnw ];// +  c1over216;(D.f[BNW ])[kbw];//kbnw
+            real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
+            real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
+            real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
+            real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
+            real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
+            real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
+            real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
+            real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
+            real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
+            real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
+            real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
+            real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
+            real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
+            real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
+            real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
+            real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
+            real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
+            real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+            real mfbbb = (D.f[DIR_000])[k];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
+            real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
+            real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
+            real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
+            real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
+            real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
+            real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
+            real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
+            real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
                                                ////////////////////////////////////////////////////////////////////////////////////
             real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
                 (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
@@ -886,33 +886,33 @@ extern "C" __global__ void Cumulant_One_preconditioned_errorDiffusion_chim_Comp_
             //		((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
             //mfbbb += drho - drhoPost;
             ////////////////////////////////////////////////////////////////////////////////////
-            (D.f[E])[k] = mfabb;//(D.f[ E   ])[ke   ] = mfabb;// -  c2over27 ;  (D.f[ E   ])[k   ]                                                                     
-            (D.f[W])[kw] = mfcbb;//(D.f[ W   ])[kw   ] = mfcbb;// -  c2over27 ;  (D.f[ W   ])[kw  ]                                                                   
-            (D.f[N])[k] = mfbab;//(D.f[ N   ])[kn   ] = mfbab;// -  c2over27 ;	 (D.f[ N   ])[k   ]
-            (D.f[S])[ks] = mfbcb;//(D.f[ S   ])[ks   ] = mfbcb;// -  c2over27 ;	 (D.f[ S   ])[ks  ]
-            (D.f[T])[k] = mfbba;//(D.f[ T   ])[kt   ] = mfbba;// -  c2over27 ;	 (D.f[ T   ])[k   ]
-            (D.f[B])[kb] = mfbbc;//(D.f[ B   ])[kb   ] = mfbbc;// -  c2over27 ;	 (D.f[ B   ])[kb  ]
-            (D.f[NE])[k] = mfaab;//(D.f[ NE  ])[kne  ] = mfaab;// -  c1over54 ;	 (D.f[ NE  ])[k   ]
-            (D.f[SW])[ksw] = mfccb;//(D.f[ SW  ])[ksw  ] = mfccb;// -  c1over54 ;	 (D.f[ SW  ])[ksw ]
-            (D.f[SE])[ks] = mfacb;//(D.f[ SE  ])[kse  ] = mfacb;// -  c1over54 ;	 (D.f[ SE  ])[ks  ]
-            (D.f[NW])[kw] = mfcab;//(D.f[ NW  ])[knw  ] = mfcab;// -  c1over54 ;	 (D.f[ NW  ])[kw  ]
-            (D.f[TE])[k] = mfaba;//(D.f[ TE  ])[kte  ] = mfaba;// -  c1over54 ;	 (D.f[ TE  ])[k   ]
-            (D.f[BW])[kbw] = mfcbc;//(D.f[ BW  ])[kbw  ] = mfcbc;// -  c1over54 ;	 (D.f[ BW  ])[kbw ]
-            (D.f[BE])[kb] = mfabc;//(D.f[ BE  ])[kbe  ] = mfabc;// -  c1over54 ;	 (D.f[ BE  ])[kb  ]
-            (D.f[TW])[kw] = mfcba;//(D.f[ TW  ])[ktw  ] = mfcba;// -  c1over54 ;	 (D.f[ TW  ])[kw  ]
-            (D.f[TN])[k] = mfbaa;//(D.f[ TN  ])[ktn  ] = mfbaa;// -  c1over54 ;	 (D.f[ TN  ])[k   ]
-            (D.f[BS])[kbs] = mfbcc;//(D.f[ BS  ])[kbs  ] = mfbcc;// -  c1over54 ;	 (D.f[ BS  ])[kbs ]
-            (D.f[BN])[kb] = mfbac;//(D.f[ BN  ])[kbn  ] = mfbac;// -  c1over54 ;	 (D.f[ BN  ])[kb  ]
-            (D.f[TS])[ks] = mfbca;//(D.f[ TS  ])[kts  ] = mfbca;// -  c1over54 ;	 (D.f[ TS  ])[ks  ]
-            (D.f[REST])[k] = mfbbb;//(D.f[ REST])[kzero] = mfbbb;// -  c8over27 ;	 (D.f[ REST])[k   ]
-            (D.f[TNE])[k] = mfaaa;//(D.f[ TNE ])[ktne ] = mfaaa;// -  c1over216;	 (D.f[ TNE ])[k   ]
-            (D.f[TSE])[ks] = mfaca;//(D.f[ TSE ])[ktse ] = mfaca;// -  c1over216;	 (D.f[ TSE ])[ks  ]
-            (D.f[BNE])[kb] = mfaac;//(D.f[ BNE ])[kbne ] = mfaac;// -  c1over216;	 (D.f[ BNE ])[kb  ]
-            (D.f[BSE])[kbs] = mfacc;//(D.f[ BSE ])[kbse ] = mfacc;// -  c1over216;	 (D.f[ BSE ])[kbs ]
-            (D.f[TNW])[kw] = mfcaa;//(D.f[ TNW ])[ktnw ] = mfcaa;// -  c1over216;	 (D.f[ TNW ])[kw  ]
-            (D.f[TSW])[ksw] = mfcca;//(D.f[ TSW ])[ktsw ] = mfcca;// -  c1over216;	 (D.f[ TSW ])[ksw ]
-            (D.f[BNW])[kbw] = mfcac;//(D.f[ BNW ])[kbnw ] = mfcac;// -  c1over216;	 (D.f[ BNW ])[kbw ]
-            (D.f[BSW])[kbsw] = mfccc;//(D.f[ BSW ])[kbsw ] = mfccc;// -  c1over216;	 (D.f[ BSW ])[kbsw]
+            (D.f[DIR_P00])[k] = mfabb;//(D.f[ DIR_P00   ])[ke   ] = mfabb;// -  c2over27 ;  (D.f[ DIR_P00   ])[k   ]                                                                     
+            (D.f[DIR_M00])[kw] = mfcbb;//(D.f[ DIR_M00   ])[kw   ] = mfcbb;// -  c2over27 ;  (D.f[ DIR_M00   ])[kw  ]                                                                   
+            (D.f[DIR_0P0])[k] = mfbab;//(D.f[ DIR_0P0   ])[kn   ] = mfbab;// -  c2over27 ;	 (D.f[ DIR_0P0   ])[k   ]
+            (D.f[DIR_0M0])[ks] = mfbcb;//(D.f[ DIR_0M0   ])[ks   ] = mfbcb;// -  c2over27 ;	 (D.f[ DIR_0M0   ])[ks  ]
+            (D.f[DIR_00P])[k] = mfbba;//(D.f[ DIR_00P   ])[kt   ] = mfbba;// -  c2over27 ;	 (D.f[ DIR_00P   ])[k   ]
+            (D.f[DIR_00M])[kb] = mfbbc;//(D.f[ DIR_00M   ])[kb   ] = mfbbc;// -  c2over27 ;	 (D.f[ DIR_00M   ])[kb  ]
+            (D.f[DIR_PP0])[k] = mfaab;//(D.f[ DIR_PP0  ])[kne  ] = mfaab;// -  c1over54 ;	 (D.f[ DIR_PP0  ])[k   ]
+            (D.f[DIR_MM0])[ksw] = mfccb;//(D.f[ DIR_MM0  ])[ksw  ] = mfccb;// -  c1over54 ;	 (D.f[ DIR_MM0  ])[ksw ]
+            (D.f[DIR_PM0])[ks] = mfacb;//(D.f[ DIR_PM0  ])[kse  ] = mfacb;// -  c1over54 ;	 (D.f[ DIR_PM0  ])[ks  ]
+            (D.f[DIR_MP0])[kw] = mfcab;//(D.f[ DIR_MP0  ])[knw  ] = mfcab;// -  c1over54 ;	 (D.f[ DIR_MP0  ])[kw  ]
+            (D.f[DIR_P0P])[k] = mfaba;//(D.f[ DIR_P0P  ])[kte  ] = mfaba;// -  c1over54 ;	 (D.f[ DIR_P0P  ])[k   ]
+            (D.f[DIR_M0M])[kbw] = mfcbc;//(D.f[ DIR_M0M  ])[kbw  ] = mfcbc;// -  c1over54 ;	 (D.f[ DIR_M0M  ])[kbw ]
+            (D.f[DIR_P0M])[kb] = mfabc;//(D.f[ DIR_P0M  ])[kbe  ] = mfabc;// -  c1over54 ;	 (D.f[ DIR_P0M  ])[kb  ]
+            (D.f[DIR_M0P])[kw] = mfcba;//(D.f[ DIR_M0P  ])[ktw  ] = mfcba;// -  c1over54 ;	 (D.f[ DIR_M0P  ])[kw  ]
+            (D.f[DIR_0PP])[k] = mfbaa;//(D.f[ DIR_0PP  ])[ktn  ] = mfbaa;// -  c1over54 ;	 (D.f[ DIR_0PP  ])[k   ]
+            (D.f[DIR_0MM])[kbs] = mfbcc;//(D.f[ DIR_0MM  ])[kbs  ] = mfbcc;// -  c1over54 ;	 (D.f[ DIR_0MM  ])[kbs ]
+            (D.f[DIR_0PM])[kb] = mfbac;//(D.f[ DIR_0PM  ])[kbn  ] = mfbac;// -  c1over54 ;	 (D.f[ DIR_0PM  ])[kb  ]
+            (D.f[DIR_0MP])[ks] = mfbca;//(D.f[ DIR_0MP  ])[kts  ] = mfbca;// -  c1over54 ;	 (D.f[ DIR_0MP  ])[ks  ]
+            (D.f[DIR_000])[k] = mfbbb;//(D.f[ DIR_000])[kzero] = mfbbb;// -  c8over27 ;	 (D.f[ DIR_000])[k   ]
+            (D.f[DIR_PPP])[k] = mfaaa;//(D.f[ DIR_PPP ])[ktne ] = mfaaa;// -  c1over216;	 (D.f[ DIR_PPP ])[k   ]
+            (D.f[DIR_PMP])[ks] = mfaca;//(D.f[ DIR_PMP ])[ktse ] = mfaca;// -  c1over216;	 (D.f[ DIR_PMP ])[ks  ]
+            (D.f[DIR_PPM])[kb] = mfaac;//(D.f[ DIR_PPM ])[kbne ] = mfaac;// -  c1over216;	 (D.f[ DIR_PPM ])[kb  ]
+            (D.f[DIR_PMM])[kbs] = mfacc;//(D.f[ DIR_PMM ])[kbse ] = mfacc;// -  c1over216;	 (D.f[ DIR_PMM ])[kbs ]
+            (D.f[DIR_MPP])[kw] = mfcaa;//(D.f[ DIR_MPP ])[ktnw ] = mfcaa;// -  c1over216;	 (D.f[ DIR_MPP ])[kw  ]
+            (D.f[DIR_MMP])[ksw] = mfcca;//(D.f[ DIR_MMP ])[ktsw ] = mfcca;// -  c1over216;	 (D.f[ DIR_MMP ])[ksw ]
+            (D.f[DIR_MPM])[kbw] = mfcac;//(D.f[ DIR_MPM ])[kbnw ] = mfcac;// -  c1over216;	 (D.f[ DIR_MPM ])[kbw ]
+            (D.f[DIR_MMM])[kbsw] = mfccc;//(D.f[ DIR_MMM ])[kbsw ] = mfccc;// -  c1over216;	 (D.f[ DIR_MMM ])[kbsw]
                                         ////////////////////////////////////////////////////////////////////////////////////
         }
     }
@@ -959,7 +959,7 @@ extern "C" __global__ void Cumulant_One_preconditioned_errorDiffusion_chim_Comp_
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void Cumulant_One_preconditioned_chim_Comp_SP_27(
+__global__ void Cumulant_One_preconditioned_chim_Comp_SP_27(
     real omega,
     unsigned int* bcMatD,
     unsigned int* neighborX,
@@ -993,63 +993,63 @@ extern "C" __global__ void Cumulant_One_preconditioned_chim_Comp_SP_27(
             Distributions27 D;
             if (EvenOrOdd == true)
             {
-                D.f[E] = &DDStart[E   *size_Mat];
-                D.f[W] = &DDStart[W   *size_Mat];
-                D.f[N] = &DDStart[N   *size_Mat];
-                D.f[S] = &DDStart[S   *size_Mat];
-                D.f[T] = &DDStart[T   *size_Mat];
-                D.f[B] = &DDStart[B   *size_Mat];
-                D.f[NE] = &DDStart[NE  *size_Mat];
-                D.f[SW] = &DDStart[SW  *size_Mat];
-                D.f[SE] = &DDStart[SE  *size_Mat];
-                D.f[NW] = &DDStart[NW  *size_Mat];
-                D.f[TE] = &DDStart[TE  *size_Mat];
-                D.f[BW] = &DDStart[BW  *size_Mat];
-                D.f[BE] = &DDStart[BE  *size_Mat];
-                D.f[TW] = &DDStart[TW  *size_Mat];
-                D.f[TN] = &DDStart[TN  *size_Mat];
-                D.f[BS] = &DDStart[BS  *size_Mat];
-                D.f[BN] = &DDStart[BN  *size_Mat];
-                D.f[TS] = &DDStart[TS  *size_Mat];
-                D.f[REST] = &DDStart[REST*size_Mat];
-                D.f[TNE] = &DDStart[TNE *size_Mat];
-                D.f[TSW] = &DDStart[TSW *size_Mat];
-                D.f[TSE] = &DDStart[TSE *size_Mat];
-                D.f[TNW] = &DDStart[TNW *size_Mat];
-                D.f[BNE] = &DDStart[BNE *size_Mat];
-                D.f[BSW] = &DDStart[BSW *size_Mat];
-                D.f[BSE] = &DDStart[BSE *size_Mat];
-                D.f[BNW] = &DDStart[BNW *size_Mat];
+                D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
+                D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
+                D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
+                D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
+                D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
+                D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
+                D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
+                D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
+                D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
+                D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
+                D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
+                D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
+                D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
+                D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
+                D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
+                D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
+                D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
+                D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
+                D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+                D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
+                D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
+                D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
+                D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
+                D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
+                D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
+                D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
+                D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
             }
             else
             {
-                D.f[W] = &DDStart[E   *size_Mat];
-                D.f[E] = &DDStart[W   *size_Mat];
-                D.f[S] = &DDStart[N   *size_Mat];
-                D.f[N] = &DDStart[S   *size_Mat];
-                D.f[B] = &DDStart[T   *size_Mat];
-                D.f[T] = &DDStart[B   *size_Mat];
-                D.f[SW] = &DDStart[NE  *size_Mat];
-                D.f[NE] = &DDStart[SW  *size_Mat];
-                D.f[NW] = &DDStart[SE  *size_Mat];
-                D.f[SE] = &DDStart[NW  *size_Mat];
-                D.f[BW] = &DDStart[TE  *size_Mat];
-                D.f[TE] = &DDStart[BW  *size_Mat];
-                D.f[TW] = &DDStart[BE  *size_Mat];
-                D.f[BE] = &DDStart[TW  *size_Mat];
-                D.f[BS] = &DDStart[TN  *size_Mat];
-                D.f[TN] = &DDStart[BS  *size_Mat];
-                D.f[TS] = &DDStart[BN  *size_Mat];
-                D.f[BN] = &DDStart[TS  *size_Mat];
-                D.f[REST] = &DDStart[REST*size_Mat];
-                D.f[BSW] = &DDStart[TNE *size_Mat];
-                D.f[BNE] = &DDStart[TSW *size_Mat];
-                D.f[BNW] = &DDStart[TSE *size_Mat];
-                D.f[BSE] = &DDStart[TNW *size_Mat];
-                D.f[TSW] = &DDStart[BNE *size_Mat];
-                D.f[TNE] = &DDStart[BSW *size_Mat];
-                D.f[TNW] = &DDStart[BSE *size_Mat];
-                D.f[TSE] = &DDStart[BNW *size_Mat];
+                D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
+                D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
+                D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
+                D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
+                D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
+                D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
+                D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
+                D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
+                D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
+                D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
+                D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
+                D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
+                D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
+                D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
+                D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
+                D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
+                D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
+                D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
+                D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+                D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
+                D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
+                D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
+                D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
+                D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
+                D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
+                D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
+                D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
             }
 
             ////////////////////////////////////////////////////////////////////////////////
@@ -1085,33 +1085,33 @@ extern "C" __global__ void Cumulant_One_preconditioned_chim_Comp_SP_27(
 
 
             //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-            real mfcbb = (D.f[E])[k];//[ke   ];// +  c2over27 ;(D.f[E   ])[k  ];//ke
-            real mfabb = (D.f[W])[kw];//[kw   ];// +  c2over27 ;(D.f[W   ])[kw ];
-            real mfbcb = (D.f[N])[k];//[kn   ];// +  c2over27 ;(D.f[N   ])[k  ];//kn
-            real mfbab = (D.f[S])[ks];//[ks   ];// +  c2over27 ;(D.f[S   ])[ks ];
-            real mfbbc = (D.f[T])[k];//[kt   ];// +  c2over27 ;(D.f[T   ])[k  ];//kt
-            real mfbba = (D.f[B])[kb];//[kb   ];// +  c2over27 ;(D.f[B   ])[kb ];
-            real mfccb = (D.f[NE])[k];//[kne  ];// +  c1over54 ;(D.f[NE  ])[k  ];//kne
-            real mfaab = (D.f[SW])[ksw];//[ksw  ];// +  c1over54 ;(D.f[SW  ])[ksw];
-            real mfcab = (D.f[SE])[ks];//[kse  ];// +  c1over54 ;(D.f[SE  ])[ks ];//kse
-            real mfacb = (D.f[NW])[kw];//[knw  ];// +  c1over54 ;(D.f[NW  ])[kw ];//knw
-            real mfcbc = (D.f[TE])[k];//[kte  ];// +  c1over54 ;(D.f[TE  ])[k  ];//kte
-            real mfaba = (D.f[BW])[kbw];//[kbw  ];// +  c1over54 ;(D.f[BW  ])[kbw];
-            real mfcba = (D.f[BE])[kb];//[kbe  ];// +  c1over54 ;(D.f[BE  ])[kb ];//kbe
-            real mfabc = (D.f[TW])[kw];//[ktw  ];// +  c1over54 ;(D.f[TW  ])[kw ];//ktw
-            real mfbcc = (D.f[TN])[k];//[ktn  ];// +  c1over54 ;(D.f[TN  ])[k  ];//ktn
-            real mfbaa = (D.f[BS])[kbs];//[kbs  ];// +  c1over54 ;(D.f[BS  ])[kbs];
-            real mfbca = (D.f[BN])[kb];//[kbn  ];// +  c1over54 ;(D.f[BN  ])[kb ];//kbn
-            real mfbac = (D.f[TS])[ks];//[kts  ];// +  c1over54 ;(D.f[TS  ])[ks ];//kts
-            real mfbbb = (D.f[REST])[k];//[kzero];// +  c8over27 ;(D.f[REST])[k  ];//kzero
-            real mfccc = (D.f[TNE])[k];//[ktne ];// +  c1over216;(D.f[TNE ])[k  ];//ktne
-            real mfaac = (D.f[TSW])[ksw];//[ktsw ];// +  c1over216;(D.f[TSW ])[ksw];//ktsw
-            real mfcac = (D.f[TSE])[ks];//[ktse ];// +  c1over216;(D.f[TSE ])[ks ];//ktse
-            real mfacc = (D.f[TNW])[kw];//[ktnw ];// +  c1over216;(D.f[TNW ])[kw ];//ktnw
-            real mfcca = (D.f[BNE])[kb];//[kbne ];// +  c1over216;(D.f[BNE ])[kb ];//kbne
-            real mfaaa = (D.f[BSW])[kbsw];//[kbsw ];// +  c1over216;(D.f[BSW ])[kbsw];
-            real mfcaa = (D.f[BSE])[kbs];//[kbse ];// +  c1over216;(D.f[BSE ])[kbs];//kbse
-            real mfaca = (D.f[BNW])[kbw];//[kbnw ];// +  c1over216;(D.f[BNW ])[kbw];//kbnw
+            real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
+            real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
+            real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
+            real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
+            real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
+            real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
+            real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
+            real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
+            real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
+            real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
+            real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
+            real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
+            real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
+            real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
+            real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
+            real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
+            real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
+            real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+            real mfbbb = (D.f[DIR_000])[k];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
+            real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
+            real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
+            real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
+            real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
+            real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
+            real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
+            real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
+            real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
                                                ////////////////////////////////////////////////////////////////////////////////////
             real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
                 (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
@@ -1682,33 +1682,33 @@ extern "C" __global__ void Cumulant_One_preconditioned_chim_Comp_SP_27(
                     ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
             mfbbb += drho - drhoPost;
             ////////////////////////////////////////////////////////////////////////////////////
-            (D.f[E])[k] = mfabb;//(D.f[ E   ])[ke   ] = mfabb;// -  c2over27 ;  (D.f[ E   ])[k   ]                                                                     
-            (D.f[W])[kw] = mfcbb;//(D.f[ W   ])[kw   ] = mfcbb;// -  c2over27 ;  (D.f[ W   ])[kw  ]                                                                   
-            (D.f[N])[k] = mfbab;//(D.f[ N   ])[kn   ] = mfbab;// -  c2over27 ;	 (D.f[ N   ])[k   ]
-            (D.f[S])[ks] = mfbcb;//(D.f[ S   ])[ks   ] = mfbcb;// -  c2over27 ;	 (D.f[ S   ])[ks  ]
-            (D.f[T])[k] = mfbba;//(D.f[ T   ])[kt   ] = mfbba;// -  c2over27 ;	 (D.f[ T   ])[k   ]
-            (D.f[B])[kb] = mfbbc;//(D.f[ B   ])[kb   ] = mfbbc;// -  c2over27 ;	 (D.f[ B   ])[kb  ]
-            (D.f[NE])[k] = mfaab;//(D.f[ NE  ])[kne  ] = mfaab;// -  c1over54 ;	 (D.f[ NE  ])[k   ]
-            (D.f[SW])[ksw] = mfccb;//(D.f[ SW  ])[ksw  ] = mfccb;// -  c1over54 ;	 (D.f[ SW  ])[ksw ]
-            (D.f[SE])[ks] = mfacb;//(D.f[ SE  ])[kse  ] = mfacb;// -  c1over54 ;	 (D.f[ SE  ])[ks  ]
-            (D.f[NW])[kw] = mfcab;//(D.f[ NW  ])[knw  ] = mfcab;// -  c1over54 ;	 (D.f[ NW  ])[kw  ]
-            (D.f[TE])[k] = mfaba;//(D.f[ TE  ])[kte  ] = mfaba;// -  c1over54 ;	 (D.f[ TE  ])[k   ]
-            (D.f[BW])[kbw] = mfcbc;//(D.f[ BW  ])[kbw  ] = mfcbc;// -  c1over54 ;	 (D.f[ BW  ])[kbw ]
-            (D.f[BE])[kb] = mfabc;//(D.f[ BE  ])[kbe  ] = mfabc;// -  c1over54 ;	 (D.f[ BE  ])[kb  ]
-            (D.f[TW])[kw] = mfcba;//(D.f[ TW  ])[ktw  ] = mfcba;// -  c1over54 ;	 (D.f[ TW  ])[kw  ]
-            (D.f[TN])[k] = mfbaa;//(D.f[ TN  ])[ktn  ] = mfbaa;// -  c1over54 ;	 (D.f[ TN  ])[k   ]
-            (D.f[BS])[kbs] = mfbcc;//(D.f[ BS  ])[kbs  ] = mfbcc;// -  c1over54 ;	 (D.f[ BS  ])[kbs ]
-            (D.f[BN])[kb] = mfbac;//(D.f[ BN  ])[kbn  ] = mfbac;// -  c1over54 ;	 (D.f[ BN  ])[kb  ]
-            (D.f[TS])[ks] = mfbca;//(D.f[ TS  ])[kts  ] = mfbca;// -  c1over54 ;	 (D.f[ TS  ])[ks  ]
-            (D.f[REST])[k] = mfbbb;//(D.f[ REST])[kzero] = mfbbb;// -  c8over27 ;	 (D.f[ REST])[k   ]
-            (D.f[TNE])[k] = mfaaa;//(D.f[ TNE ])[ktne ] = mfaaa;// -  c1over216;	 (D.f[ TNE ])[k   ]
-            (D.f[TSE])[ks] = mfaca;//(D.f[ TSE ])[ktse ] = mfaca;// -  c1over216;	 (D.f[ TSE ])[ks  ]
-            (D.f[BNE])[kb] = mfaac;//(D.f[ BNE ])[kbne ] = mfaac;// -  c1over216;	 (D.f[ BNE ])[kb  ]
-            (D.f[BSE])[kbs] = mfacc;//(D.f[ BSE ])[kbse ] = mfacc;// -  c1over216;	 (D.f[ BSE ])[kbs ]
-            (D.f[TNW])[kw] = mfcaa;//(D.f[ TNW ])[ktnw ] = mfcaa;// -  c1over216;	 (D.f[ TNW ])[kw  ]
-            (D.f[TSW])[ksw] = mfcca;//(D.f[ TSW ])[ktsw ] = mfcca;// -  c1over216;	 (D.f[ TSW ])[ksw ]
-            (D.f[BNW])[kbw] = mfcac;//(D.f[ BNW ])[kbnw ] = mfcac;// -  c1over216;	 (D.f[ BNW ])[kbw ]
-            (D.f[BSW])[kbsw] = mfccc;//(D.f[ BSW ])[kbsw ] = mfccc;// -  c1over216;	 (D.f[ BSW ])[kbsw]
+            (D.f[DIR_P00])[k] = mfabb;//(D.f[ DIR_P00   ])[ke   ] = mfabb;// -  c2over27 ;  (D.f[ DIR_P00   ])[k   ]                                                                     
+            (D.f[DIR_M00])[kw] = mfcbb;//(D.f[ DIR_M00   ])[kw   ] = mfcbb;// -  c2over27 ;  (D.f[ DIR_M00   ])[kw  ]                                                                   
+            (D.f[DIR_0P0])[k] = mfbab;//(D.f[ DIR_0P0   ])[kn   ] = mfbab;// -  c2over27 ;	 (D.f[ DIR_0P0   ])[k   ]
+            (D.f[DIR_0M0])[ks] = mfbcb;//(D.f[ DIR_0M0   ])[ks   ] = mfbcb;// -  c2over27 ;	 (D.f[ DIR_0M0   ])[ks  ]
+            (D.f[DIR_00P])[k] = mfbba;//(D.f[ DIR_00P   ])[kt   ] = mfbba;// -  c2over27 ;	 (D.f[ DIR_00P   ])[k   ]
+            (D.f[DIR_00M])[kb] = mfbbc;//(D.f[ DIR_00M   ])[kb   ] = mfbbc;// -  c2over27 ;	 (D.f[ DIR_00M   ])[kb  ]
+            (D.f[DIR_PP0])[k] = mfaab;//(D.f[ DIR_PP0  ])[kne  ] = mfaab;// -  c1over54 ;	 (D.f[ DIR_PP0  ])[k   ]
+            (D.f[DIR_MM0])[ksw] = mfccb;//(D.f[ DIR_MM0  ])[ksw  ] = mfccb;// -  c1over54 ;	 (D.f[ DIR_MM0  ])[ksw ]
+            (D.f[DIR_PM0])[ks] = mfacb;//(D.f[ DIR_PM0  ])[kse  ] = mfacb;// -  c1over54 ;	 (D.f[ DIR_PM0  ])[ks  ]
+            (D.f[DIR_MP0])[kw] = mfcab;//(D.f[ DIR_MP0  ])[knw  ] = mfcab;// -  c1over54 ;	 (D.f[ DIR_MP0  ])[kw  ]
+            (D.f[DIR_P0P])[k] = mfaba;//(D.f[ DIR_P0P  ])[kte  ] = mfaba;// -  c1over54 ;	 (D.f[ DIR_P0P  ])[k   ]
+            (D.f[DIR_M0M])[kbw] = mfcbc;//(D.f[ DIR_M0M  ])[kbw  ] = mfcbc;// -  c1over54 ;	 (D.f[ DIR_M0M  ])[kbw ]
+            (D.f[DIR_P0M])[kb] = mfabc;//(D.f[ DIR_P0M  ])[kbe  ] = mfabc;// -  c1over54 ;	 (D.f[ DIR_P0M  ])[kb  ]
+            (D.f[DIR_M0P])[kw] = mfcba;//(D.f[ DIR_M0P  ])[ktw  ] = mfcba;// -  c1over54 ;	 (D.f[ DIR_M0P  ])[kw  ]
+            (D.f[DIR_0PP])[k] = mfbaa;//(D.f[ DIR_0PP  ])[ktn  ] = mfbaa;// -  c1over54 ;	 (D.f[ DIR_0PP  ])[k   ]
+            (D.f[DIR_0MM])[kbs] = mfbcc;//(D.f[ DIR_0MM  ])[kbs  ] = mfbcc;// -  c1over54 ;	 (D.f[ DIR_0MM  ])[kbs ]
+            (D.f[DIR_0PM])[kb] = mfbac;//(D.f[ DIR_0PM  ])[kbn  ] = mfbac;// -  c1over54 ;	 (D.f[ DIR_0PM  ])[kb  ]
+            (D.f[DIR_0MP])[ks] = mfbca;//(D.f[ DIR_0MP  ])[kts  ] = mfbca;// -  c1over54 ;	 (D.f[ DIR_0MP  ])[ks  ]
+            (D.f[DIR_000])[k] = mfbbb;//(D.f[ DIR_000])[kzero] = mfbbb;// -  c8over27 ;	 (D.f[ DIR_000])[k   ]
+            (D.f[DIR_PPP])[k] = mfaaa;//(D.f[ DIR_PPP ])[ktne ] = mfaaa;// -  c1over216;	 (D.f[ DIR_PPP ])[k   ]
+            (D.f[DIR_PMP])[ks] = mfaca;//(D.f[ DIR_PMP ])[ktse ] = mfaca;// -  c1over216;	 (D.f[ DIR_PMP ])[ks  ]
+            (D.f[DIR_PPM])[kb] = mfaac;//(D.f[ DIR_PPM ])[kbne ] = mfaac;// -  c1over216;	 (D.f[ DIR_PPM ])[kb  ]
+            (D.f[DIR_PMM])[kbs] = mfacc;//(D.f[ DIR_PMM ])[kbse ] = mfacc;// -  c1over216;	 (D.f[ DIR_PMM ])[kbs ]
+            (D.f[DIR_MPP])[kw] = mfcaa;//(D.f[ DIR_MPP ])[ktnw ] = mfcaa;// -  c1over216;	 (D.f[ DIR_MPP ])[kw  ]
+            (D.f[DIR_MMP])[ksw] = mfcca;//(D.f[ DIR_MMP ])[ktsw ] = mfcca;// -  c1over216;	 (D.f[ DIR_MMP ])[ksw ]
+            (D.f[DIR_MPM])[kbw] = mfcac;//(D.f[ DIR_MPM ])[kbnw ] = mfcac;// -  c1over216;	 (D.f[ DIR_MPM ])[kbw ]
+            (D.f[DIR_MMM])[kbsw] = mfccc;//(D.f[ DIR_MMM ])[kbsw ] = mfccc;// -  c1over216;	 (D.f[ DIR_MMM ])[kbsw]
             ////////////////////////////////////////////////////////////////////////////////////
         }
     }
@@ -1755,7 +1755,7 @@ extern "C" __global__ void Cumulant_One_preconditioned_chim_Comp_SP_27(
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void Cumulant_One_chim_Comp_SP_27(
+__global__ void Cumulant_One_chim_Comp_SP_27(
     real omega,
     unsigned int* bcMatD,
     unsigned int* neighborX,
@@ -1789,63 +1789,63 @@ extern "C" __global__ void Cumulant_One_chim_Comp_SP_27(
             Distributions27 D;
             if (EvenOrOdd == true)
             {
-                D.f[E   ] = &DDStart[E   *size_Mat];
-                D.f[W   ] = &DDStart[W   *size_Mat];
-                D.f[N   ] = &DDStart[N   *size_Mat];
-                D.f[S   ] = &DDStart[S   *size_Mat];
-                D.f[T   ] = &DDStart[T   *size_Mat];
-                D.f[B   ] = &DDStart[B   *size_Mat];
-                D.f[NE  ] = &DDStart[NE  *size_Mat];
-                D.f[SW  ] = &DDStart[SW  *size_Mat];
-                D.f[SE  ] = &DDStart[SE  *size_Mat];
-                D.f[NW  ] = &DDStart[NW  *size_Mat];
-                D.f[TE  ] = &DDStart[TE  *size_Mat];
-                D.f[BW  ] = &DDStart[BW  *size_Mat];
-                D.f[BE  ] = &DDStart[BE  *size_Mat];
-                D.f[TW  ] = &DDStart[TW  *size_Mat];
-                D.f[TN  ] = &DDStart[TN  *size_Mat];
-                D.f[BS  ] = &DDStart[BS  *size_Mat];
-                D.f[BN  ] = &DDStart[BN  *size_Mat];
-                D.f[TS  ] = &DDStart[TS  *size_Mat];
-                D.f[REST] = &DDStart[REST*size_Mat];
-                D.f[TNE ] = &DDStart[TNE *size_Mat];
-                D.f[TSW ] = &DDStart[TSW *size_Mat];
-                D.f[TSE ] = &DDStart[TSE *size_Mat];
-                D.f[TNW ] = &DDStart[TNW *size_Mat];
-                D.f[BNE ] = &DDStart[BNE *size_Mat];
-                D.f[BSW ] = &DDStart[BSW *size_Mat];
-                D.f[BSE ] = &DDStart[BSE *size_Mat];
-                D.f[BNW ] = &DDStart[BNW *size_Mat];
+                D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
+                D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
+                D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
+                D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
+                D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
+                D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
+                D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
+                D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
+                D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
+                D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
+                D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
+                D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
+                D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
+                D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
+                D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
+                D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
+                D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
+                D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
+                D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+                D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
+                D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
+                D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
+                D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
+                D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
+                D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
+                D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
+                D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
             }
             else
             {
-                D.f[W   ] = &DDStart[E   *size_Mat];
-                D.f[E   ] = &DDStart[W   *size_Mat];
-                D.f[S   ] = &DDStart[N   *size_Mat];
-                D.f[N   ] = &DDStart[S   *size_Mat];
-                D.f[B   ] = &DDStart[T   *size_Mat];
-                D.f[T   ] = &DDStart[B   *size_Mat];
-                D.f[SW  ] = &DDStart[NE  *size_Mat];
-                D.f[NE  ] = &DDStart[SW  *size_Mat];
-                D.f[NW  ] = &DDStart[SE  *size_Mat];
-                D.f[SE  ] = &DDStart[NW  *size_Mat];
-                D.f[BW  ] = &DDStart[TE  *size_Mat];
-                D.f[TE  ] = &DDStart[BW  *size_Mat];
-                D.f[TW  ] = &DDStart[BE  *size_Mat];
-                D.f[BE  ] = &DDStart[TW  *size_Mat];
-                D.f[BS  ] = &DDStart[TN  *size_Mat];
-                D.f[TN  ] = &DDStart[BS  *size_Mat];
-                D.f[TS  ] = &DDStart[BN  *size_Mat];
-                D.f[BN  ] = &DDStart[TS  *size_Mat];
-                D.f[REST] = &DDStart[REST*size_Mat];
-                D.f[BSW ] = &DDStart[TNE *size_Mat];
-                D.f[BNE ] = &DDStart[TSW *size_Mat];
-                D.f[BNW ] = &DDStart[TSE *size_Mat];
-                D.f[BSE ] = &DDStart[TNW *size_Mat];
-                D.f[TSW ] = &DDStart[BNE *size_Mat];
-                D.f[TNE ] = &DDStart[BSW *size_Mat];
-                D.f[TNW ] = &DDStart[BSE *size_Mat];
-                D.f[TSE ] = &DDStart[BNW *size_Mat];
+                D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
+                D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
+                D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
+                D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
+                D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
+                D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
+                D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
+                D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
+                D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
+                D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
+                D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
+                D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
+                D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
+                D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
+                D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
+                D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
+                D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
+                D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
+                D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+                D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
+                D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
+                D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
+                D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
+                D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
+                D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
+                D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
+                D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
             }
             ////////////////////////////////////////////////////////////////////////////////
             //index
@@ -1857,33 +1857,33 @@ extern "C" __global__ void Cumulant_One_chim_Comp_SP_27(
             unsigned int kbs = neighborZ[ks];
             unsigned int kbsw = neighborZ[ksw];
             ////////////////////////////////////////////////////////////////////////////////////
-            real mfcbb = (D.f[E   ])[k   ];
-            real mfabb = (D.f[W   ])[kw  ];
-            real mfbcb = (D.f[N   ])[k   ];
-            real mfbab = (D.f[S   ])[ks  ];
-            real mfbbc = (D.f[T   ])[k   ];
-            real mfbba = (D.f[B   ])[kb  ];
-            real mfccb = (D.f[NE  ])[k   ];
-            real mfaab = (D.f[SW  ])[ksw ];
-            real mfcab = (D.f[SE  ])[ks  ];
-            real mfacb = (D.f[NW  ])[kw  ];
-            real mfcbc = (D.f[TE  ])[k   ];
-            real mfaba = (D.f[BW  ])[kbw ];
-            real mfcba = (D.f[BE  ])[kb  ];
-            real mfabc = (D.f[TW  ])[kw  ];
-            real mfbcc = (D.f[TN  ])[k   ];
-            real mfbaa = (D.f[BS  ])[kbs ];
-            real mfbca = (D.f[BN  ])[kb  ];
-            real mfbac = (D.f[TS  ])[ks  ];
-            real mfbbb = (D.f[REST])[k   ];
-            real mfccc = (D.f[TNE ])[k   ];
-            real mfaac = (D.f[TSW ])[ksw ];
-            real mfcac = (D.f[TSE ])[ks  ];
-            real mfacc = (D.f[TNW ])[kw  ];
-            real mfcca = (D.f[BNE ])[kb  ];
-            real mfaaa = (D.f[BSW ])[kbsw];
-            real mfcaa = (D.f[BSE ])[kbs ];
-            real mfaca = (D.f[BNW ])[kbw ];
+            real mfcbb = (D.f[DIR_P00   ])[k   ];
+            real mfabb = (D.f[DIR_M00   ])[kw  ];
+            real mfbcb = (D.f[DIR_0P0   ])[k   ];
+            real mfbab = (D.f[DIR_0M0   ])[ks  ];
+            real mfbbc = (D.f[DIR_00P   ])[k   ];
+            real mfbba = (D.f[DIR_00M   ])[kb  ];
+            real mfccb = (D.f[DIR_PP0  ])[k   ];
+            real mfaab = (D.f[DIR_MM0  ])[ksw ];
+            real mfcab = (D.f[DIR_PM0  ])[ks  ];
+            real mfacb = (D.f[DIR_MP0  ])[kw  ];
+            real mfcbc = (D.f[DIR_P0P  ])[k   ];
+            real mfaba = (D.f[DIR_M0M  ])[kbw ];
+            real mfcba = (D.f[DIR_P0M  ])[kb  ];
+            real mfabc = (D.f[DIR_M0P  ])[kw  ];
+            real mfbcc = (D.f[DIR_0PP  ])[k   ];
+            real mfbaa = (D.f[DIR_0MM  ])[kbs ];
+            real mfbca = (D.f[DIR_0PM  ])[kb  ];
+            real mfbac = (D.f[DIR_0MP  ])[ks  ];
+            real mfbbb = (D.f[DIR_000])[k   ];
+            real mfccc = (D.f[DIR_PPP ])[k   ];
+            real mfaac = (D.f[DIR_MMP ])[ksw ];
+            real mfcac = (D.f[DIR_PMP ])[ks  ];
+            real mfacc = (D.f[DIR_MPP ])[kw  ];
+            real mfcca = (D.f[DIR_PPM ])[kb  ];
+            real mfaaa = (D.f[DIR_MMM ])[kbsw];
+            real mfcaa = (D.f[DIR_PMM ])[kbs ];
+            real mfaca = (D.f[DIR_MPM ])[kbw ];
             ////////////////////////////////////////////////////////////////////////////////////
             real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
                 (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
@@ -2204,33 +2204,33 @@ extern "C" __global__ void Cumulant_One_chim_Comp_SP_27(
                     ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
             mfbbb += drho - drhoPost;
             ////////////////////////////////////////////////////////////////////////////////////
-            (D.f[E   ])[k   ] = mfabb;                                                                   
-            (D.f[W   ])[kw  ] = mfcbb;                                                                 
-            (D.f[N   ])[k   ] = mfbab;
-            (D.f[S   ])[ks  ] = mfbcb;
-            (D.f[T   ])[k   ] = mfbba;
-            (D.f[B   ])[kb  ] = mfbbc;
-            (D.f[NE  ])[k   ] = mfaab;
-            (D.f[SW  ])[ksw ] = mfccb;
-            (D.f[SE  ])[ks  ] = mfacb;
-            (D.f[NW  ])[kw  ] = mfcab;
-            (D.f[TE  ])[k   ] = mfaba;
-            (D.f[BW  ])[kbw ] = mfcbc;
-            (D.f[BE  ])[kb  ] = mfabc;
-            (D.f[TW  ])[kw  ] = mfcba;
-            (D.f[TN  ])[k   ] = mfbaa;
-            (D.f[BS  ])[kbs ] = mfbcc;
-            (D.f[BN  ])[kb  ] = mfbac;
-            (D.f[TS  ])[ks  ] = mfbca;
-            (D.f[REST])[k   ] = mfbbb;
-            (D.f[TNE ])[k   ] = mfaaa;
-            (D.f[TSE ])[ks  ] = mfaca;
-            (D.f[BNE ])[kb  ] = mfaac;
-            (D.f[BSE ])[kbs ] = mfacc;
-            (D.f[TNW ])[kw  ] = mfcaa;
-            (D.f[TSW ])[ksw ] = mfcca;
-            (D.f[BNW ])[kbw ] = mfcac;
-            (D.f[BSW ])[kbsw] = mfccc;
+            (D.f[DIR_P00   ])[k   ] = mfabb;                                                                   
+            (D.f[DIR_M00   ])[kw  ] = mfcbb;                                                                 
+            (D.f[DIR_0P0   ])[k   ] = mfbab;
+            (D.f[DIR_0M0   ])[ks  ] = mfbcb;
+            (D.f[DIR_00P   ])[k   ] = mfbba;
+            (D.f[DIR_00M   ])[kb  ] = mfbbc;
+            (D.f[DIR_PP0  ])[k   ] = mfaab;
+            (D.f[DIR_MM0  ])[ksw ] = mfccb;
+            (D.f[DIR_PM0  ])[ks  ] = mfacb;
+            (D.f[DIR_MP0  ])[kw  ] = mfcab;
+            (D.f[DIR_P0P  ])[k   ] = mfaba;
+            (D.f[DIR_M0M  ])[kbw ] = mfcbc;
+            (D.f[DIR_P0M  ])[kb  ] = mfabc;
+            (D.f[DIR_M0P  ])[kw  ] = mfcba;
+            (D.f[DIR_0PP  ])[k   ] = mfbaa;
+            (D.f[DIR_0MM  ])[kbs ] = mfbcc;
+            (D.f[DIR_0PM  ])[kb  ] = mfbac;
+            (D.f[DIR_0MP  ])[ks  ] = mfbca;
+            (D.f[DIR_000])[k   ] = mfbbb;
+            (D.f[DIR_PPP ])[k   ] = mfaaa;
+            (D.f[DIR_PMP ])[ks  ] = mfaca;
+            (D.f[DIR_PPM ])[kb  ] = mfaac;
+            (D.f[DIR_PMM ])[kbs ] = mfacc;
+            (D.f[DIR_MPP ])[kw  ] = mfcaa;
+            (D.f[DIR_MMP ])[ksw ] = mfcca;
+            (D.f[DIR_MPM ])[kbw ] = mfcac;
+            (D.f[DIR_MMM ])[kbsw] = mfccc;
         }
     }
 }
diff --git a/src/gpu/VirtualFluids_GPU/GPU/Cumulant_F3_27.cu b/src/gpu/VirtualFluids_GPU/GPU/Cumulant_F3_27.cu
index e6b696768bfa604f01bd92d34dde95100b8a29f6..7adfd40da157d825d83c63b084bf1f855ea6dca2 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/Cumulant_F3_27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/Cumulant_F3_27.cu
@@ -16,7 +16,7 @@ using namespace vf::lbm::dir;
 #include "math.h"
 
 /////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LB_PostProcessor_F3_2018_Fehlberg(real omega,
+__global__ void LB_PostProcessor_F3_2018_Fehlberg(real omega,
 															 unsigned int* bcMatD,
 															 unsigned int* neighborX,
 															 unsigned int* neighborY,
@@ -54,83 +54,83 @@ extern "C" __global__ void LB_PostProcessor_F3_2018_Fehlberg(real omega,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[E] = &DDStart[E   *size_Mat];
-				D.f[W] = &DDStart[W   *size_Mat];
-				D.f[N] = &DDStart[N   *size_Mat];
-				D.f[S] = &DDStart[S   *size_Mat];
-				D.f[T] = &DDStart[T   *size_Mat];
-				D.f[B] = &DDStart[B   *size_Mat];
-				D.f[NE] = &DDStart[NE  *size_Mat];
-				D.f[SW] = &DDStart[SW  *size_Mat];
-				D.f[SE] = &DDStart[SE  *size_Mat];
-				D.f[NW] = &DDStart[NW  *size_Mat];
-				D.f[TE] = &DDStart[TE  *size_Mat];
-				D.f[BW] = &DDStart[BW  *size_Mat];
-				D.f[BE] = &DDStart[BE  *size_Mat];
-				D.f[TW] = &DDStart[TW  *size_Mat];
-				D.f[TN] = &DDStart[TN  *size_Mat];
-				D.f[BS] = &DDStart[BS  *size_Mat];
-				D.f[BN] = &DDStart[BN  *size_Mat];
-				D.f[TS] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[TNE] = &DDStart[TNE *size_Mat];
-				D.f[TSW] = &DDStart[TSW *size_Mat];
-				D.f[TSE] = &DDStart[TSE *size_Mat];
-				D.f[TNW] = &DDStart[TNW *size_Mat];
-				D.f[BNE] = &DDStart[BNE *size_Mat];
-				D.f[BSW] = &DDStart[BSW *size_Mat];
-				D.f[BSE] = &DDStart[BSE *size_Mat];
-				D.f[BNW] = &DDStart[BNW *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
 			}
 			else
 			{
-				D.f[W] = &DDStart[E   *size_Mat];
-				D.f[E] = &DDStart[W   *size_Mat];
-				D.f[S] = &DDStart[N   *size_Mat];
-				D.f[N] = &DDStart[S   *size_Mat];
-				D.f[B] = &DDStart[T   *size_Mat];
-				D.f[T] = &DDStart[B   *size_Mat];
-				D.f[SW] = &DDStart[NE  *size_Mat];
-				D.f[NE] = &DDStart[SW  *size_Mat];
-				D.f[NW] = &DDStart[SE  *size_Mat];
-				D.f[SE] = &DDStart[NW  *size_Mat];
-				D.f[BW] = &DDStart[TE  *size_Mat];
-				D.f[TE] = &DDStart[BW  *size_Mat];
-				D.f[TW] = &DDStart[BE  *size_Mat];
-				D.f[BE] = &DDStart[TW  *size_Mat];
-				D.f[BS] = &DDStart[TN  *size_Mat];
-				D.f[TN] = &DDStart[BS  *size_Mat];
-				D.f[TS] = &DDStart[BN  *size_Mat];
-				D.f[BN] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[BSW] = &DDStart[TNE *size_Mat];
-				D.f[BNE] = &DDStart[TSW *size_Mat];
-				D.f[BNW] = &DDStart[TSE *size_Mat];
-				D.f[BSE] = &DDStart[TNW *size_Mat];
-				D.f[TSW] = &DDStart[BNE *size_Mat];
-				D.f[TNE] = &DDStart[BSW *size_Mat];
-				D.f[TNW] = &DDStart[BSE *size_Mat];
-				D.f[TSE] = &DDStart[BNW *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
 			}
 
 			Distributions6 G;
 			if (EvenOrOdd == true)
 			{
-				G.g[E] = &G6[E   *size_Mat];
-				G.g[W] = &G6[W   *size_Mat];
-				G.g[N] = &G6[N   *size_Mat];
-				G.g[S] = &G6[S   *size_Mat];
-				G.g[T] = &G6[T   *size_Mat];
-				G.g[B] = &G6[B   *size_Mat];
+				G.g[DIR_P00] = &G6[DIR_P00   *size_Mat];
+				G.g[DIR_M00] = &G6[DIR_M00   *size_Mat];
+				G.g[DIR_0P0] = &G6[DIR_0P0   *size_Mat];
+				G.g[DIR_0M0] = &G6[DIR_0M0   *size_Mat];
+				G.g[DIR_00P] = &G6[DIR_00P   *size_Mat];
+				G.g[DIR_00M] = &G6[DIR_00M   *size_Mat];
 			}
 			else
 			{
-				G.g[W] = &G6[E   *size_Mat];
-				G.g[E] = &G6[W   *size_Mat];
-				G.g[S] = &G6[N   *size_Mat];
-				G.g[N] = &G6[S   *size_Mat];
-				G.g[B] = &G6[T   *size_Mat];
-				G.g[T] = &G6[B   *size_Mat];
+				G.g[DIR_M00] = &G6[DIR_P00   *size_Mat];
+				G.g[DIR_P00] = &G6[DIR_M00   *size_Mat];
+				G.g[DIR_0M0] = &G6[DIR_0P0   *size_Mat];
+				G.g[DIR_0P0] = &G6[DIR_0M0   *size_Mat];
+				G.g[DIR_00M] = &G6[DIR_00P   *size_Mat];
+				G.g[DIR_00P] = &G6[DIR_00M   *size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -163,43 +163,43 @@ extern "C" __global__ void LB_PostProcessor_F3_2018_Fehlberg(real omega,
 			//unsigned int ktne = k;
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mgcbb = (G.g[E])[k];
-			real mgabb = (G.g[W])[kw];
-			real mgbcb = (G.g[N])[k];
-			real mgbab = (G.g[S])[ks];
-			real mgbbc = (G.g[T])[k];
-			real mgbba = (G.g[B])[kb];
+			real mgcbb = (G.g[DIR_P00])[k];
+			real mgabb = (G.g[DIR_M00])[kw];
+			real mgbcb = (G.g[DIR_0P0])[k];
+			real mgbab = (G.g[DIR_0M0])[ks];
+			real mgbbc = (G.g[DIR_00P])[k];
+			real mgbba = (G.g[DIR_00M])[kb];
 			real dxuxdxux = c1o2 * (-mgcbb + mgabb);
 			real dyuydyuy = c1o2 * (-mgbcb + mgbab);
 			real dzuzdzuz = c1o2 * (-mgbbc + mgbba);
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[E])[k];
-			real mfabb = (D.f[W])[kw];
-			real mfbcb = (D.f[N])[k];
-			real mfbab = (D.f[S])[ks];
-			real mfbbc = (D.f[T])[k];
-			real mfbba = (D.f[B])[kb];
-			real mfccb = (D.f[NE])[k];
-			real mfaab = (D.f[SW])[ksw];
-			real mfcab = (D.f[SE])[ks];
-			real mfacb = (D.f[NW])[kw];
-			real mfcbc = (D.f[TE])[k];
-			real mfaba = (D.f[BW])[kbw];
-			real mfcba = (D.f[BE])[kb];
-			real mfabc = (D.f[TW])[kw];
-			real mfbcc = (D.f[TN])[k];
-			real mfbaa = (D.f[BS])[kbs];
-			real mfbca = (D.f[BN])[kb];
-			real mfbac = (D.f[TS])[ks];
-			real mfbbb = (D.f[REST])[k];
-			real mfccc = (D.f[TNE])[k];
-			real mfaac = (D.f[TSW])[ksw];
-			real mfcac = (D.f[TSE])[ks];
-			real mfacc = (D.f[TNW])[kw];
-			real mfcca = (D.f[BNE])[kb];
-			real mfaaa = (D.f[BSW])[kbsw];
-			real mfcaa = (D.f[BSE])[kbs];
-			real mfaca = (D.f[BNW])[kbw];
+			real mfcbb = (D.f[DIR_P00])[k];
+			real mfabb = (D.f[DIR_M00])[kw];
+			real mfbcb = (D.f[DIR_0P0])[k];
+			real mfbab = (D.f[DIR_0M0])[ks];
+			real mfbbc = (D.f[DIR_00P])[k];
+			real mfbba = (D.f[DIR_00M])[kb];
+			real mfccb = (D.f[DIR_PP0])[k];
+			real mfaab = (D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks];
+			real mfacb = (D.f[DIR_MP0])[kw];
+			real mfcbc = (D.f[DIR_P0P])[k];
+			real mfaba = (D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb];
+			real mfabc = (D.f[DIR_M0P])[kw];
+			real mfbcc = (D.f[DIR_0PP])[k];
+			real mfbaa = (D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb];
+			real mfbac = (D.f[DIR_0MP])[ks];
+			real mfbbb = (D.f[DIR_000])[k];
+			real mfccc = (D.f[DIR_PPP])[k];
+			real mfaac = (D.f[DIR_MMP])[ksw];
+			real mfcac = (D.f[DIR_PMP])[ks];
+			real mfacc = (D.f[DIR_MPP])[kw];
+			real mfcca = (D.f[DIR_PPM])[kb];
+			real mfaaa = (D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];
+			real mfaca = (D.f[DIR_MPM])[kbw];
 			////////////////////////////////////////////////////////////////////////////////////
 			real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 				(((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
@@ -913,41 +913,41 @@ extern "C" __global__ void LB_PostProcessor_F3_2018_Fehlberg(real omega,
 			//////////////////////////////////////////////////////////////////////////////////////
 
 			//////////////////////////////////////////////////////////////////////////////////////
-			//(D.f[E])[k] = mfabb;
-			//(D.f[W])[kw] = mfcbb;
-			//(D.f[N])[k] = mfbab;
-			//(D.f[S])[ks] = mfbcb;
-			//(D.f[T])[k] = mfbba;
-			//(D.f[B])[kb] = mfbbc;
-			//(D.f[NE])[k] = mfaab;
-			//(D.f[SW])[ksw] = mfccb;
-			//(D.f[SE])[ks] = mfacb;
-			//(D.f[NW])[kw] = mfcab;
-			//(D.f[TE])[k] = mfaba;
-			//(D.f[BW])[kbw] = mfcbc;
-			//(D.f[BE])[kb] = mfabc;
-			//(D.f[TW])[kw] = mfcba;
-			//(D.f[TN])[k] = mfbaa;
-			//(D.f[BS])[kbs] = mfbcc;
-			//(D.f[BN])[kb] = mfbac;
-			//(D.f[TS])[ks] = mfbca;
-			//(D.f[REST])[k] = mfbbb;
-			//(D.f[TNE])[k] = mfaaa;
-			//(D.f[TSE])[ks] = mfaca;
-			//(D.f[BNE])[kb] = mfaac;
-			//(D.f[BSE])[kbs] = mfacc;
-			//(D.f[TNW])[kw] = mfcaa;
-			//(D.f[TSW])[ksw] = mfcca;
-			//(D.f[BNW])[kbw] = mfcac;
-			//(D.f[BSW])[kbsw] = mfccc;
+			//(D.f[DIR_P00])[k] = mfabb;
+			//(D.f[DIR_M00])[kw] = mfcbb;
+			//(D.f[DIR_0P0])[k] = mfbab;
+			//(D.f[DIR_0M0])[ks] = mfbcb;
+			//(D.f[DIR_00P])[k] = mfbba;
+			//(D.f[DIR_00M])[kb] = mfbbc;
+			//(D.f[DIR_PP0])[k] = mfaab;
+			//(D.f[DIR_MM0])[ksw] = mfccb;
+			//(D.f[DIR_PM0])[ks] = mfacb;
+			//(D.f[DIR_MP0])[kw] = mfcab;
+			//(D.f[DIR_P0P])[k] = mfaba;
+			//(D.f[DIR_M0M])[kbw] = mfcbc;
+			//(D.f[DIR_P0M])[kb] = mfabc;
+			//(D.f[DIR_M0P])[kw] = mfcba;
+			//(D.f[DIR_0PP])[k] = mfbaa;
+			//(D.f[DIR_0MM])[kbs] = mfbcc;
+			//(D.f[DIR_0PM])[kb] = mfbac;
+			//(D.f[DIR_0MP])[ks] = mfbca;
+			//(D.f[DIR_000])[k] = mfbbb;
+			//(D.f[DIR_PPP])[k] = mfaaa;
+			//(D.f[DIR_PMP])[ks] = mfaca;
+			//(D.f[DIR_PPM])[kb] = mfaac;
+			//(D.f[DIR_PMM])[kbs] = mfacc;
+			//(D.f[DIR_MPP])[kw] = mfcaa;
+			//(D.f[DIR_MMP])[ksw] = mfcca;
+			//(D.f[DIR_MPM])[kbw] = mfcac;
+			//(D.f[DIR_MMM])[kbsw] = mfccc;
 			//////////////////////////////////////////////////////////////////////////////////////
 
-			//(G.g[E])[k] = mgabb;
-			//(G.g[W])[kw] = mgcbb;
-			//(G.g[N])[k] = mgbab;
-			//(G.g[S])[ks] = mgbcb;
-			//(G.g[T])[k] = mgbba;
-			//(G.g[B])[kb] = mgbbc;
+			//(G.g[DIR_P00])[k] = mgabb;
+			//(G.g[DIR_M00])[kw] = mgcbb;
+			//(G.g[DIR_0P0])[k] = mgbab;
+			//(G.g[DIR_0M0])[ks] = mgbcb;
+			//(G.g[DIR_00P])[k] = mgbba;
+			//(G.g[DIR_00M])[kb] = mgbbc;
 		}
 	}
 }
@@ -992,7 +992,7 @@ extern "C" __global__ void LB_PostProcessor_F3_2018_Fehlberg(real omega,
 
 
 ///////////////////////////////////////////////////////////////////////////////////
-//extern "C" __global__ void LB_Kernel_Cumulant_D3Q27F3_2018(	real omega,
+//__global__ void LB_Kernel_Cumulant_D3Q27F3_2018(	real omega,
 //															unsigned int* bcMatD,
 //															unsigned int* neighborX,
 //															unsigned int* neighborY,
@@ -1026,83 +1026,83 @@ extern "C" __global__ void LB_PostProcessor_F3_2018_Fehlberg(real omega,
 //			Distributions27 D;
 //			if (EvenOrOdd == true)
 //			{
-//				D.f[E] = &DDStart[E   *size_Mat];
-//				D.f[W] = &DDStart[W   *size_Mat];
-//				D.f[N] = &DDStart[N   *size_Mat];
-//				D.f[S] = &DDStart[S   *size_Mat];
-//				D.f[T] = &DDStart[T   *size_Mat];
-//				D.f[B] = &DDStart[B   *size_Mat];
-//				D.f[NE] = &DDStart[NE  *size_Mat];
-//				D.f[SW] = &DDStart[SW  *size_Mat];
-//				D.f[SE] = &DDStart[SE  *size_Mat];
-//				D.f[NW] = &DDStart[NW  *size_Mat];
-//				D.f[TE] = &DDStart[TE  *size_Mat];
-//				D.f[BW] = &DDStart[BW  *size_Mat];
-//				D.f[BE] = &DDStart[BE  *size_Mat];
-//				D.f[TW] = &DDStart[TW  *size_Mat];
-//				D.f[TN] = &DDStart[TN  *size_Mat];
-//				D.f[BS] = &DDStart[BS  *size_Mat];
-//				D.f[BN] = &DDStart[BN  *size_Mat];
-//				D.f[TS] = &DDStart[TS  *size_Mat];
-//				D.f[REST] = &DDStart[REST*size_Mat];
-//				D.f[TNE] = &DDStart[TNE *size_Mat];
-//				D.f[TSW] = &DDStart[TSW *size_Mat];
-//				D.f[TSE] = &DDStart[TSE *size_Mat];
-//				D.f[TNW] = &DDStart[TNW *size_Mat];
-//				D.f[BNE] = &DDStart[BNE *size_Mat];
-//				D.f[BSW] = &DDStart[BSW *size_Mat];
-//				D.f[BSE] = &DDStart[BSE *size_Mat];
-//				D.f[BNW] = &DDStart[BNW *size_Mat];
+//				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
+//				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
+//				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
+//				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
+//				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
+//				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
+//				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
+//				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
+//				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
+//				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
+//				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
+//				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
+//				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
+//				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
+//				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
+//				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
+//				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
+//				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
+//				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+//				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
+//				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
+//				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
+//				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
+//				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
+//				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
+//				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
+//				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
 //			}
 //			else
 //			{
-//				D.f[W] = &DDStart[E   *size_Mat];
-//				D.f[E] = &DDStart[W   *size_Mat];
-//				D.f[S] = &DDStart[N   *size_Mat];
-//				D.f[N] = &DDStart[S   *size_Mat];
-//				D.f[B] = &DDStart[T   *size_Mat];
-//				D.f[T] = &DDStart[B   *size_Mat];
-//				D.f[SW] = &DDStart[NE  *size_Mat];
-//				D.f[NE] = &DDStart[SW  *size_Mat];
-//				D.f[NW] = &DDStart[SE  *size_Mat];
-//				D.f[SE] = &DDStart[NW  *size_Mat];
-//				D.f[BW] = &DDStart[TE  *size_Mat];
-//				D.f[TE] = &DDStart[BW  *size_Mat];
-//				D.f[TW] = &DDStart[BE  *size_Mat];
-//				D.f[BE] = &DDStart[TW  *size_Mat];
-//				D.f[BS] = &DDStart[TN  *size_Mat];
-//				D.f[TN] = &DDStart[BS  *size_Mat];
-//				D.f[TS] = &DDStart[BN  *size_Mat];
-//				D.f[BN] = &DDStart[TS  *size_Mat];
-//				D.f[REST] = &DDStart[REST*size_Mat];
-//				D.f[BSW] = &DDStart[TNE *size_Mat];
-//				D.f[BNE] = &DDStart[TSW *size_Mat];
-//				D.f[BNW] = &DDStart[TSE *size_Mat];
-//				D.f[BSE] = &DDStart[TNW *size_Mat];
-//				D.f[TSW] = &DDStart[BNE *size_Mat];
-//				D.f[TNE] = &DDStart[BSW *size_Mat];
-//				D.f[TNW] = &DDStart[BSE *size_Mat];
-//				D.f[TSE] = &DDStart[BNW *size_Mat];
+//				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
+//				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
+//				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
+//				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
+//				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
+//				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
+//				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
+//				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
+//				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
+//				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
+//				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
+//				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
+//				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
+//				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
+//				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
+//				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
+//				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
+//				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
+//				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+//				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
+//				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
+//				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
+//				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
+//				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
+//				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
+//				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
+//				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
 //			}
 //
 //			Distributions6 G;
 //			if (EvenOrOdd == true)
 //			{
-//				G.g[E] = &G6[E   *size_Mat];
-//				G.g[W] = &G6[W   *size_Mat];
-//				G.g[N] = &G6[N   *size_Mat];
-//				G.g[S] = &G6[S   *size_Mat];
-//				G.g[T] = &G6[T   *size_Mat];
-//				G.g[B] = &G6[B   *size_Mat];
+//				G.g[DIR_P00] = &G6[DIR_P00   *size_Mat];
+//				G.g[DIR_M00] = &G6[DIR_M00   *size_Mat];
+//				G.g[DIR_0P0] = &G6[DIR_0P0   *size_Mat];
+//				G.g[DIR_0M0] = &G6[DIR_0M0   *size_Mat];
+//				G.g[DIR_00P] = &G6[DIR_00P   *size_Mat];
+//				G.g[DIR_00M] = &G6[DIR_00M   *size_Mat];
 //			}
 //			else
 //			{
-//				G.g[W] = &G6[E   *size_Mat];
-//				G.g[E] = &G6[W   *size_Mat];
-//				G.g[S] = &G6[N   *size_Mat];
-//				G.g[N] = &G6[S   *size_Mat];
-//				G.g[B] = &G6[T   *size_Mat];
-//				G.g[T] = &G6[B   *size_Mat];
+//				G.g[DIR_M00] = &G6[DIR_P00   *size_Mat];
+//				G.g[DIR_P00] = &G6[DIR_M00   *size_Mat];
+//				G.g[DIR_0M0] = &G6[DIR_0P0   *size_Mat];
+//				G.g[DIR_0P0] = &G6[DIR_0M0   *size_Mat];
+//				G.g[DIR_00M] = &G6[DIR_00P   *size_Mat];
+//				G.g[DIR_00P] = &G6[DIR_00M   *size_Mat];
 //			}
 //
 //			////////////////////////////////////////////////////////////////////////////////
@@ -1135,43 +1135,43 @@ extern "C" __global__ void LB_PostProcessor_F3_2018_Fehlberg(real omega,
 //			//unsigned int ktne = k;
 //			unsigned int kbsw = neighborZ[ksw];
 //			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//			real mgcbb = (G.g[E])[k];
-//			real mgabb = (G.g[W])[kw];
-//			real mgbcb = (G.g[N])[k];
-//			real mgbab = (G.g[S])[ks];
-//			real mgbbc = (G.g[T])[k];
-//			real mgbba = (G.g[B])[kb];
+//			real mgcbb = (G.g[DIR_P00])[k];
+//			real mgabb = (G.g[DIR_M00])[kw];
+//			real mgbcb = (G.g[DIR_0P0])[k];
+//			real mgbab = (G.g[DIR_0M0])[ks];
+//			real mgbbc = (G.g[DIR_00P])[k];
+//			real mgbba = (G.g[DIR_00M])[kb];
 //			real dxuxdxux = c1o2 * (-mgcbb + mgabb);
 //			real dyuydyuy = c1o2 * (-mgbcb + mgbab);
 //			real dzuzdzuz = c1o2 * (-mgbbc + mgbba);
 //			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//			real mfcbb = (D.f[E])[k];
-//			real mfabb = (D.f[W])[kw];
-//			real mfbcb = (D.f[N])[k];
-//			real mfbab = (D.f[S])[ks];
-//			real mfbbc = (D.f[T])[k];
-//			real mfbba = (D.f[B])[kb];
-//			real mfccb = (D.f[NE])[k];
-//			real mfaab = (D.f[SW])[ksw];
-//			real mfcab = (D.f[SE])[ks];
-//			real mfacb = (D.f[NW])[kw];
-//			real mfcbc = (D.f[TE])[k];
-//			real mfaba = (D.f[BW])[kbw];
-//			real mfcba = (D.f[BE])[kb];
-//			real mfabc = (D.f[TW])[kw];
-//			real mfbcc = (D.f[TN])[k];
-//			real mfbaa = (D.f[BS])[kbs];
-//			real mfbca = (D.f[BN])[kb];
-//			real mfbac = (D.f[TS])[ks];
-//			real mfbbb = (D.f[REST])[k];
-//			real mfccc = (D.f[TNE])[k];
-//			real mfaac = (D.f[TSW])[ksw];
-//			real mfcac = (D.f[TSE])[ks];
-//			real mfacc = (D.f[TNW])[kw];
-//			real mfcca = (D.f[BNE])[kb];
-//			real mfaaa = (D.f[BSW])[kbsw];
-//			real mfcaa = (D.f[BSE])[kbs];
-//			real mfaca = (D.f[BNW])[kbw];
+//			real mfcbb = (D.f[DIR_P00])[k];
+//			real mfabb = (D.f[DIR_M00])[kw];
+//			real mfbcb = (D.f[DIR_0P0])[k];
+//			real mfbab = (D.f[DIR_0M0])[ks];
+//			real mfbbc = (D.f[DIR_00P])[k];
+//			real mfbba = (D.f[DIR_00M])[kb];
+//			real mfccb = (D.f[DIR_PP0])[k];
+//			real mfaab = (D.f[DIR_MM0])[ksw];
+//			real mfcab = (D.f[DIR_PM0])[ks];
+//			real mfacb = (D.f[DIR_MP0])[kw];
+//			real mfcbc = (D.f[DIR_P0P])[k];
+//			real mfaba = (D.f[DIR_M0M])[kbw];
+//			real mfcba = (D.f[DIR_P0M])[kb];
+//			real mfabc = (D.f[DIR_M0P])[kw];
+//			real mfbcc = (D.f[DIR_0PP])[k];
+//			real mfbaa = (D.f[DIR_0MM])[kbs];
+//			real mfbca = (D.f[DIR_0PM])[kb];
+//			real mfbac = (D.f[DIR_0MP])[ks];
+//			real mfbbb = (D.f[DIR_000])[k];
+//			real mfccc = (D.f[DIR_PPP])[k];
+//			real mfaac = (D.f[DIR_MMP])[ksw];
+//			real mfcac = (D.f[DIR_PMP])[ks];
+//			real mfacc = (D.f[DIR_MPP])[kw];
+//			real mfcca = (D.f[DIR_PPM])[kb];
+//			real mfaaa = (D.f[DIR_MMM])[kbsw];
+//			real mfcaa = (D.f[DIR_PMM])[kbs];
+//			real mfaca = (D.f[DIR_MPM])[kbw];
 //			////////////////////////////////////////////////////////////////////////////////////
 //			real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 //				(((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
@@ -1893,41 +1893,41 @@ extern "C" __global__ void LB_PostProcessor_F3_2018_Fehlberg(real omega,
 //			////////////////////////////////////////////////////////////////////////////////////
 //
 //			////////////////////////////////////////////////////////////////////////////////////
-//			(D.f[E])[k] = mfabb;   
-//			(D.f[W])[kw] = mfcbb;  
-//			(D.f[N])[k] = mfbab;   
-//			(D.f[S])[ks] = mfbcb;  
-//			(D.f[T])[k] = mfbba;   
-//			(D.f[B])[kb] = mfbbc;  
-//			(D.f[NE])[k] = mfaab;  
-//			(D.f[SW])[ksw] = mfccb;
-//			(D.f[SE])[ks] = mfacb; 
-//			(D.f[NW])[kw] = mfcab; 
-//			(D.f[TE])[k] = mfaba;  
-//			(D.f[BW])[kbw] = mfcbc;
-//			(D.f[BE])[kb] = mfabc; 
-//			(D.f[TW])[kw] = mfcba; 
-//			(D.f[TN])[k] = mfbaa;  
-//			(D.f[BS])[kbs] = mfbcc;
-//			(D.f[BN])[kb] = mfbac; 
-//			(D.f[TS])[ks] = mfbca; 
-//			(D.f[REST])[k] = mfbbb;
-//			(D.f[TNE])[k] = mfaaa; 
-//			(D.f[TSE])[ks] = mfaca;
-//			(D.f[BNE])[kb] = mfaac;
-//			(D.f[BSE])[kbs] = mfacc;
-//			(D.f[TNW])[kw] = mfcaa;
-//			(D.f[TSW])[ksw] = mfcca;
-//			(D.f[BNW])[kbw] = mfcac;
-//			(D.f[BSW])[kbsw] = mfccc;
-//			////////////////////////////////////////////////////////////////////////////////////
-//
-//			(G.g[E])[k]  = mgabb;                                                               
-//			(G.g[W])[kw] = mgcbb;                                                              
-//			(G.g[N])[k]  = mgbab;
-//			(G.g[S])[ks] = mgbcb;
-//			(G.g[T])[k]  = mgbba;
-//			(G.g[B])[kb] = mgbbc;
+//			(D.f[DIR_P00])[k] = mfabb;   
+//			(D.f[DIR_M00])[kw] = mfcbb;  
+//			(D.f[DIR_0P0])[k] = mfbab;   
+//			(D.f[DIR_0M0])[ks] = mfbcb;  
+//			(D.f[DIR_00P])[k] = mfbba;   
+//			(D.f[DIR_00M])[kb] = mfbbc;  
+//			(D.f[DIR_PP0])[k] = mfaab;  
+//			(D.f[DIR_MM0])[ksw] = mfccb;
+//			(D.f[DIR_PM0])[ks] = mfacb; 
+//			(D.f[DIR_MP0])[kw] = mfcab; 
+//			(D.f[DIR_P0P])[k] = mfaba;  
+//			(D.f[DIR_M0M])[kbw] = mfcbc;
+//			(D.f[DIR_P0M])[kb] = mfabc; 
+//			(D.f[DIR_M0P])[kw] = mfcba; 
+//			(D.f[DIR_0PP])[k] = mfbaa;  
+//			(D.f[DIR_0MM])[kbs] = mfbcc;
+//			(D.f[DIR_0PM])[kb] = mfbac; 
+//			(D.f[DIR_0MP])[ks] = mfbca; 
+//			(D.f[DIR_000])[k] = mfbbb;
+//			(D.f[DIR_PPP])[k] = mfaaa; 
+//			(D.f[DIR_PMP])[ks] = mfaca;
+//			(D.f[DIR_PPM])[kb] = mfaac;
+//			(D.f[DIR_PMM])[kbs] = mfacc;
+//			(D.f[DIR_MPP])[kw] = mfcaa;
+//			(D.f[DIR_MMP])[ksw] = mfcca;
+//			(D.f[DIR_MPM])[kbw] = mfcac;
+//			(D.f[DIR_MMM])[kbsw] = mfccc;
+//			////////////////////////////////////////////////////////////////////////////////////
+//
+//			(G.g[DIR_P00])[k]  = mgabb;                                                               
+//			(G.g[DIR_M00])[kw] = mgcbb;                                                              
+//			(G.g[DIR_0P0])[k]  = mgbab;
+//			(G.g[DIR_0M0])[ks] = mgbcb;
+//			(G.g[DIR_00P])[k]  = mgbba;
+//			(G.g[DIR_00M])[kb] = mgbbc;
 //		}
 //	}
 //}
@@ -1972,7 +1972,7 @@ extern "C" __global__ void LB_PostProcessor_F3_2018_Fehlberg(real omega,
 
 
 /////////////////////////////////////////////////////////////////////////////////
-//extern "C" __global__ void LB_Kernel_Cumulant_D3Q27F3(	real omega,
+//__global__ void LB_Kernel_Cumulant_D3Q27F3(	real omega,
 //														unsigned int* bcMatD,
 //														unsigned int* neighborX,
 //														unsigned int* neighborY,
@@ -2006,83 +2006,83 @@ extern "C" __global__ void LB_PostProcessor_F3_2018_Fehlberg(real omega,
 //			Distributions27 D;
 //			if (EvenOrOdd == true)
 //			{
-//				D.f[E] = &DDStart[E   *size_Mat];
-//				D.f[W] = &DDStart[W   *size_Mat];
-//				D.f[N] = &DDStart[N   *size_Mat];
-//				D.f[S] = &DDStart[S   *size_Mat];
-//				D.f[T] = &DDStart[T   *size_Mat];
-//				D.f[B] = &DDStart[B   *size_Mat];
-//				D.f[NE] = &DDStart[NE  *size_Mat];
-//				D.f[SW] = &DDStart[SW  *size_Mat];
-//				D.f[SE] = &DDStart[SE  *size_Mat];
-//				D.f[NW] = &DDStart[NW  *size_Mat];
-//				D.f[TE] = &DDStart[TE  *size_Mat];
-//				D.f[BW] = &DDStart[BW  *size_Mat];
-//				D.f[BE] = &DDStart[BE  *size_Mat];
-//				D.f[TW] = &DDStart[TW  *size_Mat];
-//				D.f[TN] = &DDStart[TN  *size_Mat];
-//				D.f[BS] = &DDStart[BS  *size_Mat];
-//				D.f[BN] = &DDStart[BN  *size_Mat];
-//				D.f[TS] = &DDStart[TS  *size_Mat];
-//				D.f[REST] = &DDStart[REST*size_Mat];
-//				D.f[TNE] = &DDStart[TNE *size_Mat];
-//				D.f[TSW] = &DDStart[TSW *size_Mat];
-//				D.f[TSE] = &DDStart[TSE *size_Mat];
-//				D.f[TNW] = &DDStart[TNW *size_Mat];
-//				D.f[BNE] = &DDStart[BNE *size_Mat];
-//				D.f[BSW] = &DDStart[BSW *size_Mat];
-//				D.f[BSE] = &DDStart[BSE *size_Mat];
-//				D.f[BNW] = &DDStart[BNW *size_Mat];
+//				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
+//				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
+//				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
+//				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
+//				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
+//				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
+//				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
+//				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
+//				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
+//				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
+//				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
+//				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
+//				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
+//				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
+//				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
+//				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
+//				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
+//				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
+//				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+//				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
+//				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
+//				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
+//				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
+//				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
+//				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
+//				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
+//				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
 //			}
 //			else
 //			{
-//				D.f[W] = &DDStart[E   *size_Mat];
-//				D.f[E] = &DDStart[W   *size_Mat];
-//				D.f[S] = &DDStart[N   *size_Mat];
-//				D.f[N] = &DDStart[S   *size_Mat];
-//				D.f[B] = &DDStart[T   *size_Mat];
-//				D.f[T] = &DDStart[B   *size_Mat];
-//				D.f[SW] = &DDStart[NE  *size_Mat];
-//				D.f[NE] = &DDStart[SW  *size_Mat];
-//				D.f[NW] = &DDStart[SE  *size_Mat];
-//				D.f[SE] = &DDStart[NW  *size_Mat];
-//				D.f[BW] = &DDStart[TE  *size_Mat];
-//				D.f[TE] = &DDStart[BW  *size_Mat];
-//				D.f[TW] = &DDStart[BE  *size_Mat];
-//				D.f[BE] = &DDStart[TW  *size_Mat];
-//				D.f[BS] = &DDStart[TN  *size_Mat];
-//				D.f[TN] = &DDStart[BS  *size_Mat];
-//				D.f[TS] = &DDStart[BN  *size_Mat];
-//				D.f[BN] = &DDStart[TS  *size_Mat];
-//				D.f[REST] = &DDStart[REST*size_Mat];
-//				D.f[BSW] = &DDStart[TNE *size_Mat];
-//				D.f[BNE] = &DDStart[TSW *size_Mat];
-//				D.f[BNW] = &DDStart[TSE *size_Mat];
-//				D.f[BSE] = &DDStart[TNW *size_Mat];
-//				D.f[TSW] = &DDStart[BNE *size_Mat];
-//				D.f[TNE] = &DDStart[BSW *size_Mat];
-//				D.f[TNW] = &DDStart[BSE *size_Mat];
-//				D.f[TSE] = &DDStart[BNW *size_Mat];
+//				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
+//				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
+//				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
+//				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
+//				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
+//				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
+//				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
+//				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
+//				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
+//				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
+//				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
+//				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
+//				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
+//				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
+//				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
+//				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
+//				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
+//				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
+//				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+//				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
+//				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
+//				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
+//				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
+//				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
+//				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
+//				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
+//				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
 //			}
 //
 //			Distributions6 G;
 //			if (EvenOrOdd == true)
 //			{
-//				G.g[E] = &G6[E   *size_Mat];
-//				G.g[W] = &G6[W   *size_Mat];
-//				G.g[N] = &G6[N   *size_Mat];
-//				G.g[S] = &G6[S   *size_Mat];
-//				G.g[T] = &G6[T   *size_Mat];
-//				G.g[B] = &G6[B   *size_Mat];
+//				G.g[DIR_P00] = &G6[DIR_P00   *size_Mat];
+//				G.g[DIR_M00] = &G6[DIR_M00   *size_Mat];
+//				G.g[DIR_0P0] = &G6[DIR_0P0   *size_Mat];
+//				G.g[DIR_0M0] = &G6[DIR_0M0   *size_Mat];
+//				G.g[DIR_00P] = &G6[DIR_00P   *size_Mat];
+//				G.g[DIR_00M] = &G6[DIR_00M   *size_Mat];
 //			}
 //			else
 //			{
-//				G.g[W] = &G6[E   *size_Mat];
-//				G.g[E] = &G6[W   *size_Mat];
-//				G.g[S] = &G6[N   *size_Mat];
-//				G.g[N] = &G6[S   *size_Mat];
-//				G.g[B] = &G6[T   *size_Mat];
-//				G.g[T] = &G6[B   *size_Mat];
+//				G.g[DIR_M00] = &G6[DIR_P00   *size_Mat];
+//				G.g[DIR_P00] = &G6[DIR_M00   *size_Mat];
+//				G.g[DIR_0M0] = &G6[DIR_0P0   *size_Mat];
+//				G.g[DIR_0P0] = &G6[DIR_0M0   *size_Mat];
+//				G.g[DIR_00M] = &G6[DIR_00P   *size_Mat];
+//				G.g[DIR_00P] = &G6[DIR_00M   *size_Mat];
 //			}
 //
 //			////////////////////////////////////////////////////////////////////////////////
@@ -2143,43 +2143,43 @@ extern "C" __global__ void LB_PostProcessor_F3_2018_Fehlberg(real omega,
 //			//unsigned int ktne = k;
 //			//unsigned int kbsw = neighborZ[ksw];
 //			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//			real mgcbb = (G.g[E])[k];
-//			real mgabb = (G.g[W])[kw];
-//			real mgbcb = (G.g[N])[k];
-//			real mgbab = (G.g[S])[ks];
-//			real mgbbc = (G.g[T])[k];
-//			real mgbba = (G.g[B])[kb];
+//			real mgcbb = (G.g[DIR_P00])[k];
+//			real mgabb = (G.g[DIR_M00])[kw];
+//			real mgbcb = (G.g[DIR_0P0])[k];
+//			real mgbab = (G.g[DIR_0M0])[ks];
+//			real mgbbc = (G.g[DIR_00P])[k];
+//			real mgbba = (G.g[DIR_00M])[kb];
 //			real dxxux = c1o2 * (-mgcbb + mgabb);
 //			real dyyuy = c1o2 * (-mgbcb + mgbab);
 //			real dzzuz = c1o2 * (-mgbbc + mgbba);
 //			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//			real mfcbb = (D.f[E])[k];//[ke   ];// +  c2over27 ;(D.f[E   ])[k  ];//ke
-//			real mfabb = (D.f[W])[kw];//[kw   ];// +  c2over27 ;(D.f[W   ])[kw ];
-//			real mfbcb = (D.f[N])[k];//[kn   ];// +  c2over27 ;(D.f[N   ])[k  ];//kn
-//			real mfbab = (D.f[S])[ks];//[ks   ];// +  c2over27 ;(D.f[S   ])[ks ];
-//			real mfbbc = (D.f[T])[k];//[kt   ];// +  c2over27 ;(D.f[T   ])[k  ];//kt
-//			real mfbba = (D.f[B])[kb];//[kb   ];// +  c2over27 ;(D.f[B   ])[kb ];
-//			real mfccb = (D.f[NE])[k];//[kne  ];// +  c1over54 ;(D.f[NE  ])[k  ];//kne
-//			real mfaab = (D.f[SW])[ksw];//[ksw  ];// +  c1over54 ;(D.f[SW  ])[ksw];
-//			real mfcab = (D.f[SE])[ks];//[kse  ];// +  c1over54 ;(D.f[SE  ])[ks ];//kse
-//			real mfacb = (D.f[NW])[kw];//[knw  ];// +  c1over54 ;(D.f[NW  ])[kw ];//knw
-//			real mfcbc = (D.f[TE])[k];//[kte  ];// +  c1over54 ;(D.f[TE  ])[k  ];//kte
-//			real mfaba = (D.f[BW])[kbw];//[kbw  ];// +  c1over54 ;(D.f[BW  ])[kbw];
-//			real mfcba = (D.f[BE])[kb];//[kbe  ];// +  c1over54 ;(D.f[BE  ])[kb ];//kbe
-//			real mfabc = (D.f[TW])[kw];//[ktw  ];// +  c1over54 ;(D.f[TW  ])[kw ];//ktw
-//			real mfbcc = (D.f[TN])[k];//[ktn  ];// +  c1over54 ;(D.f[TN  ])[k  ];//ktn
-//			real mfbaa = (D.f[BS])[kbs];//[kbs  ];// +  c1over54 ;(D.f[BS  ])[kbs];
-//			real mfbca = (D.f[BN])[kb];//[kbn  ];// +  c1over54 ;(D.f[BN  ])[kb ];//kbn
-//			real mfbac = (D.f[TS])[ks];//[kts  ];// +  c1over54 ;(D.f[TS  ])[ks ];//kts
-//			real mfbbb = (D.f[REST])[k];//[kzero];// +  c8over27 ;(D.f[REST])[k  ];//kzero
-//			real mfccc = (D.f[TNE])[k];//[ktne ];// +  c1over216;(D.f[TNE ])[k  ];//ktne
-//			real mfaac = (D.f[TSW])[ksw];//[ktsw ];// +  c1over216;(D.f[TSW ])[ksw];//ktsw
-//			real mfcac = (D.f[TSE])[ks];//[ktse ];// +  c1over216;(D.f[TSE ])[ks ];//ktse
-//			real mfacc = (D.f[TNW])[kw];//[ktnw ];// +  c1over216;(D.f[TNW ])[kw ];//ktnw
-//			real mfcca = (D.f[BNE])[kb];//[kbne ];// +  c1over216;(D.f[BNE ])[kb ];//kbne
-//			real mfaaa = (D.f[BSW])[kbsw];//[kbsw ];// +  c1over216;(D.f[BSW ])[kbsw];
-//			real mfcaa = (D.f[BSE])[kbs];//[kbse ];// +  c1over216;(D.f[BSE ])[kbs];//kbse
-//			real mfaca = (D.f[BNW])[kbw];//[kbnw ];// +  c1over216;(D.f[BNW ])[kbw];//kbnw
+//			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
+//			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
+//			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
+//			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
+//			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
+//			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
+//			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
+//			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
+//			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
+//			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
+//			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
+//			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
+//			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
+//			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
+//			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
+//			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
+//			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
+//			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+//			real mfbbb = (D.f[DIR_000])[k];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
+//			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
+//			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
+//			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
+//			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
+//			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
+//			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
+//			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
+//			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
 //			////////////////////////////////////////////////////////////////////////////////////
 //			real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 //				(((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
@@ -3013,41 +3013,41 @@ extern "C" __global__ void LB_PostProcessor_F3_2018_Fehlberg(real omega,
 //			////////////////////////////////////////////////////////////////////////////////////
 //
 //			////////////////////////////////////////////////////////////////////////////////////
-//			(D.f[E])[k] = mfabb;//(D.f[ E   ])[ke   ] = mfabb;// -  c2over27 ;  (D.f[ E   ])[k   ]                                                                     
-//			(D.f[W])[kw] = mfcbb;//(D.f[ W   ])[kw   ] = mfcbb;// -  c2over27 ;  (D.f[ W   ])[kw  ]                                                                   
-//			(D.f[N])[k] = mfbab;//(D.f[ N   ])[kn   ] = mfbab;// -  c2over27 ;	 (D.f[ N   ])[k   ]
-//			(D.f[S])[ks] = mfbcb;//(D.f[ S   ])[ks   ] = mfbcb;// -  c2over27 ;	 (D.f[ S   ])[ks  ]
-//			(D.f[T])[k] = mfbba;//(D.f[ T   ])[kt   ] = mfbba;// -  c2over27 ;	 (D.f[ T   ])[k   ]
-//			(D.f[B])[kb] = mfbbc;//(D.f[ B   ])[kb   ] = mfbbc;// -  c2over27 ;	 (D.f[ B   ])[kb  ]
-//			(D.f[NE])[k] = mfaab;//(D.f[ NE  ])[kne  ] = mfaab;// -  c1over54 ;	 (D.f[ NE  ])[k   ]
-//			(D.f[SW])[ksw] = mfccb;//(D.f[ SW  ])[ksw  ] = mfccb;// -  c1over54 ;	 (D.f[ SW  ])[ksw ]
-//			(D.f[SE])[ks] = mfacb;//(D.f[ SE  ])[kse  ] = mfacb;// -  c1over54 ;	 (D.f[ SE  ])[ks  ]
-//			(D.f[NW])[kw] = mfcab;//(D.f[ NW  ])[knw  ] = mfcab;// -  c1over54 ;	 (D.f[ NW  ])[kw  ]
-//			(D.f[TE])[k] = mfaba;//(D.f[ TE  ])[kte  ] = mfaba;// -  c1over54 ;	 (D.f[ TE  ])[k   ]
-//			(D.f[BW])[kbw] = mfcbc;//(D.f[ BW  ])[kbw  ] = mfcbc;// -  c1over54 ;	 (D.f[ BW  ])[kbw ]
-//			(D.f[BE])[kb] = mfabc;//(D.f[ BE  ])[kbe  ] = mfabc;// -  c1over54 ;	 (D.f[ BE  ])[kb  ]
-//			(D.f[TW])[kw] = mfcba;//(D.f[ TW  ])[ktw  ] = mfcba;// -  c1over54 ;	 (D.f[ TW  ])[kw  ]
-//			(D.f[TN])[k] = mfbaa;//(D.f[ TN  ])[ktn  ] = mfbaa;// -  c1over54 ;	 (D.f[ TN  ])[k   ]
-//			(D.f[BS])[kbs] = mfbcc;//(D.f[ BS  ])[kbs  ] = mfbcc;// -  c1over54 ;	 (D.f[ BS  ])[kbs ]
-//			(D.f[BN])[kb] = mfbac;//(D.f[ BN  ])[kbn  ] = mfbac;// -  c1over54 ;	 (D.f[ BN  ])[kb  ]
-//			(D.f[TS])[ks] = mfbca;//(D.f[ TS  ])[kts  ] = mfbca;// -  c1over54 ;	 (D.f[ TS  ])[ks  ]
-//			(D.f[REST])[k] = mfbbb;//(D.f[ REST])[kzero] = mfbbb;// -  c8over27 ;	 (D.f[ REST])[k   ]
-//			(D.f[TNE])[k] = mfaaa;//(D.f[ TNE ])[ktne ] = mfaaa;// -  c1over216;	 (D.f[ TNE ])[k   ]
-//			(D.f[TSE])[ks] = mfaca;//(D.f[ TSE ])[ktse ] = mfaca;// -  c1over216;	 (D.f[ TSE ])[ks  ]
-//			(D.f[BNE])[kb] = mfaac;//(D.f[ BNE ])[kbne ] = mfaac;// -  c1over216;	 (D.f[ BNE ])[kb  ]
-//			(D.f[BSE])[kbs] = mfacc;//(D.f[ BSE ])[kbse ] = mfacc;// -  c1over216;	 (D.f[ BSE ])[kbs ]
-//			(D.f[TNW])[kw] = mfcaa;//(D.f[ TNW ])[ktnw ] = mfcaa;// -  c1over216;	 (D.f[ TNW ])[kw  ]
-//			(D.f[TSW])[ksw] = mfcca;//(D.f[ TSW ])[ktsw ] = mfcca;// -  c1over216;	 (D.f[ TSW ])[ksw ]
-//			(D.f[BNW])[kbw] = mfcac;//(D.f[ BNW ])[kbnw ] = mfcac;// -  c1over216;	 (D.f[ BNW ])[kbw ]
-//			(D.f[BSW])[kbsw] = mfccc;//(D.f[ BSW ])[kbsw ] = mfccc;// -  c1over216;	 (D.f[ BSW ])[kbsw]
-//			////////////////////////////////////////////////////////////////////////////////////
-//
-//			(G.g[E])[k]  = mgabb;                                                               
-//			(G.g[W])[kw] = mgcbb;                                                              
-//			(G.g[N])[k]  = mgbab;
-//			(G.g[S])[ks] = mgbcb;
-//			(G.g[T])[k]  = mgbba;
-//			(G.g[B])[kb] = mgbbc;
+//			(D.f[DIR_P00])[k] = mfabb;//(D.f[ DIR_P00   ])[ke   ] = mfabb;// -  c2over27 ;  (D.f[ DIR_P00   ])[k   ]                                                                     
+//			(D.f[DIR_M00])[kw] = mfcbb;//(D.f[ DIR_M00   ])[kw   ] = mfcbb;// -  c2over27 ;  (D.f[ DIR_M00   ])[kw  ]                                                                   
+//			(D.f[DIR_0P0])[k] = mfbab;//(D.f[ DIR_0P0   ])[kn   ] = mfbab;// -  c2over27 ;	 (D.f[ DIR_0P0   ])[k   ]
+//			(D.f[DIR_0M0])[ks] = mfbcb;//(D.f[ DIR_0M0   ])[ks   ] = mfbcb;// -  c2over27 ;	 (D.f[ DIR_0M0   ])[ks  ]
+//			(D.f[DIR_00P])[k] = mfbba;//(D.f[ DIR_00P   ])[kt   ] = mfbba;// -  c2over27 ;	 (D.f[ DIR_00P   ])[k   ]
+//			(D.f[DIR_00M])[kb] = mfbbc;//(D.f[ DIR_00M   ])[kb   ] = mfbbc;// -  c2over27 ;	 (D.f[ DIR_00M   ])[kb  ]
+//			(D.f[DIR_PP0])[k] = mfaab;//(D.f[ DIR_PP0  ])[kne  ] = mfaab;// -  c1over54 ;	 (D.f[ DIR_PP0  ])[k   ]
+//			(D.f[DIR_MM0])[ksw] = mfccb;//(D.f[ DIR_MM0  ])[ksw  ] = mfccb;// -  c1over54 ;	 (D.f[ DIR_MM0  ])[ksw ]
+//			(D.f[DIR_PM0])[ks] = mfacb;//(D.f[ DIR_PM0  ])[kse  ] = mfacb;// -  c1over54 ;	 (D.f[ DIR_PM0  ])[ks  ]
+//			(D.f[DIR_MP0])[kw] = mfcab;//(D.f[ DIR_MP0  ])[knw  ] = mfcab;// -  c1over54 ;	 (D.f[ DIR_MP0  ])[kw  ]
+//			(D.f[DIR_P0P])[k] = mfaba;//(D.f[ DIR_P0P  ])[kte  ] = mfaba;// -  c1over54 ;	 (D.f[ DIR_P0P  ])[k   ]
+//			(D.f[DIR_M0M])[kbw] = mfcbc;//(D.f[ DIR_M0M  ])[kbw  ] = mfcbc;// -  c1over54 ;	 (D.f[ DIR_M0M  ])[kbw ]
+//			(D.f[DIR_P0M])[kb] = mfabc;//(D.f[ DIR_P0M  ])[kbe  ] = mfabc;// -  c1over54 ;	 (D.f[ DIR_P0M  ])[kb  ]
+//			(D.f[DIR_M0P])[kw] = mfcba;//(D.f[ DIR_M0P  ])[ktw  ] = mfcba;// -  c1over54 ;	 (D.f[ DIR_M0P  ])[kw  ]
+//			(D.f[DIR_0PP])[k] = mfbaa;//(D.f[ DIR_0PP  ])[ktn  ] = mfbaa;// -  c1over54 ;	 (D.f[ DIR_0PP  ])[k   ]
+//			(D.f[DIR_0MM])[kbs] = mfbcc;//(D.f[ DIR_0MM  ])[kbs  ] = mfbcc;// -  c1over54 ;	 (D.f[ DIR_0MM  ])[kbs ]
+//			(D.f[DIR_0PM])[kb] = mfbac;//(D.f[ DIR_0PM  ])[kbn  ] = mfbac;// -  c1over54 ;	 (D.f[ DIR_0PM  ])[kb  ]
+//			(D.f[DIR_0MP])[ks] = mfbca;//(D.f[ DIR_0MP  ])[kts  ] = mfbca;// -  c1over54 ;	 (D.f[ DIR_0MP  ])[ks  ]
+//			(D.f[DIR_000])[k] = mfbbb;//(D.f[ DIR_000])[kzero] = mfbbb;// -  c8over27 ;	 (D.f[ DIR_000])[k   ]
+//			(D.f[DIR_PPP])[k] = mfaaa;//(D.f[ DIR_PPP ])[ktne ] = mfaaa;// -  c1over216;	 (D.f[ DIR_PPP ])[k   ]
+//			(D.f[DIR_PMP])[ks] = mfaca;//(D.f[ DIR_PMP ])[ktse ] = mfaca;// -  c1over216;	 (D.f[ DIR_PMP ])[ks  ]
+//			(D.f[DIR_PPM])[kb] = mfaac;//(D.f[ DIR_PPM ])[kbne ] = mfaac;// -  c1over216;	 (D.f[ DIR_PPM ])[kb  ]
+//			(D.f[DIR_PMM])[kbs] = mfacc;//(D.f[ DIR_PMM ])[kbse ] = mfacc;// -  c1over216;	 (D.f[ DIR_PMM ])[kbs ]
+//			(D.f[DIR_MPP])[kw] = mfcaa;//(D.f[ DIR_MPP ])[ktnw ] = mfcaa;// -  c1over216;	 (D.f[ DIR_MPP ])[kw  ]
+//			(D.f[DIR_MMP])[ksw] = mfcca;//(D.f[ DIR_MMP ])[ktsw ] = mfcca;// -  c1over216;	 (D.f[ DIR_MMP ])[ksw ]
+//			(D.f[DIR_MPM])[kbw] = mfcac;//(D.f[ DIR_MPM ])[kbnw ] = mfcac;// -  c1over216;	 (D.f[ DIR_MPM ])[kbw ]
+//			(D.f[DIR_MMM])[kbsw] = mfccc;//(D.f[ DIR_MMM ])[kbsw ] = mfccc;// -  c1over216;	 (D.f[ DIR_MMM ])[kbsw]
+//			////////////////////////////////////////////////////////////////////////////////////
+//
+//			(G.g[DIR_P00])[k]  = mgabb;                                                               
+//			(G.g[DIR_M00])[kw] = mgcbb;                                                              
+//			(G.g[DIR_0P0])[k]  = mgbab;
+//			(G.g[DIR_0M0])[ks] = mgbcb;
+//			(G.g[DIR_00P])[k]  = mgbba;
+//			(G.g[DIR_00M])[kb] = mgbbc;
 //		}
 //	}
 //}
diff --git a/src/gpu/VirtualFluids_GPU/GPU/DragLift27.cu b/src/gpu/VirtualFluids_GPU/GPU/DragLift27.cu
index 133c2aef257d30c9b4bb064ff4bff8e3d0593c28..5146242fed374a919b6dcc02774db1d8ce4f864a 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/DragLift27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/DragLift27.cu
@@ -7,7 +7,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void DragLiftPost27(  real* DD, 
+__global__ void DragLiftPost27(  real* DD, 
 											int* k_Q, 
 											real* QQ,
 											int numberOfBCnodes, 
@@ -23,63 +23,63 @@ extern "C" __global__ void DragLiftPost27(  real* DD,
 	Distributions27 D;
 	if (isEvenTimestep==true)
 	{
-		D.f[E   ] = &DD[E   *size_Mat];
-		D.f[W   ] = &DD[W   *size_Mat];
-		D.f[N   ] = &DD[N   *size_Mat];
-		D.f[S   ] = &DD[S   *size_Mat];
-		D.f[T   ] = &DD[T   *size_Mat];
-		D.f[B   ] = &DD[B   *size_Mat];
-		D.f[NE  ] = &DD[NE  *size_Mat];
-		D.f[SW  ] = &DD[SW  *size_Mat];
-		D.f[SE  ] = &DD[SE  *size_Mat];
-		D.f[NW  ] = &DD[NW  *size_Mat];
-		D.f[TE  ] = &DD[TE  *size_Mat];
-		D.f[BW  ] = &DD[BW  *size_Mat];
-		D.f[BE  ] = &DD[BE  *size_Mat];
-		D.f[TW  ] = &DD[TW  *size_Mat];
-		D.f[TN  ] = &DD[TN  *size_Mat];
-		D.f[BS  ] = &DD[BS  *size_Mat];
-		D.f[BN  ] = &DD[BN  *size_Mat];
-		D.f[TS  ] = &DD[TS  *size_Mat];
-		D.f[REST] = &DD[REST*size_Mat];
-		D.f[TNE ] = &DD[TNE *size_Mat];
-		D.f[TSW ] = &DD[TSW *size_Mat];
-		D.f[TSE ] = &DD[TSE *size_Mat];
-		D.f[TNW ] = &DD[TNW *size_Mat];
-		D.f[BNE ] = &DD[BNE *size_Mat];
-		D.f[BSW ] = &DD[BSW *size_Mat];
-		D.f[BSE ] = &DD[BSE *size_Mat];
-		D.f[BNW ] = &DD[BNW *size_Mat];
+		D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+		D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+		D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+		D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+		D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+		D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+		D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+		D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+		D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+		D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+		D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+		D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+		D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+		D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+		D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+		D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+		D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+		D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+		D.f[DIR_000] = &DD[DIR_000*size_Mat];
+		D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+		D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+		D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+		D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+		D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+		D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+		D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+		D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
 	} 
 	else
 	{
-		D.f[W   ] = &DD[E   *size_Mat];
-		D.f[E   ] = &DD[W   *size_Mat];
-		D.f[S   ] = &DD[N   *size_Mat];
-		D.f[N   ] = &DD[S   *size_Mat];
-		D.f[B   ] = &DD[T   *size_Mat];
-		D.f[T   ] = &DD[B   *size_Mat];
-		D.f[SW  ] = &DD[NE  *size_Mat];
-		D.f[NE  ] = &DD[SW  *size_Mat];
-		D.f[NW  ] = &DD[SE  *size_Mat];
-		D.f[SE  ] = &DD[NW  *size_Mat];
-		D.f[BW  ] = &DD[TE  *size_Mat];
-		D.f[TE  ] = &DD[BW  *size_Mat];
-		D.f[TW  ] = &DD[BE  *size_Mat];
-		D.f[BE  ] = &DD[TW  *size_Mat];
-		D.f[BS  ] = &DD[TN  *size_Mat];
-		D.f[TN  ] = &DD[BS  *size_Mat];
-		D.f[TS  ] = &DD[BN  *size_Mat];
-		D.f[BN  ] = &DD[TS  *size_Mat];
-		D.f[REST] = &DD[REST*size_Mat];
-		D.f[TNE ] = &DD[BSW *size_Mat];
-		D.f[TSW ] = &DD[BNE *size_Mat];
-		D.f[TSE ] = &DD[BNW *size_Mat];
-		D.f[TNW ] = &DD[BSE *size_Mat];
-		D.f[BNE ] = &DD[TSW *size_Mat];
-		D.f[BSW ] = &DD[TNE *size_Mat];
-		D.f[BSE ] = &DD[TNW *size_Mat];
-		D.f[BNW ] = &DD[TSE *size_Mat];
+		D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+		D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+		D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+		D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+		D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+		D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+		D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+		D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+		D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+		D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+		D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+		D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+		D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+		D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+		D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+		D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+		D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+		D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+		D.f[DIR_000] = &DD[DIR_000*size_Mat];
+		D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+		D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+		D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+		D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+		D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+		D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+		D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+		D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
 	}
 	////////////////////////////////////////////////////////////////////////////////
 	const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -100,32 +100,32 @@ extern "C" __global__ void DragLiftPost27(  real* DD,
 			*q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
 			*q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
 			*q_dirBSE, *q_dirBNW; 
-		q_dirE   = &QQ[E   * numberOfBCnodes];
-		q_dirW   = &QQ[W   * numberOfBCnodes];
-		q_dirN   = &QQ[N   * numberOfBCnodes];
-		q_dirS   = &QQ[S   * numberOfBCnodes];
-		q_dirT   = &QQ[T   * numberOfBCnodes];
-		q_dirB   = &QQ[B   * numberOfBCnodes];
-		q_dirNE  = &QQ[NE  * numberOfBCnodes];
-		q_dirSW  = &QQ[SW  * numberOfBCnodes];
-		q_dirSE  = &QQ[SE  * numberOfBCnodes];
-		q_dirNW  = &QQ[NW  * numberOfBCnodes];
-		q_dirTE  = &QQ[TE  * numberOfBCnodes];
-		q_dirBW  = &QQ[BW  * numberOfBCnodes];
-		q_dirBE  = &QQ[BE  * numberOfBCnodes];
-		q_dirTW  = &QQ[TW  * numberOfBCnodes];
-		q_dirTN  = &QQ[TN  * numberOfBCnodes];
-		q_dirBS  = &QQ[BS  * numberOfBCnodes];
-		q_dirBN  = &QQ[BN  * numberOfBCnodes];
-		q_dirTS  = &QQ[TS  * numberOfBCnodes];
-		q_dirTNE = &QQ[TNE * numberOfBCnodes];
-		q_dirTSW = &QQ[TSW * numberOfBCnodes];
-		q_dirTSE = &QQ[TSE * numberOfBCnodes];
-		q_dirTNW = &QQ[TNW * numberOfBCnodes];
-		q_dirBNE = &QQ[BNE * numberOfBCnodes];
-		q_dirBSW = &QQ[BSW * numberOfBCnodes];
-		q_dirBSE = &QQ[BSE * numberOfBCnodes];
-		q_dirBNW = &QQ[BNW * numberOfBCnodes];
+		q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
+		q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
+		q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
+		q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
+		q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
+		q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
+		q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
+		q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
+		q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
+		q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
+		q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
+		q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
+		q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
+		q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
+		q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
+		q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
+		q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
+		q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+		q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
+		q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
+		q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
+		q_dirTNW = &QQ[DIR_MPP * numberOfBCnodes];
+		q_dirBNE = &QQ[DIR_PPM * numberOfBCnodes];
+		q_dirBSW = &QQ[DIR_MMM * numberOfBCnodes];
+		q_dirBSE = &QQ[DIR_PMM * numberOfBCnodes];
+		q_dirBNW = &QQ[DIR_MPM * numberOfBCnodes];
 		////////////////////////////////////////////////////////////////////////////////
 		//index
 		unsigned int KQK  = k_Q[k];
@@ -160,32 +160,32 @@ extern "C" __global__ void DragLiftPost27(  real* DD,
 		real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
                 f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-		f_W    = (D.f[E   ])[ke   ];
-		f_E    = (D.f[W   ])[kw   ];
-		f_S    = (D.f[N   ])[kn   ];
-		f_N    = (D.f[S   ])[ks   ];
-		f_B    = (D.f[T   ])[kt   ];
-		f_T    = (D.f[B   ])[kb   ];
-		f_SW   = (D.f[NE  ])[kne  ];
-		f_NE   = (D.f[SW  ])[ksw  ];
-		f_NW   = (D.f[SE  ])[kse  ];
-		f_SE   = (D.f[NW  ])[knw  ];
-		f_BW   = (D.f[TE  ])[kte  ];
-		f_TE   = (D.f[BW  ])[kbw  ];
-		f_TW   = (D.f[BE  ])[kbe  ];
-		f_BE   = (D.f[TW  ])[ktw  ];
-		f_BS   = (D.f[TN  ])[ktn  ];
-		f_TN   = (D.f[BS  ])[kbs  ];
-		f_TS   = (D.f[BN  ])[kbn  ];
-		f_BN   = (D.f[TS  ])[kts  ];
-		f_BSW  = (D.f[TNE ])[ktne ];
-		f_BNE  = (D.f[TSW ])[ktsw ];
-		f_BNW  = (D.f[TSE ])[ktse ];
-		f_BSE  = (D.f[TNW ])[ktnw ];
-		f_TSW  = (D.f[BNE ])[kbne ];
-		f_TNE  = (D.f[BSW ])[kbsw ];
-		f_TNW  = (D.f[BSE ])[kbse ];
-		f_TSE  = (D.f[BNW ])[kbnw ];
+		f_W    = (D.f[DIR_P00   ])[ke   ];
+		f_E    = (D.f[DIR_M00   ])[kw   ];
+		f_S    = (D.f[DIR_0P0   ])[kn   ];
+		f_N    = (D.f[DIR_0M0   ])[ks   ];
+		f_B    = (D.f[DIR_00P   ])[kt   ];
+		f_T    = (D.f[DIR_00M   ])[kb   ];
+		f_SW   = (D.f[DIR_PP0  ])[kne  ];
+		f_NE   = (D.f[DIR_MM0  ])[ksw  ];
+		f_NW   = (D.f[DIR_PM0  ])[kse  ];
+		f_SE   = (D.f[DIR_MP0  ])[knw  ];
+		f_BW   = (D.f[DIR_P0P  ])[kte  ];
+		f_TE   = (D.f[DIR_M0M  ])[kbw  ];
+		f_TW   = (D.f[DIR_P0M  ])[kbe  ];
+		f_BE   = (D.f[DIR_M0P  ])[ktw  ];
+		f_BS   = (D.f[DIR_0PP  ])[ktn  ];
+		f_TN   = (D.f[DIR_0MM  ])[kbs  ];
+		f_TS   = (D.f[DIR_0PM  ])[kbn  ];
+		f_BN   = (D.f[DIR_0MP  ])[kts  ];
+		f_BSW  = (D.f[DIR_PPP ])[ktne ];
+		f_BNE  = (D.f[DIR_MMP ])[ktsw ];
+		f_BNW  = (D.f[DIR_PMP ])[ktse ];
+		f_BSE  = (D.f[DIR_MPP ])[ktnw ];
+		f_TSW  = (D.f[DIR_PPM ])[kbne ];
+		f_TNE  = (D.f[DIR_MMM ])[kbsw ];
+		f_TNW  = (D.f[DIR_PMM ])[kbse ];
+		f_TSE  = (D.f[DIR_MPM ])[kbnw ];
 		////////////////////////////////////////////////////////////////////////////////
 		double	OnE   = c0o1, OnW   = c0o1, OnN   = c0o1, OnS   = c0o1, OnT = c0o1, OnB = c0o1, 
 				OnNE  = c0o1, OnSW  = c0o1, OnSE  = c0o1, OnNW  = c0o1, 
@@ -272,7 +272,7 @@ extern "C" __global__ void DragLiftPost27(  real* DD,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void DragLiftPre27(   real* DD, 
+__global__ void DragLiftPre27(   real* DD, 
 											int* k_Q, 
 											real* QQ,
 											int numberOfBCnodes, 
@@ -288,63 +288,63 @@ extern "C" __global__ void DragLiftPre27(   real* DD,
 	Distributions27 D;
 	if (isEvenTimestep==true)
 	{
-		D.f[E   ] = &DD[E   *size_Mat];
-		D.f[W   ] = &DD[W   *size_Mat];
-		D.f[N   ] = &DD[N   *size_Mat];
-		D.f[S   ] = &DD[S   *size_Mat];
-		D.f[T   ] = &DD[T   *size_Mat];
-		D.f[B   ] = &DD[B   *size_Mat];
-		D.f[NE  ] = &DD[NE  *size_Mat];
-		D.f[SW  ] = &DD[SW  *size_Mat];
-		D.f[SE  ] = &DD[SE  *size_Mat];
-		D.f[NW  ] = &DD[NW  *size_Mat];
-		D.f[TE  ] = &DD[TE  *size_Mat];
-		D.f[BW  ] = &DD[BW  *size_Mat];
-		D.f[BE  ] = &DD[BE  *size_Mat];
-		D.f[TW  ] = &DD[TW  *size_Mat];
-		D.f[TN  ] = &DD[TN  *size_Mat];
-		D.f[BS  ] = &DD[BS  *size_Mat];
-		D.f[BN  ] = &DD[BN  *size_Mat];
-		D.f[TS  ] = &DD[TS  *size_Mat];
-		D.f[REST] = &DD[REST*size_Mat];
-		D.f[TNE ] = &DD[TNE *size_Mat];
-		D.f[TSW ] = &DD[TSW *size_Mat];
-		D.f[TSE ] = &DD[TSE *size_Mat];
-		D.f[TNW ] = &DD[TNW *size_Mat];
-		D.f[BNE ] = &DD[BNE *size_Mat];
-		D.f[BSW ] = &DD[BSW *size_Mat];
-		D.f[BSE ] = &DD[BSE *size_Mat];
-		D.f[BNW ] = &DD[BNW *size_Mat];
+		D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+		D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+		D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+		D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+		D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+		D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+		D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+		D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+		D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+		D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+		D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+		D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+		D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+		D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+		D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+		D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+		D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+		D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+		D.f[DIR_000] = &DD[DIR_000*size_Mat];
+		D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+		D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+		D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+		D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+		D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+		D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+		D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+		D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
 	} 
 	else
 	{
-		D.f[W   ] = &DD[E   *size_Mat];
-		D.f[E   ] = &DD[W   *size_Mat];
-		D.f[S   ] = &DD[N   *size_Mat];
-		D.f[N   ] = &DD[S   *size_Mat];
-		D.f[B   ] = &DD[T   *size_Mat];
-		D.f[T   ] = &DD[B   *size_Mat];
-		D.f[SW  ] = &DD[NE  *size_Mat];
-		D.f[NE  ] = &DD[SW  *size_Mat];
-		D.f[NW  ] = &DD[SE  *size_Mat];
-		D.f[SE  ] = &DD[NW  *size_Mat];
-		D.f[BW  ] = &DD[TE  *size_Mat];
-		D.f[TE  ] = &DD[BW  *size_Mat];
-		D.f[TW  ] = &DD[BE  *size_Mat];
-		D.f[BE  ] = &DD[TW  *size_Mat];
-		D.f[BS  ] = &DD[TN  *size_Mat];
-		D.f[TN  ] = &DD[BS  *size_Mat];
-		D.f[TS  ] = &DD[BN  *size_Mat];
-		D.f[BN  ] = &DD[TS  *size_Mat];
-		D.f[REST] = &DD[REST*size_Mat];
-		D.f[TNE ] = &DD[BSW *size_Mat];
-		D.f[TSW ] = &DD[BNE *size_Mat];
-		D.f[TSE ] = &DD[BNW *size_Mat];
-		D.f[TNW ] = &DD[BSE *size_Mat];
-		D.f[BNE ] = &DD[TSW *size_Mat];
-		D.f[BSW ] = &DD[TNE *size_Mat];
-		D.f[BSE ] = &DD[TNW *size_Mat];
-		D.f[BNW ] = &DD[TSE *size_Mat];
+		D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+		D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+		D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+		D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+		D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+		D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+		D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+		D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+		D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+		D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+		D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+		D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+		D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+		D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+		D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+		D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+		D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+		D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+		D.f[DIR_000] = &DD[DIR_000*size_Mat];
+		D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+		D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+		D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+		D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+		D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+		D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+		D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+		D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
 	}
 	////////////////////////////////////////////////////////////////////////////////
 	const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -365,32 +365,32 @@ extern "C" __global__ void DragLiftPre27(   real* DD,
 			*q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
 			*q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
 			*q_dirBSE, *q_dirBNW; 
-		q_dirE   = &QQ[E   * numberOfBCnodes];
-		q_dirW   = &QQ[W   * numberOfBCnodes];
-		q_dirN   = &QQ[N   * numberOfBCnodes];
-		q_dirS   = &QQ[S   * numberOfBCnodes];
-		q_dirT   = &QQ[T   * numberOfBCnodes];
-		q_dirB   = &QQ[B   * numberOfBCnodes];
-		q_dirNE  = &QQ[NE  * numberOfBCnodes];
-		q_dirSW  = &QQ[SW  * numberOfBCnodes];
-		q_dirSE  = &QQ[SE  * numberOfBCnodes];
-		q_dirNW  = &QQ[NW  * numberOfBCnodes];
-		q_dirTE  = &QQ[TE  * numberOfBCnodes];
-		q_dirBW  = &QQ[BW  * numberOfBCnodes];
-		q_dirBE  = &QQ[BE  * numberOfBCnodes];
-		q_dirTW  = &QQ[TW  * numberOfBCnodes];
-		q_dirTN  = &QQ[TN  * numberOfBCnodes];
-		q_dirBS  = &QQ[BS  * numberOfBCnodes];
-		q_dirBN  = &QQ[BN  * numberOfBCnodes];
-		q_dirTS  = &QQ[TS  * numberOfBCnodes];
-		q_dirTNE = &QQ[TNE * numberOfBCnodes];
-		q_dirTSW = &QQ[TSW * numberOfBCnodes];
-		q_dirTSE = &QQ[TSE * numberOfBCnodes];
-		q_dirTNW = &QQ[TNW * numberOfBCnodes];
-		q_dirBNE = &QQ[BNE * numberOfBCnodes];
-		q_dirBSW = &QQ[BSW * numberOfBCnodes];
-		q_dirBSE = &QQ[BSE * numberOfBCnodes];
-		q_dirBNW = &QQ[BNW * numberOfBCnodes];
+		q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
+		q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
+		q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
+		q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
+		q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
+		q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
+		q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
+		q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
+		q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
+		q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
+		q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
+		q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
+		q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
+		q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
+		q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
+		q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
+		q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
+		q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+		q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
+		q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
+		q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
+		q_dirTNW = &QQ[DIR_MPP * numberOfBCnodes];
+		q_dirBNE = &QQ[DIR_PPM * numberOfBCnodes];
+		q_dirBSW = &QQ[DIR_MMM * numberOfBCnodes];
+		q_dirBSE = &QQ[DIR_PMM * numberOfBCnodes];
+		q_dirBNW = &QQ[DIR_MPM * numberOfBCnodes];
 		////////////////////////////////////////////////////////////////////////////////
 		//index
 		unsigned int KQK  = k_Q[k];
@@ -425,32 +425,32 @@ extern "C" __global__ void DragLiftPre27(   real* DD,
 		real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
                 f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-		f_E   = (D.f[E   ])[ke   ];
-		f_W   = (D.f[W   ])[kw   ];
-		f_N   = (D.f[N   ])[kn   ];
-		f_S   = (D.f[S   ])[ks   ];
-		f_T   = (D.f[T   ])[kt   ];
-		f_B   = (D.f[B   ])[kb   ];
-		f_NE  = (D.f[NE  ])[kne  ];
-		f_SW  = (D.f[SW  ])[ksw  ];
-		f_SE  = (D.f[SE  ])[kse  ];
-		f_NW  = (D.f[NW  ])[knw  ];
-		f_TE  = (D.f[TE  ])[kte  ];
-		f_BW  = (D.f[BW  ])[kbw  ];
-		f_BE  = (D.f[BE  ])[kbe  ];
-		f_TW  = (D.f[TW  ])[ktw  ];
-		f_TN  = (D.f[TN  ])[ktn  ];
-		f_BS  = (D.f[BS  ])[kbs  ];
-		f_BN  = (D.f[BN  ])[kbn  ];
-		f_TS  = (D.f[TS  ])[kts  ];
-		f_TNE = (D.f[TNE ])[ktne ];
-		f_TSW = (D.f[TSW ])[ktsw ];
-		f_TSE = (D.f[TSE ])[ktse ];
-		f_TNW = (D.f[TNW ])[ktnw ];
-		f_BNE = (D.f[BNE ])[kbne ];
-		f_BSW = (D.f[BSW ])[kbsw ];
-		f_BSE = (D.f[BSE ])[kbse ];
-		f_BNW = (D.f[BNW ])[kbnw ];
+		f_E   = (D.f[DIR_P00   ])[ke   ];
+		f_W   = (D.f[DIR_M00   ])[kw   ];
+		f_N   = (D.f[DIR_0P0   ])[kn   ];
+		f_S   = (D.f[DIR_0M0   ])[ks   ];
+		f_T   = (D.f[DIR_00P   ])[kt   ];
+		f_B   = (D.f[DIR_00M   ])[kb   ];
+		f_NE  = (D.f[DIR_PP0  ])[kne  ];
+		f_SW  = (D.f[DIR_MM0  ])[ksw  ];
+		f_SE  = (D.f[DIR_PM0  ])[kse  ];
+		f_NW  = (D.f[DIR_MP0  ])[knw  ];
+		f_TE  = (D.f[DIR_P0P  ])[kte  ];
+		f_BW  = (D.f[DIR_M0M  ])[kbw  ];
+		f_BE  = (D.f[DIR_P0M  ])[kbe  ];
+		f_TW  = (D.f[DIR_M0P  ])[ktw  ];
+		f_TN  = (D.f[DIR_0PP  ])[ktn  ];
+		f_BS  = (D.f[DIR_0MM  ])[kbs  ];
+		f_BN  = (D.f[DIR_0PM  ])[kbn  ];
+		f_TS  = (D.f[DIR_0MP  ])[kts  ];
+		f_TNE = (D.f[DIR_PPP ])[ktne ];
+		f_TSW = (D.f[DIR_MMP ])[ktsw ];
+		f_TSE = (D.f[DIR_PMP ])[ktse ];
+		f_TNW = (D.f[DIR_MPP ])[ktnw ];
+		f_BNE = (D.f[DIR_PPM ])[kbne ];
+		f_BSW = (D.f[DIR_MMM ])[kbsw ];
+		f_BSE = (D.f[DIR_PMM ])[kbse ];
+		f_BNW = (D.f[DIR_MPM ])[kbnw ];
 		 ////////////////////////////////////////////////////////////////////////////////
 		double	OnE   = c0o1, OnW   = c0o1, OnN   = c0o1, OnS   = c0o1, OnT = c0o1, OnB = c0o1, 
 				OnNE  = c0o1, OnSW  = c0o1, OnSE  = c0o1, OnNW  = c0o1, 
diff --git a/src/gpu/VirtualFluids_GPU/GPU/ExchangeData27.cu b/src/gpu/VirtualFluids_GPU/GPU/ExchangeData27.cu
index 6b10ede662132ac18f5a3474d2a6d654bf6c835c..5470da46342c85e57370227313c8c82674a17e6e 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/ExchangeData27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/ExchangeData27.cu
@@ -7,7 +7,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void getSendFsPost27(real* DD,
+__global__ void getSendFsPost27(real* DD,
 										   real* bufferFs,
 										   int* sendIndex,
                                            int buffmax,
@@ -65,150 +65,150 @@ extern "C" __global__ void getSendFsPost27(real* DD,
       Distributions27 D;
       if (isEvenTimestep==true)
       {
-         D.f[E   ] = &DD[E   *size_Mat];
-         D.f[W   ] = &DD[W   *size_Mat];
-         D.f[N   ] = &DD[N   *size_Mat];
-         D.f[S   ] = &DD[S   *size_Mat];
-         D.f[T   ] = &DD[T   *size_Mat];
-         D.f[B   ] = &DD[B   *size_Mat];
-         D.f[NE  ] = &DD[NE  *size_Mat];
-         D.f[SW  ] = &DD[SW  *size_Mat];
-         D.f[SE  ] = &DD[SE  *size_Mat];
-         D.f[NW  ] = &DD[NW  *size_Mat];
-         D.f[TE  ] = &DD[TE  *size_Mat];
-         D.f[BW  ] = &DD[BW  *size_Mat];
-         D.f[BE  ] = &DD[BE  *size_Mat];
-         D.f[TW  ] = &DD[TW  *size_Mat];
-         D.f[TN  ] = &DD[TN  *size_Mat];
-         D.f[BS  ] = &DD[BS  *size_Mat];
-         D.f[BN  ] = &DD[BN  *size_Mat];
-         D.f[TS  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[TNE *size_Mat];
-         D.f[TSW ] = &DD[TSW *size_Mat];
-         D.f[TSE ] = &DD[TSE *size_Mat];
-         D.f[TNW ] = &DD[TNW *size_Mat];
-         D.f[BNE ] = &DD[BNE *size_Mat];
-         D.f[BSW ] = &DD[BSW *size_Mat];
-         D.f[BSE ] = &DD[BSE *size_Mat];
-         D.f[BNW ] = &DD[BNW *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
       } 
       else
       {
-         D.f[W   ] = &DD[E   *size_Mat];
-         D.f[E   ] = &DD[W   *size_Mat];
-         D.f[S   ] = &DD[N   *size_Mat];
-         D.f[N   ] = &DD[S   *size_Mat];
-         D.f[B   ] = &DD[T   *size_Mat];
-         D.f[T   ] = &DD[B   *size_Mat];
-         D.f[SW  ] = &DD[NE  *size_Mat];
-         D.f[NE  ] = &DD[SW  *size_Mat];
-         D.f[NW  ] = &DD[SE  *size_Mat];
-         D.f[SE  ] = &DD[NW  *size_Mat];
-         D.f[BW  ] = &DD[TE  *size_Mat];
-         D.f[TE  ] = &DD[BW  *size_Mat];
-         D.f[TW  ] = &DD[BE  *size_Mat];
-         D.f[BE  ] = &DD[TW  *size_Mat];
-         D.f[BS  ] = &DD[TN  *size_Mat];
-         D.f[TN  ] = &DD[BS  *size_Mat];
-         D.f[TS  ] = &DD[BN  *size_Mat];
-         D.f[BN  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[BSW *size_Mat];
-         D.f[TSW ] = &DD[BNE *size_Mat];
-         D.f[TSE ] = &DD[BNW *size_Mat];
-         D.f[TNW ] = &DD[BSE *size_Mat];
-         D.f[BNE ] = &DD[TSW *size_Mat];
-         D.f[BSW ] = &DD[TNE *size_Mat];
-         D.f[BSE ] = &DD[TNW *size_Mat];
-         D.f[BNW ] = &DD[TSE *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
       }
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  //set Pointer for Buffer Fs
       Distributions27 Dbuff;
-      Dbuff.f[E   ] = &bufferFs[E   *buffmax];
-      Dbuff.f[W   ] = &bufferFs[W   *buffmax];
-      Dbuff.f[N   ] = &bufferFs[N   *buffmax];
-      Dbuff.f[S   ] = &bufferFs[S   *buffmax];
-      Dbuff.f[T   ] = &bufferFs[T   *buffmax];
-      Dbuff.f[B   ] = &bufferFs[B   *buffmax];
-      Dbuff.f[NE  ] = &bufferFs[NE  *buffmax];
-      Dbuff.f[SW  ] = &bufferFs[SW  *buffmax];
-      Dbuff.f[SE  ] = &bufferFs[SE  *buffmax];
-      Dbuff.f[NW  ] = &bufferFs[NW  *buffmax];
-      Dbuff.f[TE  ] = &bufferFs[TE  *buffmax];
-      Dbuff.f[BW  ] = &bufferFs[BW  *buffmax];
-      Dbuff.f[BE  ] = &bufferFs[BE  *buffmax];
-      Dbuff.f[TW  ] = &bufferFs[TW  *buffmax];
-      Dbuff.f[TN  ] = &bufferFs[TN  *buffmax];
-      Dbuff.f[BS  ] = &bufferFs[BS  *buffmax];
-      Dbuff.f[BN  ] = &bufferFs[BN  *buffmax];
-      Dbuff.f[TS  ] = &bufferFs[TS  *buffmax];
-      Dbuff.f[REST] = &bufferFs[REST*buffmax];
-      Dbuff.f[TNE ] = &bufferFs[TNE *buffmax];
-      Dbuff.f[TSW ] = &bufferFs[TSW *buffmax];
-      Dbuff.f[TSE ] = &bufferFs[TSE *buffmax];
-      Dbuff.f[TNW ] = &bufferFs[TNW *buffmax];
-      Dbuff.f[BNE ] = &bufferFs[BNE *buffmax];
-      Dbuff.f[BSW ] = &bufferFs[BSW *buffmax];
-      Dbuff.f[BSE ] = &bufferFs[BSE *buffmax];
-      Dbuff.f[BNW ] = &bufferFs[BNW *buffmax];
+      Dbuff.f[DIR_P00   ] = &bufferFs[DIR_P00   *buffmax];
+      Dbuff.f[DIR_M00   ] = &bufferFs[DIR_M00   *buffmax];
+      Dbuff.f[DIR_0P0   ] = &bufferFs[DIR_0P0   *buffmax];
+      Dbuff.f[DIR_0M0   ] = &bufferFs[DIR_0M0   *buffmax];
+      Dbuff.f[DIR_00P   ] = &bufferFs[DIR_00P   *buffmax];
+      Dbuff.f[DIR_00M   ] = &bufferFs[DIR_00M   *buffmax];
+      Dbuff.f[DIR_PP0  ] = &bufferFs[DIR_PP0  *buffmax];
+      Dbuff.f[DIR_MM0  ] = &bufferFs[DIR_MM0  *buffmax];
+      Dbuff.f[DIR_PM0  ] = &bufferFs[DIR_PM0  *buffmax];
+      Dbuff.f[DIR_MP0  ] = &bufferFs[DIR_MP0  *buffmax];
+      Dbuff.f[DIR_P0P  ] = &bufferFs[DIR_P0P  *buffmax];
+      Dbuff.f[DIR_M0M  ] = &bufferFs[DIR_M0M  *buffmax];
+      Dbuff.f[DIR_P0M  ] = &bufferFs[DIR_P0M  *buffmax];
+      Dbuff.f[DIR_M0P  ] = &bufferFs[DIR_M0P  *buffmax];
+      Dbuff.f[DIR_0PP  ] = &bufferFs[DIR_0PP  *buffmax];
+      Dbuff.f[DIR_0MM  ] = &bufferFs[DIR_0MM  *buffmax];
+      Dbuff.f[DIR_0PM  ] = &bufferFs[DIR_0PM  *buffmax];
+      Dbuff.f[DIR_0MP  ] = &bufferFs[DIR_0MP  *buffmax];
+      Dbuff.f[DIR_000] = &bufferFs[DIR_000*buffmax];
+      Dbuff.f[DIR_PPP ] = &bufferFs[DIR_PPP *buffmax];
+      Dbuff.f[DIR_MMP ] = &bufferFs[DIR_MMP *buffmax];
+      Dbuff.f[DIR_PMP ] = &bufferFs[DIR_PMP *buffmax];
+      Dbuff.f[DIR_MPP ] = &bufferFs[DIR_MPP *buffmax];
+      Dbuff.f[DIR_PPM ] = &bufferFs[DIR_PPM *buffmax];
+      Dbuff.f[DIR_MMM ] = &bufferFs[DIR_MMM *buffmax];
+      Dbuff.f[DIR_PMM ] = &bufferFs[DIR_PMM *buffmax];
+      Dbuff.f[DIR_MPM ] = &bufferFs[DIR_MPM *buffmax];
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  //copy to buffer
-      //(Dbuff.f[E   ])[k] = (D.f[E   ])[ke   ];
-      //(Dbuff.f[W   ])[k] = (D.f[W   ])[kw   ];
-      //(Dbuff.f[N   ])[k] = (D.f[N   ])[kn   ];
-      //(Dbuff.f[S   ])[k] = (D.f[S   ])[ks   ];
-      //(Dbuff.f[T   ])[k] = (D.f[T   ])[kt   ];
-      //(Dbuff.f[B   ])[k] = (D.f[B   ])[kb   ];
-      //(Dbuff.f[NE  ])[k] = (D.f[NE  ])[kne  ];
-      //(Dbuff.f[SW  ])[k] = (D.f[SW  ])[ksw  ];
-      //(Dbuff.f[SE  ])[k] = (D.f[SE  ])[kse  ];
-      //(Dbuff.f[NW  ])[k] = (D.f[NW  ])[knw  ];
-      //(Dbuff.f[TE  ])[k] = (D.f[TE  ])[kte  ];
-      //(Dbuff.f[BW  ])[k] = (D.f[BW  ])[kbw  ];
-      //(Dbuff.f[BE  ])[k] = (D.f[BE  ])[kbe  ];
-      //(Dbuff.f[TW  ])[k] = (D.f[TW  ])[ktw  ];
-      //(Dbuff.f[TN  ])[k] = (D.f[TN  ])[ktn  ];
-      //(Dbuff.f[BS  ])[k] = (D.f[BS  ])[kbs  ];
-      //(Dbuff.f[BN  ])[k] = (D.f[BN  ])[kbn  ];
-      //(Dbuff.f[TS  ])[k] = (D.f[TS  ])[kts  ];
-      //(Dbuff.f[REST])[k] = (D.f[REST])[kzero];
-      //(Dbuff.f[TNE ])[k] = (D.f[TNE ])[ktne ];
-      //(Dbuff.f[TSW ])[k] = (D.f[TSW ])[ktsw ];
-      //(Dbuff.f[TSE ])[k] = (D.f[TSE ])[ktse ];
-      //(Dbuff.f[TNW ])[k] = (D.f[TNW ])[ktnw ];
-      //(Dbuff.f[BNE ])[k] = (D.f[BNE ])[kbne ];
-      //(Dbuff.f[BSW ])[k] = (D.f[BSW ])[kbsw ];
-      //(Dbuff.f[BSE ])[k] = (D.f[BSE ])[kbse ];
-      //(Dbuff.f[BNW ])[k] = (D.f[BNW ])[kbnw ];
-      (Dbuff.f[E   ])[k] = (D.f[W   ])[kw   ];
-      (Dbuff.f[W   ])[k] = (D.f[E   ])[ke   ];
-      (Dbuff.f[N   ])[k] = (D.f[S   ])[ks   ];
-      (Dbuff.f[S   ])[k] = (D.f[N   ])[kn   ];
-      (Dbuff.f[T   ])[k] = (D.f[B   ])[kb   ];
-      (Dbuff.f[B   ])[k] = (D.f[T   ])[kt   ];
-      (Dbuff.f[NE  ])[k] = (D.f[SW  ])[ksw  ];
-      (Dbuff.f[SW  ])[k] = (D.f[NE  ])[kne  ];
-      (Dbuff.f[SE  ])[k] = (D.f[NW  ])[knw  ];
-      (Dbuff.f[NW  ])[k] = (D.f[SE  ])[kse  ];
-      (Dbuff.f[TE  ])[k] = (D.f[BW  ])[kbw  ];
-      (Dbuff.f[BW  ])[k] = (D.f[TE  ])[kte  ];
-      (Dbuff.f[BE  ])[k] = (D.f[TW  ])[ktw  ];
-      (Dbuff.f[TW  ])[k] = (D.f[BE  ])[kbe  ];
-      (Dbuff.f[TN  ])[k] = (D.f[BS  ])[kbs  ];
-      (Dbuff.f[BS  ])[k] = (D.f[TN  ])[ktn  ];
-      (Dbuff.f[BN  ])[k] = (D.f[TS  ])[kts  ];
-      (Dbuff.f[TS  ])[k] = (D.f[BN  ])[kbn  ];
-      (Dbuff.f[REST])[k] = (D.f[REST])[kzero];
-      (Dbuff.f[TNE ])[k] = (D.f[BSW ])[kbsw ];
-      (Dbuff.f[TSW ])[k] = (D.f[BNE ])[kbne ];
-      (Dbuff.f[TSE ])[k] = (D.f[BNW ])[kbnw ];
-      (Dbuff.f[TNW ])[k] = (D.f[BSE ])[kbse ];
-      (Dbuff.f[BNE ])[k] = (D.f[TSW ])[ktsw ];
-      (Dbuff.f[BSW ])[k] = (D.f[TNE ])[ktne ];
-      (Dbuff.f[BSE ])[k] = (D.f[TNW ])[ktnw ];
-      (Dbuff.f[BNW ])[k] = (D.f[TSE ])[ktse ];
+      //(Dbuff.f[DIR_P00   ])[k] = (D.f[DIR_P00   ])[ke   ];
+      //(Dbuff.f[DIR_M00   ])[k] = (D.f[DIR_M00   ])[kw   ];
+      //(Dbuff.f[DIR_0P0   ])[k] = (D.f[DIR_0P0   ])[kn   ];
+      //(Dbuff.f[DIR_0M0   ])[k] = (D.f[DIR_0M0   ])[ks   ];
+      //(Dbuff.f[DIR_00P   ])[k] = (D.f[DIR_00P   ])[kt   ];
+      //(Dbuff.f[DIR_00M   ])[k] = (D.f[DIR_00M   ])[kb   ];
+      //(Dbuff.f[DIR_PP0  ])[k] = (D.f[DIR_PP0  ])[kne  ];
+      //(Dbuff.f[DIR_MM0  ])[k] = (D.f[DIR_MM0  ])[ksw  ];
+      //(Dbuff.f[DIR_PM0  ])[k] = (D.f[DIR_PM0  ])[kse  ];
+      //(Dbuff.f[DIR_MP0  ])[k] = (D.f[DIR_MP0  ])[knw  ];
+      //(Dbuff.f[DIR_P0P  ])[k] = (D.f[DIR_P0P  ])[kte  ];
+      //(Dbuff.f[DIR_M0M  ])[k] = (D.f[DIR_M0M  ])[kbw  ];
+      //(Dbuff.f[DIR_P0M  ])[k] = (D.f[DIR_P0M  ])[kbe  ];
+      //(Dbuff.f[DIR_M0P  ])[k] = (D.f[DIR_M0P  ])[ktw  ];
+      //(Dbuff.f[DIR_0PP  ])[k] = (D.f[DIR_0PP  ])[ktn  ];
+      //(Dbuff.f[DIR_0MM  ])[k] = (D.f[DIR_0MM  ])[kbs  ];
+      //(Dbuff.f[DIR_0PM  ])[k] = (D.f[DIR_0PM  ])[kbn  ];
+      //(Dbuff.f[DIR_0MP  ])[k] = (D.f[DIR_0MP  ])[kts  ];
+      //(Dbuff.f[DIR_000])[k] = (D.f[DIR_000])[kzero];
+      //(Dbuff.f[DIR_PPP ])[k] = (D.f[DIR_PPP ])[ktne ];
+      //(Dbuff.f[DIR_MMP ])[k] = (D.f[DIR_MMP ])[ktsw ];
+      //(Dbuff.f[DIR_PMP ])[k] = (D.f[DIR_PMP ])[ktse ];
+      //(Dbuff.f[DIR_MPP ])[k] = (D.f[DIR_MPP ])[ktnw ];
+      //(Dbuff.f[DIR_PPM ])[k] = (D.f[DIR_PPM ])[kbne ];
+      //(Dbuff.f[DIR_MMM ])[k] = (D.f[DIR_MMM ])[kbsw ];
+      //(Dbuff.f[DIR_PMM ])[k] = (D.f[DIR_PMM ])[kbse ];
+      //(Dbuff.f[DIR_MPM ])[k] = (D.f[DIR_MPM ])[kbnw ];
+      (Dbuff.f[DIR_P00   ])[k] = (D.f[DIR_M00   ])[kw   ];
+      (Dbuff.f[DIR_M00   ])[k] = (D.f[DIR_P00   ])[ke   ];
+      (Dbuff.f[DIR_0P0   ])[k] = (D.f[DIR_0M0   ])[ks   ];
+      (Dbuff.f[DIR_0M0   ])[k] = (D.f[DIR_0P0   ])[kn   ];
+      (Dbuff.f[DIR_00P   ])[k] = (D.f[DIR_00M   ])[kb   ];
+      (Dbuff.f[DIR_00M   ])[k] = (D.f[DIR_00P   ])[kt   ];
+      (Dbuff.f[DIR_PP0  ])[k] = (D.f[DIR_MM0  ])[ksw  ];
+      (Dbuff.f[DIR_MM0  ])[k] = (D.f[DIR_PP0  ])[kne  ];
+      (Dbuff.f[DIR_PM0  ])[k] = (D.f[DIR_MP0  ])[knw  ];
+      (Dbuff.f[DIR_MP0  ])[k] = (D.f[DIR_PM0  ])[kse  ];
+      (Dbuff.f[DIR_P0P  ])[k] = (D.f[DIR_M0M  ])[kbw  ];
+      (Dbuff.f[DIR_M0M  ])[k] = (D.f[DIR_P0P  ])[kte  ];
+      (Dbuff.f[DIR_P0M  ])[k] = (D.f[DIR_M0P  ])[ktw  ];
+      (Dbuff.f[DIR_M0P  ])[k] = (D.f[DIR_P0M  ])[kbe  ];
+      (Dbuff.f[DIR_0PP  ])[k] = (D.f[DIR_0MM  ])[kbs  ];
+      (Dbuff.f[DIR_0MM  ])[k] = (D.f[DIR_0PP  ])[ktn  ];
+      (Dbuff.f[DIR_0PM  ])[k] = (D.f[DIR_0MP  ])[kts  ];
+      (Dbuff.f[DIR_0MP  ])[k] = (D.f[DIR_0PM  ])[kbn  ];
+      (Dbuff.f[DIR_000])[k] = (D.f[DIR_000])[kzero];
+      (Dbuff.f[DIR_PPP ])[k] = (D.f[DIR_MMM ])[kbsw ];
+      (Dbuff.f[DIR_MMP ])[k] = (D.f[DIR_PPM ])[kbne ];
+      (Dbuff.f[DIR_PMP ])[k] = (D.f[DIR_MPM ])[kbnw ];
+      (Dbuff.f[DIR_MPP ])[k] = (D.f[DIR_PMM ])[kbse ];
+      (Dbuff.f[DIR_PPM ])[k] = (D.f[DIR_MMP ])[ktsw ];
+      (Dbuff.f[DIR_MMM ])[k] = (D.f[DIR_PPP ])[ktne ];
+      (Dbuff.f[DIR_PMM ])[k] = (D.f[DIR_MPP ])[ktnw ];
+      (Dbuff.f[DIR_MPM ])[k] = (D.f[DIR_PMP ])[ktse ];
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -243,7 +243,7 @@ extern "C" __global__ void getSendFsPost27(real* DD,
 
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void setRecvFsPost27(real* DD,
+__global__ void setRecvFsPost27(real* DD,
 										   real* bufferFs,
 										   int* recvIndex,
                                            int buffmax,
@@ -301,150 +301,150 @@ extern "C" __global__ void setRecvFsPost27(real* DD,
       Distributions27 D;
       if (isEvenTimestep==true)
       {
-         D.f[E   ] = &DD[E   *size_Mat];
-         D.f[W   ] = &DD[W   *size_Mat];
-         D.f[N   ] = &DD[N   *size_Mat];
-         D.f[S   ] = &DD[S   *size_Mat];
-         D.f[T   ] = &DD[T   *size_Mat];
-         D.f[B   ] = &DD[B   *size_Mat];
-         D.f[NE  ] = &DD[NE  *size_Mat];
-         D.f[SW  ] = &DD[SW  *size_Mat];
-         D.f[SE  ] = &DD[SE  *size_Mat];
-         D.f[NW  ] = &DD[NW  *size_Mat];
-         D.f[TE  ] = &DD[TE  *size_Mat];
-         D.f[BW  ] = &DD[BW  *size_Mat];
-         D.f[BE  ] = &DD[BE  *size_Mat];
-         D.f[TW  ] = &DD[TW  *size_Mat];
-         D.f[TN  ] = &DD[TN  *size_Mat];
-         D.f[BS  ] = &DD[BS  *size_Mat];
-         D.f[BN  ] = &DD[BN  *size_Mat];
-         D.f[TS  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[TNE *size_Mat];
-         D.f[TSW ] = &DD[TSW *size_Mat];
-         D.f[TSE ] = &DD[TSE *size_Mat];
-         D.f[TNW ] = &DD[TNW *size_Mat];
-         D.f[BNE ] = &DD[BNE *size_Mat];
-         D.f[BSW ] = &DD[BSW *size_Mat];
-         D.f[BSE ] = &DD[BSE *size_Mat];
-         D.f[BNW ] = &DD[BNW *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
       } 
       else
       {
-         D.f[W   ] = &DD[E   *size_Mat];
-         D.f[E   ] = &DD[W   *size_Mat];
-         D.f[S   ] = &DD[N   *size_Mat];
-         D.f[N   ] = &DD[S   *size_Mat];
-         D.f[B   ] = &DD[T   *size_Mat];
-         D.f[T   ] = &DD[B   *size_Mat];
-         D.f[SW  ] = &DD[NE  *size_Mat];
-         D.f[NE  ] = &DD[SW  *size_Mat];
-         D.f[NW  ] = &DD[SE  *size_Mat];
-         D.f[SE  ] = &DD[NW  *size_Mat];
-         D.f[BW  ] = &DD[TE  *size_Mat];
-         D.f[TE  ] = &DD[BW  *size_Mat];
-         D.f[TW  ] = &DD[BE  *size_Mat];
-         D.f[BE  ] = &DD[TW  *size_Mat];
-         D.f[BS  ] = &DD[TN  *size_Mat];
-         D.f[TN  ] = &DD[BS  *size_Mat];
-         D.f[TS  ] = &DD[BN  *size_Mat];
-         D.f[BN  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[BSW *size_Mat];
-         D.f[TSW ] = &DD[BNE *size_Mat];
-         D.f[TSE ] = &DD[BNW *size_Mat];
-         D.f[TNW ] = &DD[BSE *size_Mat];
-         D.f[BNE ] = &DD[TSW *size_Mat];
-         D.f[BSW ] = &DD[TNE *size_Mat];
-         D.f[BSE ] = &DD[TNW *size_Mat];
-         D.f[BNW ] = &DD[TSE *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
       }
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  //set Pointer for Buffer Fs
       Distributions27 Dbuff;
-      Dbuff.f[E   ] = &bufferFs[E   *buffmax];
-      Dbuff.f[W   ] = &bufferFs[W   *buffmax];
-      Dbuff.f[N   ] = &bufferFs[N   *buffmax];
-      Dbuff.f[S   ] = &bufferFs[S   *buffmax];
-      Dbuff.f[T   ] = &bufferFs[T   *buffmax];
-      Dbuff.f[B   ] = &bufferFs[B   *buffmax];
-      Dbuff.f[NE  ] = &bufferFs[NE  *buffmax];
-      Dbuff.f[SW  ] = &bufferFs[SW  *buffmax];
-      Dbuff.f[SE  ] = &bufferFs[SE  *buffmax];
-      Dbuff.f[NW  ] = &bufferFs[NW  *buffmax];
-      Dbuff.f[TE  ] = &bufferFs[TE  *buffmax];
-      Dbuff.f[BW  ] = &bufferFs[BW  *buffmax];
-      Dbuff.f[BE  ] = &bufferFs[BE  *buffmax];
-      Dbuff.f[TW  ] = &bufferFs[TW  *buffmax];
-      Dbuff.f[TN  ] = &bufferFs[TN  *buffmax];
-      Dbuff.f[BS  ] = &bufferFs[BS  *buffmax];
-      Dbuff.f[BN  ] = &bufferFs[BN  *buffmax];
-      Dbuff.f[TS  ] = &bufferFs[TS  *buffmax];
-      Dbuff.f[REST] = &bufferFs[REST*buffmax];
-      Dbuff.f[TNE ] = &bufferFs[TNE *buffmax];
-      Dbuff.f[TSW ] = &bufferFs[TSW *buffmax];
-      Dbuff.f[TSE ] = &bufferFs[TSE *buffmax];
-      Dbuff.f[TNW ] = &bufferFs[TNW *buffmax];
-      Dbuff.f[BNE ] = &bufferFs[BNE *buffmax];
-      Dbuff.f[BSW ] = &bufferFs[BSW *buffmax];
-      Dbuff.f[BSE ] = &bufferFs[BSE *buffmax];
-      Dbuff.f[BNW ] = &bufferFs[BNW *buffmax];
+      Dbuff.f[DIR_P00   ] = &bufferFs[DIR_P00   *buffmax];
+      Dbuff.f[DIR_M00   ] = &bufferFs[DIR_M00   *buffmax];
+      Dbuff.f[DIR_0P0   ] = &bufferFs[DIR_0P0   *buffmax];
+      Dbuff.f[DIR_0M0   ] = &bufferFs[DIR_0M0   *buffmax];
+      Dbuff.f[DIR_00P   ] = &bufferFs[DIR_00P   *buffmax];
+      Dbuff.f[DIR_00M   ] = &bufferFs[DIR_00M   *buffmax];
+      Dbuff.f[DIR_PP0  ] = &bufferFs[DIR_PP0  *buffmax];
+      Dbuff.f[DIR_MM0  ] = &bufferFs[DIR_MM0  *buffmax];
+      Dbuff.f[DIR_PM0  ] = &bufferFs[DIR_PM0  *buffmax];
+      Dbuff.f[DIR_MP0  ] = &bufferFs[DIR_MP0  *buffmax];
+      Dbuff.f[DIR_P0P  ] = &bufferFs[DIR_P0P  *buffmax];
+      Dbuff.f[DIR_M0M  ] = &bufferFs[DIR_M0M  *buffmax];
+      Dbuff.f[DIR_P0M  ] = &bufferFs[DIR_P0M  *buffmax];
+      Dbuff.f[DIR_M0P  ] = &bufferFs[DIR_M0P  *buffmax];
+      Dbuff.f[DIR_0PP  ] = &bufferFs[DIR_0PP  *buffmax];
+      Dbuff.f[DIR_0MM  ] = &bufferFs[DIR_0MM  *buffmax];
+      Dbuff.f[DIR_0PM  ] = &bufferFs[DIR_0PM  *buffmax];
+      Dbuff.f[DIR_0MP  ] = &bufferFs[DIR_0MP  *buffmax];
+      Dbuff.f[DIR_000] = &bufferFs[DIR_000*buffmax];
+      Dbuff.f[DIR_PPP ] = &bufferFs[DIR_PPP *buffmax];
+      Dbuff.f[DIR_MMP ] = &bufferFs[DIR_MMP *buffmax];
+      Dbuff.f[DIR_PMP ] = &bufferFs[DIR_PMP *buffmax];
+      Dbuff.f[DIR_MPP ] = &bufferFs[DIR_MPP *buffmax];
+      Dbuff.f[DIR_PPM ] = &bufferFs[DIR_PPM *buffmax];
+      Dbuff.f[DIR_MMM ] = &bufferFs[DIR_MMM *buffmax];
+      Dbuff.f[DIR_PMM ] = &bufferFs[DIR_PMM *buffmax];
+      Dbuff.f[DIR_MPM ] = &bufferFs[DIR_MPM *buffmax];
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  //copy from buffer
-      //(D.f[E   ])[ke   ] = (Dbuff.f[E   ])[k];
-      //(D.f[W   ])[kw   ] = (Dbuff.f[W   ])[k];
-      //(D.f[N   ])[kn   ] = (Dbuff.f[N   ])[k];
-      //(D.f[S   ])[ks   ] = (Dbuff.f[S   ])[k];
-      //(D.f[T   ])[kt   ] = (Dbuff.f[T   ])[k];
-      //(D.f[B   ])[kb   ] = (Dbuff.f[B   ])[k];
-      //(D.f[NE  ])[kne  ] = (Dbuff.f[NE  ])[k];
-      //(D.f[SW  ])[ksw  ] = (Dbuff.f[SW  ])[k];
-      //(D.f[SE  ])[kse  ] = (Dbuff.f[SE  ])[k];
-      //(D.f[NW  ])[knw  ] = (Dbuff.f[NW  ])[k];
-      //(D.f[TE  ])[kte  ] = (Dbuff.f[TE  ])[k];
-      //(D.f[BW  ])[kbw  ] = (Dbuff.f[BW  ])[k];
-      //(D.f[BE  ])[kbe  ] = (Dbuff.f[BE  ])[k];
-      //(D.f[TW  ])[ktw  ] = (Dbuff.f[TW  ])[k];
-      //(D.f[TN  ])[ktn  ] = (Dbuff.f[TN  ])[k];
-      //(D.f[BS  ])[kbs  ] = (Dbuff.f[BS  ])[k];
-      //(D.f[BN  ])[kbn  ] = (Dbuff.f[BN  ])[k];
-      //(D.f[TS  ])[kts  ] = (Dbuff.f[TS  ])[k];
-      //(D.f[REST])[kzero] = (Dbuff.f[REST])[k];
-      //(D.f[TNE ])[ktne ] = (Dbuff.f[TNE ])[k];
-      //(D.f[TSW ])[ktsw ] = (Dbuff.f[TSW ])[k];
-      //(D.f[TSE ])[ktse ] = (Dbuff.f[TSE ])[k];
-      //(D.f[TNW ])[ktnw ] = (Dbuff.f[TNW ])[k];
-      //(D.f[BNE ])[kbne ] = (Dbuff.f[BNE ])[k];
-      //(D.f[BSW ])[kbsw ] = (Dbuff.f[BSW ])[k];
-      //(D.f[BSE ])[kbse ] = (Dbuff.f[BSE ])[k];
-      //(D.f[BNW ])[kbnw ] = (Dbuff.f[BNW ])[k];
-      (D.f[W   ])[kw   ] = (Dbuff.f[E   ])[k];
-      (D.f[E   ])[ke   ] = (Dbuff.f[W   ])[k];
-      (D.f[S   ])[ks   ] = (Dbuff.f[N   ])[k];
-      (D.f[N   ])[kn   ] = (Dbuff.f[S   ])[k];
-      (D.f[B   ])[kb   ] = (Dbuff.f[T   ])[k];
-      (D.f[T   ])[kt   ] = (Dbuff.f[B   ])[k];
-      (D.f[SW  ])[ksw  ] = (Dbuff.f[NE  ])[k];
-      (D.f[NE  ])[kne  ] = (Dbuff.f[SW  ])[k];
-      (D.f[NW  ])[knw  ] = (Dbuff.f[SE  ])[k];
-      (D.f[SE  ])[kse  ] = (Dbuff.f[NW  ])[k];
-      (D.f[BW  ])[kbw  ] = (Dbuff.f[TE  ])[k];
-      (D.f[TE  ])[kte  ] = (Dbuff.f[BW  ])[k];
-      (D.f[TW  ])[ktw  ] = (Dbuff.f[BE  ])[k];
-      (D.f[BE  ])[kbe  ] = (Dbuff.f[TW  ])[k];
-      (D.f[BS  ])[kbs  ] = (Dbuff.f[TN  ])[k];
-      (D.f[TN  ])[ktn  ] = (Dbuff.f[BS  ])[k];
-      (D.f[TS  ])[kts  ] = (Dbuff.f[BN  ])[k];
-      (D.f[BN  ])[kbn  ] = (Dbuff.f[TS  ])[k];
-      (D.f[REST])[kzero] = (Dbuff.f[REST])[k];
-      (D.f[BSW ])[kbsw ] = (Dbuff.f[TNE ])[k];
-      (D.f[BNE ])[kbne ] = (Dbuff.f[TSW ])[k];
-      (D.f[BNW ])[kbnw ] = (Dbuff.f[TSE ])[k];
-      (D.f[BSE ])[kbse ] = (Dbuff.f[TNW ])[k];
-      (D.f[TSW ])[ktsw ] = (Dbuff.f[BNE ])[k];
-      (D.f[TNE ])[ktne ] = (Dbuff.f[BSW ])[k];
-      (D.f[TNW ])[ktnw ] = (Dbuff.f[BSE ])[k];
-      (D.f[TSE ])[ktse ] = (Dbuff.f[BNW ])[k];
+      //(D.f[DIR_P00   ])[ke   ] = (Dbuff.f[DIR_P00   ])[k];
+      //(D.f[DIR_M00   ])[kw   ] = (Dbuff.f[DIR_M00   ])[k];
+      //(D.f[DIR_0P0   ])[kn   ] = (Dbuff.f[DIR_0P0   ])[k];
+      //(D.f[DIR_0M0   ])[ks   ] = (Dbuff.f[DIR_0M0   ])[k];
+      //(D.f[DIR_00P   ])[kt   ] = (Dbuff.f[DIR_00P   ])[k];
+      //(D.f[DIR_00M   ])[kb   ] = (Dbuff.f[DIR_00M   ])[k];
+      //(D.f[DIR_PP0  ])[kne  ] = (Dbuff.f[DIR_PP0  ])[k];
+      //(D.f[DIR_MM0  ])[ksw  ] = (Dbuff.f[DIR_MM0  ])[k];
+      //(D.f[DIR_PM0  ])[kse  ] = (Dbuff.f[DIR_PM0  ])[k];
+      //(D.f[DIR_MP0  ])[knw  ] = (Dbuff.f[DIR_MP0  ])[k];
+      //(D.f[DIR_P0P  ])[kte  ] = (Dbuff.f[DIR_P0P  ])[k];
+      //(D.f[DIR_M0M  ])[kbw  ] = (Dbuff.f[DIR_M0M  ])[k];
+      //(D.f[DIR_P0M  ])[kbe  ] = (Dbuff.f[DIR_P0M  ])[k];
+      //(D.f[DIR_M0P  ])[ktw  ] = (Dbuff.f[DIR_M0P  ])[k];
+      //(D.f[DIR_0PP  ])[ktn  ] = (Dbuff.f[DIR_0PP  ])[k];
+      //(D.f[DIR_0MM  ])[kbs  ] = (Dbuff.f[DIR_0MM  ])[k];
+      //(D.f[DIR_0PM  ])[kbn  ] = (Dbuff.f[DIR_0PM  ])[k];
+      //(D.f[DIR_0MP  ])[kts  ] = (Dbuff.f[DIR_0MP  ])[k];
+      //(D.f[DIR_000])[kzero] = (Dbuff.f[DIR_000])[k];
+      //(D.f[DIR_PPP ])[ktne ] = (Dbuff.f[DIR_PPP ])[k];
+      //(D.f[DIR_MMP ])[ktsw ] = (Dbuff.f[DIR_MMP ])[k];
+      //(D.f[DIR_PMP ])[ktse ] = (Dbuff.f[DIR_PMP ])[k];
+      //(D.f[DIR_MPP ])[ktnw ] = (Dbuff.f[DIR_MPP ])[k];
+      //(D.f[DIR_PPM ])[kbne ] = (Dbuff.f[DIR_PPM ])[k];
+      //(D.f[DIR_MMM ])[kbsw ] = (Dbuff.f[DIR_MMM ])[k];
+      //(D.f[DIR_PMM ])[kbse ] = (Dbuff.f[DIR_PMM ])[k];
+      //(D.f[DIR_MPM ])[kbnw ] = (Dbuff.f[DIR_MPM ])[k];
+      (D.f[DIR_M00   ])[kw   ] = (Dbuff.f[DIR_P00   ])[k];
+      (D.f[DIR_P00   ])[ke   ] = (Dbuff.f[DIR_M00   ])[k];
+      (D.f[DIR_0M0   ])[ks   ] = (Dbuff.f[DIR_0P0   ])[k];
+      (D.f[DIR_0P0   ])[kn   ] = (Dbuff.f[DIR_0M0   ])[k];
+      (D.f[DIR_00M   ])[kb   ] = (Dbuff.f[DIR_00P   ])[k];
+      (D.f[DIR_00P   ])[kt   ] = (Dbuff.f[DIR_00M   ])[k];
+      (D.f[DIR_MM0  ])[ksw  ] = (Dbuff.f[DIR_PP0  ])[k];
+      (D.f[DIR_PP0  ])[kne  ] = (Dbuff.f[DIR_MM0  ])[k];
+      (D.f[DIR_MP0  ])[knw  ] = (Dbuff.f[DIR_PM0  ])[k];
+      (D.f[DIR_PM0  ])[kse  ] = (Dbuff.f[DIR_MP0  ])[k];
+      (D.f[DIR_M0M  ])[kbw  ] = (Dbuff.f[DIR_P0P  ])[k];
+      (D.f[DIR_P0P  ])[kte  ] = (Dbuff.f[DIR_M0M  ])[k];
+      (D.f[DIR_M0P  ])[ktw  ] = (Dbuff.f[DIR_P0M  ])[k];
+      (D.f[DIR_P0M  ])[kbe  ] = (Dbuff.f[DIR_M0P  ])[k];
+      (D.f[DIR_0MM  ])[kbs  ] = (Dbuff.f[DIR_0PP  ])[k];
+      (D.f[DIR_0PP  ])[ktn  ] = (Dbuff.f[DIR_0MM  ])[k];
+      (D.f[DIR_0MP  ])[kts  ] = (Dbuff.f[DIR_0PM  ])[k];
+      (D.f[DIR_0PM  ])[kbn  ] = (Dbuff.f[DIR_0MP  ])[k];
+      (D.f[DIR_000])[kzero] = (Dbuff.f[DIR_000])[k];
+      (D.f[DIR_MMM ])[kbsw ] = (Dbuff.f[DIR_PPP ])[k];
+      (D.f[DIR_PPM ])[kbne ] = (Dbuff.f[DIR_MMP ])[k];
+      (D.f[DIR_MPM ])[kbnw ] = (Dbuff.f[DIR_PMP ])[k];
+      (D.f[DIR_PMM ])[kbse ] = (Dbuff.f[DIR_MPP ])[k];
+      (D.f[DIR_MMP ])[ktsw ] = (Dbuff.f[DIR_PPM ])[k];
+      (D.f[DIR_PPP ])[ktne ] = (Dbuff.f[DIR_MMM ])[k];
+      (D.f[DIR_MPP ])[ktnw ] = (Dbuff.f[DIR_PMM ])[k];
+      (D.f[DIR_PMP ])[ktse ] = (Dbuff.f[DIR_MPM ])[k];
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -478,7 +478,7 @@ extern "C" __global__ void setRecvFsPost27(real* DD,
 
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void getSendFsPre27(real* DD,
+__global__ void getSendFsPre27(real* DD,
 										  real* bufferFs,
 										  int* sendIndex,
                                           int buffmax,
@@ -536,123 +536,123 @@ extern "C" __global__ void getSendFsPre27(real* DD,
       Distributions27 D;
       if (isEvenTimestep==true)
       {
-         D.f[E   ] = &DD[E   *size_Mat];
-         D.f[W   ] = &DD[W   *size_Mat];
-         D.f[N   ] = &DD[N   *size_Mat];
-         D.f[S   ] = &DD[S   *size_Mat];
-         D.f[T   ] = &DD[T   *size_Mat];
-         D.f[B   ] = &DD[B   *size_Mat];
-         D.f[NE  ] = &DD[NE  *size_Mat];
-         D.f[SW  ] = &DD[SW  *size_Mat];
-         D.f[SE  ] = &DD[SE  *size_Mat];
-         D.f[NW  ] = &DD[NW  *size_Mat];
-         D.f[TE  ] = &DD[TE  *size_Mat];
-         D.f[BW  ] = &DD[BW  *size_Mat];
-         D.f[BE  ] = &DD[BE  *size_Mat];
-         D.f[TW  ] = &DD[TW  *size_Mat];
-         D.f[TN  ] = &DD[TN  *size_Mat];
-         D.f[BS  ] = &DD[BS  *size_Mat];
-         D.f[BN  ] = &DD[BN  *size_Mat];
-         D.f[TS  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[TNE *size_Mat];
-         D.f[TSW ] = &DD[TSW *size_Mat];
-         D.f[TSE ] = &DD[TSE *size_Mat];
-         D.f[TNW ] = &DD[TNW *size_Mat];
-         D.f[BNE ] = &DD[BNE *size_Mat];
-         D.f[BSW ] = &DD[BSW *size_Mat];
-         D.f[BSE ] = &DD[BSE *size_Mat];
-         D.f[BNW ] = &DD[BNW *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
       } 
       else
       {
-         D.f[W   ] = &DD[E   *size_Mat];
-         D.f[E   ] = &DD[W   *size_Mat];
-         D.f[S   ] = &DD[N   *size_Mat];
-         D.f[N   ] = &DD[S   *size_Mat];
-         D.f[B   ] = &DD[T   *size_Mat];
-         D.f[T   ] = &DD[B   *size_Mat];
-         D.f[SW  ] = &DD[NE  *size_Mat];
-         D.f[NE  ] = &DD[SW  *size_Mat];
-         D.f[NW  ] = &DD[SE  *size_Mat];
-         D.f[SE  ] = &DD[NW  *size_Mat];
-         D.f[BW  ] = &DD[TE  *size_Mat];
-         D.f[TE  ] = &DD[BW  *size_Mat];
-         D.f[TW  ] = &DD[BE  *size_Mat];
-         D.f[BE  ] = &DD[TW  *size_Mat];
-         D.f[BS  ] = &DD[TN  *size_Mat];
-         D.f[TN  ] = &DD[BS  *size_Mat];
-         D.f[TS  ] = &DD[BN  *size_Mat];
-         D.f[BN  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[BSW *size_Mat];
-         D.f[TSW ] = &DD[BNE *size_Mat];
-         D.f[TSE ] = &DD[BNW *size_Mat];
-         D.f[TNW ] = &DD[BSE *size_Mat];
-         D.f[BNE ] = &DD[TSW *size_Mat];
-         D.f[BSW ] = &DD[TNE *size_Mat];
-         D.f[BSE ] = &DD[TNW *size_Mat];
-         D.f[BNW ] = &DD[TSE *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
       }
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  //set Pointer for Buffer Fs
       Distributions27 Dbuff;
-      Dbuff.f[E   ] = &bufferFs[E   *buffmax];
-      Dbuff.f[W   ] = &bufferFs[W   *buffmax];
-      Dbuff.f[N   ] = &bufferFs[N   *buffmax];
-      Dbuff.f[S   ] = &bufferFs[S   *buffmax];
-      Dbuff.f[T   ] = &bufferFs[T   *buffmax];
-      Dbuff.f[B   ] = &bufferFs[B   *buffmax];
-      Dbuff.f[NE  ] = &bufferFs[NE  *buffmax];
-      Dbuff.f[SW  ] = &bufferFs[SW  *buffmax];
-      Dbuff.f[SE  ] = &bufferFs[SE  *buffmax];
-      Dbuff.f[NW  ] = &bufferFs[NW  *buffmax];
-      Dbuff.f[TE  ] = &bufferFs[TE  *buffmax];
-      Dbuff.f[BW  ] = &bufferFs[BW  *buffmax];
-      Dbuff.f[BE  ] = &bufferFs[BE  *buffmax];
-      Dbuff.f[TW  ] = &bufferFs[TW  *buffmax];
-      Dbuff.f[TN  ] = &bufferFs[TN  *buffmax];
-      Dbuff.f[BS  ] = &bufferFs[BS  *buffmax];
-      Dbuff.f[BN  ] = &bufferFs[BN  *buffmax];
-      Dbuff.f[TS  ] = &bufferFs[TS  *buffmax];
-      Dbuff.f[REST] = &bufferFs[REST*buffmax];
-      Dbuff.f[TNE ] = &bufferFs[TNE *buffmax];
-      Dbuff.f[TSW ] = &bufferFs[TSW *buffmax];
-      Dbuff.f[TSE ] = &bufferFs[TSE *buffmax];
-      Dbuff.f[TNW ] = &bufferFs[TNW *buffmax];
-      Dbuff.f[BNE ] = &bufferFs[BNE *buffmax];
-      Dbuff.f[BSW ] = &bufferFs[BSW *buffmax];
-      Dbuff.f[BSE ] = &bufferFs[BSE *buffmax];
-      Dbuff.f[BNW ] = &bufferFs[BNW *buffmax];
+      Dbuff.f[DIR_P00   ] = &bufferFs[DIR_P00   *buffmax];
+      Dbuff.f[DIR_M00   ] = &bufferFs[DIR_M00   *buffmax];
+      Dbuff.f[DIR_0P0   ] = &bufferFs[DIR_0P0   *buffmax];
+      Dbuff.f[DIR_0M0   ] = &bufferFs[DIR_0M0   *buffmax];
+      Dbuff.f[DIR_00P   ] = &bufferFs[DIR_00P   *buffmax];
+      Dbuff.f[DIR_00M   ] = &bufferFs[DIR_00M   *buffmax];
+      Dbuff.f[DIR_PP0  ] = &bufferFs[DIR_PP0  *buffmax];
+      Dbuff.f[DIR_MM0  ] = &bufferFs[DIR_MM0  *buffmax];
+      Dbuff.f[DIR_PM0  ] = &bufferFs[DIR_PM0  *buffmax];
+      Dbuff.f[DIR_MP0  ] = &bufferFs[DIR_MP0  *buffmax];
+      Dbuff.f[DIR_P0P  ] = &bufferFs[DIR_P0P  *buffmax];
+      Dbuff.f[DIR_M0M  ] = &bufferFs[DIR_M0M  *buffmax];
+      Dbuff.f[DIR_P0M  ] = &bufferFs[DIR_P0M  *buffmax];
+      Dbuff.f[DIR_M0P  ] = &bufferFs[DIR_M0P  *buffmax];
+      Dbuff.f[DIR_0PP  ] = &bufferFs[DIR_0PP  *buffmax];
+      Dbuff.f[DIR_0MM  ] = &bufferFs[DIR_0MM  *buffmax];
+      Dbuff.f[DIR_0PM  ] = &bufferFs[DIR_0PM  *buffmax];
+      Dbuff.f[DIR_0MP  ] = &bufferFs[DIR_0MP  *buffmax];
+      Dbuff.f[DIR_000] = &bufferFs[DIR_000*buffmax];
+      Dbuff.f[DIR_PPP ] = &bufferFs[DIR_PPP *buffmax];
+      Dbuff.f[DIR_MMP ] = &bufferFs[DIR_MMP *buffmax];
+      Dbuff.f[DIR_PMP ] = &bufferFs[DIR_PMP *buffmax];
+      Dbuff.f[DIR_MPP ] = &bufferFs[DIR_MPP *buffmax];
+      Dbuff.f[DIR_PPM ] = &bufferFs[DIR_PPM *buffmax];
+      Dbuff.f[DIR_MMM ] = &bufferFs[DIR_MMM *buffmax];
+      Dbuff.f[DIR_PMM ] = &bufferFs[DIR_PMM *buffmax];
+      Dbuff.f[DIR_MPM ] = &bufferFs[DIR_MPM *buffmax];
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  //copy to buffer
-      (Dbuff.f[E   ])[k] = (D.f[E   ])[ke   ];
-      (Dbuff.f[W   ])[k] = (D.f[W   ])[kw   ];
-      (Dbuff.f[N   ])[k] = (D.f[N   ])[kn   ];
-      (Dbuff.f[S   ])[k] = (D.f[S   ])[ks   ];
-      (Dbuff.f[T   ])[k] = (D.f[T   ])[kt   ];
-      (Dbuff.f[B   ])[k] = (D.f[B   ])[kb   ];
-      (Dbuff.f[NE  ])[k] = (D.f[NE  ])[kne  ];
-      (Dbuff.f[SW  ])[k] = (D.f[SW  ])[ksw  ];
-      (Dbuff.f[SE  ])[k] = (D.f[SE  ])[kse  ];
-      (Dbuff.f[NW  ])[k] = (D.f[NW  ])[knw  ];
-      (Dbuff.f[TE  ])[k] = (D.f[TE  ])[kte  ];
-      (Dbuff.f[BW  ])[k] = (D.f[BW  ])[kbw  ];
-      (Dbuff.f[BE  ])[k] = (D.f[BE  ])[kbe  ];
-      (Dbuff.f[TW  ])[k] = (D.f[TW  ])[ktw  ];
-      (Dbuff.f[TN  ])[k] = (D.f[TN  ])[ktn  ];
-      (Dbuff.f[BS  ])[k] = (D.f[BS  ])[kbs  ];
-      (Dbuff.f[BN  ])[k] = (D.f[BN  ])[kbn  ];
-      (Dbuff.f[TS  ])[k] = (D.f[TS  ])[kts  ];
-      (Dbuff.f[REST])[k] = (D.f[REST])[kzero];
-      (Dbuff.f[TNE ])[k] = (D.f[TNE ])[ktne ];
-      (Dbuff.f[TSW ])[k] = (D.f[TSW ])[ktsw ];
-      (Dbuff.f[TSE ])[k] = (D.f[TSE ])[ktse ];
-      (Dbuff.f[TNW ])[k] = (D.f[TNW ])[ktnw ];
-      (Dbuff.f[BNE ])[k] = (D.f[BNE ])[kbne ];
-      (Dbuff.f[BSW ])[k] = (D.f[BSW ])[kbsw ];
-      (Dbuff.f[BSE ])[k] = (D.f[BSE ])[kbse ];
-      (Dbuff.f[BNW ])[k] = (D.f[BNW ])[kbnw ];
+      (Dbuff.f[DIR_P00   ])[k] = (D.f[DIR_P00   ])[ke   ];
+      (Dbuff.f[DIR_M00   ])[k] = (D.f[DIR_M00   ])[kw   ];
+      (Dbuff.f[DIR_0P0   ])[k] = (D.f[DIR_0P0   ])[kn   ];
+      (Dbuff.f[DIR_0M0   ])[k] = (D.f[DIR_0M0   ])[ks   ];
+      (Dbuff.f[DIR_00P   ])[k] = (D.f[DIR_00P   ])[kt   ];
+      (Dbuff.f[DIR_00M   ])[k] = (D.f[DIR_00M   ])[kb   ];
+      (Dbuff.f[DIR_PP0  ])[k] = (D.f[DIR_PP0  ])[kne  ];
+      (Dbuff.f[DIR_MM0  ])[k] = (D.f[DIR_MM0  ])[ksw  ];
+      (Dbuff.f[DIR_PM0  ])[k] = (D.f[DIR_PM0  ])[kse  ];
+      (Dbuff.f[DIR_MP0  ])[k] = (D.f[DIR_MP0  ])[knw  ];
+      (Dbuff.f[DIR_P0P  ])[k] = (D.f[DIR_P0P  ])[kte  ];
+      (Dbuff.f[DIR_M0M  ])[k] = (D.f[DIR_M0M  ])[kbw  ];
+      (Dbuff.f[DIR_P0M  ])[k] = (D.f[DIR_P0M  ])[kbe  ];
+      (Dbuff.f[DIR_M0P  ])[k] = (D.f[DIR_M0P  ])[ktw  ];
+      (Dbuff.f[DIR_0PP  ])[k] = (D.f[DIR_0PP  ])[ktn  ];
+      (Dbuff.f[DIR_0MM  ])[k] = (D.f[DIR_0MM  ])[kbs  ];
+      (Dbuff.f[DIR_0PM  ])[k] = (D.f[DIR_0PM  ])[kbn  ];
+      (Dbuff.f[DIR_0MP  ])[k] = (D.f[DIR_0MP  ])[kts  ];
+      (Dbuff.f[DIR_000])[k] = (D.f[DIR_000])[kzero];
+      (Dbuff.f[DIR_PPP ])[k] = (D.f[DIR_PPP ])[ktne ];
+      (Dbuff.f[DIR_MMP ])[k] = (D.f[DIR_MMP ])[ktsw ];
+      (Dbuff.f[DIR_PMP ])[k] = (D.f[DIR_PMP ])[ktse ];
+      (Dbuff.f[DIR_MPP ])[k] = (D.f[DIR_MPP ])[ktnw ];
+      (Dbuff.f[DIR_PPM ])[k] = (D.f[DIR_PPM ])[kbne ];
+      (Dbuff.f[DIR_MMM ])[k] = (D.f[DIR_MMM ])[kbsw ];
+      (Dbuff.f[DIR_PMM ])[k] = (D.f[DIR_PMM ])[kbse ];
+      (Dbuff.f[DIR_MPM ])[k] = (D.f[DIR_MPM ])[kbnw ];
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -687,7 +687,7 @@ extern "C" __global__ void getSendFsPre27(real* DD,
 
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void setRecvFsPre27(real* DD,
+__global__ void setRecvFsPre27(real* DD,
 										  real* bufferFs,
 										  int* recvIndex,
                                           int buffmax,
@@ -745,123 +745,123 @@ extern "C" __global__ void setRecvFsPre27(real* DD,
       Distributions27 D;
       if (isEvenTimestep==true)
       {
-         D.f[E   ] = &DD[E   *size_Mat];
-         D.f[W   ] = &DD[W   *size_Mat];
-         D.f[N   ] = &DD[N   *size_Mat];
-         D.f[S   ] = &DD[S   *size_Mat];
-         D.f[T   ] = &DD[T   *size_Mat];
-         D.f[B   ] = &DD[B   *size_Mat];
-         D.f[NE  ] = &DD[NE  *size_Mat];
-         D.f[SW  ] = &DD[SW  *size_Mat];
-         D.f[SE  ] = &DD[SE  *size_Mat];
-         D.f[NW  ] = &DD[NW  *size_Mat];
-         D.f[TE  ] = &DD[TE  *size_Mat];
-         D.f[BW  ] = &DD[BW  *size_Mat];
-         D.f[BE  ] = &DD[BE  *size_Mat];
-         D.f[TW  ] = &DD[TW  *size_Mat];
-         D.f[TN  ] = &DD[TN  *size_Mat];
-         D.f[BS  ] = &DD[BS  *size_Mat];
-         D.f[BN  ] = &DD[BN  *size_Mat];
-         D.f[TS  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[TNE *size_Mat];
-         D.f[TSW ] = &DD[TSW *size_Mat];
-         D.f[TSE ] = &DD[TSE *size_Mat];
-         D.f[TNW ] = &DD[TNW *size_Mat];
-         D.f[BNE ] = &DD[BNE *size_Mat];
-         D.f[BSW ] = &DD[BSW *size_Mat];
-         D.f[BSE ] = &DD[BSE *size_Mat];
-         D.f[BNW ] = &DD[BNW *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
       } 
       else
       {
-         D.f[W   ] = &DD[E   *size_Mat];
-         D.f[E   ] = &DD[W   *size_Mat];
-         D.f[S   ] = &DD[N   *size_Mat];
-         D.f[N   ] = &DD[S   *size_Mat];
-         D.f[B   ] = &DD[T   *size_Mat];
-         D.f[T   ] = &DD[B   *size_Mat];
-         D.f[SW  ] = &DD[NE  *size_Mat];
-         D.f[NE  ] = &DD[SW  *size_Mat];
-         D.f[NW  ] = &DD[SE  *size_Mat];
-         D.f[SE  ] = &DD[NW  *size_Mat];
-         D.f[BW  ] = &DD[TE  *size_Mat];
-         D.f[TE  ] = &DD[BW  *size_Mat];
-         D.f[TW  ] = &DD[BE  *size_Mat];
-         D.f[BE  ] = &DD[TW  *size_Mat];
-         D.f[BS  ] = &DD[TN  *size_Mat];
-         D.f[TN  ] = &DD[BS  *size_Mat];
-         D.f[TS  ] = &DD[BN  *size_Mat];
-         D.f[BN  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[BSW *size_Mat];
-         D.f[TSW ] = &DD[BNE *size_Mat];
-         D.f[TSE ] = &DD[BNW *size_Mat];
-         D.f[TNW ] = &DD[BSE *size_Mat];
-         D.f[BNE ] = &DD[TSW *size_Mat];
-         D.f[BSW ] = &DD[TNE *size_Mat];
-         D.f[BSE ] = &DD[TNW *size_Mat];
-         D.f[BNW ] = &DD[TSE *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
       }
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  //set Pointer for Buffer Fs
       Distributions27 Dbuff;
-      Dbuff.f[E   ] = &bufferFs[E   *buffmax];
-      Dbuff.f[W   ] = &bufferFs[W   *buffmax];
-      Dbuff.f[N   ] = &bufferFs[N   *buffmax];
-      Dbuff.f[S   ] = &bufferFs[S   *buffmax];
-      Dbuff.f[T   ] = &bufferFs[T   *buffmax];
-      Dbuff.f[B   ] = &bufferFs[B   *buffmax];
-      Dbuff.f[NE  ] = &bufferFs[NE  *buffmax];
-      Dbuff.f[SW  ] = &bufferFs[SW  *buffmax];
-      Dbuff.f[SE  ] = &bufferFs[SE  *buffmax];
-      Dbuff.f[NW  ] = &bufferFs[NW  *buffmax];
-      Dbuff.f[TE  ] = &bufferFs[TE  *buffmax];
-      Dbuff.f[BW  ] = &bufferFs[BW  *buffmax];
-      Dbuff.f[BE  ] = &bufferFs[BE  *buffmax];
-      Dbuff.f[TW  ] = &bufferFs[TW  *buffmax];
-      Dbuff.f[TN  ] = &bufferFs[TN  *buffmax];
-      Dbuff.f[BS  ] = &bufferFs[BS  *buffmax];
-      Dbuff.f[BN  ] = &bufferFs[BN  *buffmax];
-      Dbuff.f[TS  ] = &bufferFs[TS  *buffmax];
-      Dbuff.f[REST] = &bufferFs[REST*buffmax];
-      Dbuff.f[TNE ] = &bufferFs[TNE *buffmax];
-      Dbuff.f[TSW ] = &bufferFs[TSW *buffmax];
-      Dbuff.f[TSE ] = &bufferFs[TSE *buffmax];
-      Dbuff.f[TNW ] = &bufferFs[TNW *buffmax];
-      Dbuff.f[BNE ] = &bufferFs[BNE *buffmax];
-      Dbuff.f[BSW ] = &bufferFs[BSW *buffmax];
-      Dbuff.f[BSE ] = &bufferFs[BSE *buffmax];
-      Dbuff.f[BNW ] = &bufferFs[BNW *buffmax];
+      Dbuff.f[DIR_P00   ] = &bufferFs[DIR_P00   *buffmax];
+      Dbuff.f[DIR_M00   ] = &bufferFs[DIR_M00   *buffmax];
+      Dbuff.f[DIR_0P0   ] = &bufferFs[DIR_0P0   *buffmax];
+      Dbuff.f[DIR_0M0   ] = &bufferFs[DIR_0M0   *buffmax];
+      Dbuff.f[DIR_00P   ] = &bufferFs[DIR_00P   *buffmax];
+      Dbuff.f[DIR_00M   ] = &bufferFs[DIR_00M   *buffmax];
+      Dbuff.f[DIR_PP0  ] = &bufferFs[DIR_PP0  *buffmax];
+      Dbuff.f[DIR_MM0  ] = &bufferFs[DIR_MM0  *buffmax];
+      Dbuff.f[DIR_PM0  ] = &bufferFs[DIR_PM0  *buffmax];
+      Dbuff.f[DIR_MP0  ] = &bufferFs[DIR_MP0  *buffmax];
+      Dbuff.f[DIR_P0P  ] = &bufferFs[DIR_P0P  *buffmax];
+      Dbuff.f[DIR_M0M  ] = &bufferFs[DIR_M0M  *buffmax];
+      Dbuff.f[DIR_P0M  ] = &bufferFs[DIR_P0M  *buffmax];
+      Dbuff.f[DIR_M0P  ] = &bufferFs[DIR_M0P  *buffmax];
+      Dbuff.f[DIR_0PP  ] = &bufferFs[DIR_0PP  *buffmax];
+      Dbuff.f[DIR_0MM  ] = &bufferFs[DIR_0MM  *buffmax];
+      Dbuff.f[DIR_0PM  ] = &bufferFs[DIR_0PM  *buffmax];
+      Dbuff.f[DIR_0MP  ] = &bufferFs[DIR_0MP  *buffmax];
+      Dbuff.f[DIR_000] = &bufferFs[DIR_000*buffmax];
+      Dbuff.f[DIR_PPP ] = &bufferFs[DIR_PPP *buffmax];
+      Dbuff.f[DIR_MMP ] = &bufferFs[DIR_MMP *buffmax];
+      Dbuff.f[DIR_PMP ] = &bufferFs[DIR_PMP *buffmax];
+      Dbuff.f[DIR_MPP ] = &bufferFs[DIR_MPP *buffmax];
+      Dbuff.f[DIR_PPM ] = &bufferFs[DIR_PPM *buffmax];
+      Dbuff.f[DIR_MMM ] = &bufferFs[DIR_MMM *buffmax];
+      Dbuff.f[DIR_PMM ] = &bufferFs[DIR_PMM *buffmax];
+      Dbuff.f[DIR_MPM ] = &bufferFs[DIR_MPM *buffmax];
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  //copy from buffer
-      (D.f[E   ])[ke   ] = (Dbuff.f[E   ])[k];
-      (D.f[W   ])[kw   ] = (Dbuff.f[W   ])[k];
-      (D.f[N   ])[kn   ] = (Dbuff.f[N   ])[k];
-      (D.f[S   ])[ks   ] = (Dbuff.f[S   ])[k];
-      (D.f[T   ])[kt   ] = (Dbuff.f[T   ])[k];
-      (D.f[B   ])[kb   ] = (Dbuff.f[B   ])[k];
-      (D.f[NE  ])[kne  ] = (Dbuff.f[NE  ])[k];
-      (D.f[SW  ])[ksw  ] = (Dbuff.f[SW  ])[k];
-      (D.f[SE  ])[kse  ] = (Dbuff.f[SE  ])[k];
-      (D.f[NW  ])[knw  ] = (Dbuff.f[NW  ])[k];
-      (D.f[TE  ])[kte  ] = (Dbuff.f[TE  ])[k];
-      (D.f[BW  ])[kbw  ] = (Dbuff.f[BW  ])[k];
-      (D.f[BE  ])[kbe  ] = (Dbuff.f[BE  ])[k];
-      (D.f[TW  ])[ktw  ] = (Dbuff.f[TW  ])[k];
-      (D.f[TN  ])[ktn  ] = (Dbuff.f[TN  ])[k];
-      (D.f[BS  ])[kbs  ] = (Dbuff.f[BS  ])[k];
-      (D.f[BN  ])[kbn  ] = (Dbuff.f[BN  ])[k];
-      (D.f[TS  ])[kts  ] = (Dbuff.f[TS  ])[k];
-      (D.f[REST])[kzero] = (Dbuff.f[REST])[k];
-      (D.f[TNE ])[ktne ] = (Dbuff.f[TNE ])[k];
-      (D.f[TSW ])[ktsw ] = (Dbuff.f[TSW ])[k];
-      (D.f[TSE ])[ktse ] = (Dbuff.f[TSE ])[k];
-      (D.f[TNW ])[ktnw ] = (Dbuff.f[TNW ])[k];
-      (D.f[BNE ])[kbne ] = (Dbuff.f[BNE ])[k];
-      (D.f[BSW ])[kbsw ] = (Dbuff.f[BSW ])[k];
-      (D.f[BSE ])[kbse ] = (Dbuff.f[BSE ])[k];
-      (D.f[BNW ])[kbnw ] = (Dbuff.f[BNW ])[k];
+      (D.f[DIR_P00   ])[ke   ] = (Dbuff.f[DIR_P00   ])[k];
+      (D.f[DIR_M00   ])[kw   ] = (Dbuff.f[DIR_M00   ])[k];
+      (D.f[DIR_0P0   ])[kn   ] = (Dbuff.f[DIR_0P0   ])[k];
+      (D.f[DIR_0M0   ])[ks   ] = (Dbuff.f[DIR_0M0   ])[k];
+      (D.f[DIR_00P   ])[kt   ] = (Dbuff.f[DIR_00P   ])[k];
+      (D.f[DIR_00M   ])[kb   ] = (Dbuff.f[DIR_00M   ])[k];
+      (D.f[DIR_PP0  ])[kne  ] = (Dbuff.f[DIR_PP0  ])[k];
+      (D.f[DIR_MM0  ])[ksw  ] = (Dbuff.f[DIR_MM0  ])[k];
+      (D.f[DIR_PM0  ])[kse  ] = (Dbuff.f[DIR_PM0  ])[k];
+      (D.f[DIR_MP0  ])[knw  ] = (Dbuff.f[DIR_MP0  ])[k];
+      (D.f[DIR_P0P  ])[kte  ] = (Dbuff.f[DIR_P0P  ])[k];
+      (D.f[DIR_M0M  ])[kbw  ] = (Dbuff.f[DIR_M0M  ])[k];
+      (D.f[DIR_P0M  ])[kbe  ] = (Dbuff.f[DIR_P0M  ])[k];
+      (D.f[DIR_M0P  ])[ktw  ] = (Dbuff.f[DIR_M0P  ])[k];
+      (D.f[DIR_0PP  ])[ktn  ] = (Dbuff.f[DIR_0PP  ])[k];
+      (D.f[DIR_0MM  ])[kbs  ] = (Dbuff.f[DIR_0MM  ])[k];
+      (D.f[DIR_0PM  ])[kbn  ] = (Dbuff.f[DIR_0PM  ])[k];
+      (D.f[DIR_0MP  ])[kts  ] = (Dbuff.f[DIR_0MP  ])[k];
+      (D.f[DIR_000])[kzero] = (Dbuff.f[DIR_000])[k];
+      (D.f[DIR_PPP ])[ktne ] = (Dbuff.f[DIR_PPP ])[k];
+      (D.f[DIR_MMP ])[ktsw ] = (Dbuff.f[DIR_MMP ])[k];
+      (D.f[DIR_PMP ])[ktse ] = (Dbuff.f[DIR_PMP ])[k];
+      (D.f[DIR_MPP ])[ktnw ] = (Dbuff.f[DIR_MPP ])[k];
+      (D.f[DIR_PPM ])[kbne ] = (Dbuff.f[DIR_PPM ])[k];
+      (D.f[DIR_MMM ])[kbsw ] = (Dbuff.f[DIR_MMM ])[k];
+      (D.f[DIR_PMM ])[kbse ] = (Dbuff.f[DIR_PMM ])[k];
+      (D.f[DIR_MPM ])[kbnw ] = (Dbuff.f[DIR_MPM ])[k];
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -895,7 +895,7 @@ extern "C" __global__ void setRecvFsPre27(real* DD,
 
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void getSendGsF3(
+__global__ void getSendGsF3(
 	real* G6,
 	real* bufferGs,
 	int* sendIndex,
@@ -931,39 +931,39 @@ extern "C" __global__ void getSendGsF3(
 		Distributions6 G;
 		if (isEvenTimestep)
 		{
-			G.g[E] = &G6[E   *size_Mat];
-			G.g[W] = &G6[W   *size_Mat];
-			G.g[N] = &G6[N   *size_Mat];
-			G.g[S] = &G6[S   *size_Mat];
-			G.g[T] = &G6[T   *size_Mat];
-			G.g[B] = &G6[B   *size_Mat];
+			G.g[DIR_P00] = &G6[DIR_P00   *size_Mat];
+			G.g[DIR_M00] = &G6[DIR_M00   *size_Mat];
+			G.g[DIR_0P0] = &G6[DIR_0P0   *size_Mat];
+			G.g[DIR_0M0] = &G6[DIR_0M0   *size_Mat];
+			G.g[DIR_00P] = &G6[DIR_00P   *size_Mat];
+			G.g[DIR_00M] = &G6[DIR_00M   *size_Mat];
 		}
 		else
 		{
-			G.g[W] = &G6[E   *size_Mat];
-			G.g[E] = &G6[W   *size_Mat];
-			G.g[S] = &G6[N   *size_Mat];
-			G.g[N] = &G6[S   *size_Mat];
-			G.g[B] = &G6[T   *size_Mat];
-			G.g[T] = &G6[B   *size_Mat];
+			G.g[DIR_M00] = &G6[DIR_P00   *size_Mat];
+			G.g[DIR_P00] = &G6[DIR_M00   *size_Mat];
+			G.g[DIR_0M0] = &G6[DIR_0P0   *size_Mat];
+			G.g[DIR_0P0] = &G6[DIR_0M0   *size_Mat];
+			G.g[DIR_00M] = &G6[DIR_00P   *size_Mat];
+			G.g[DIR_00P] = &G6[DIR_00M   *size_Mat];
 		}
 		//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 		//set Pointer for Buffer Gs
 		Distributions6 Dbuff;
-		Dbuff.g[E] = &bufferGs[E   *buffmax];
-		Dbuff.g[W] = &bufferGs[W   *buffmax];
-		Dbuff.g[N] = &bufferGs[N   *buffmax];
-		Dbuff.g[S] = &bufferGs[S   *buffmax];
-		Dbuff.g[T] = &bufferGs[T   *buffmax];
-		Dbuff.g[B] = &bufferGs[B   *buffmax];
+		Dbuff.g[DIR_P00] = &bufferGs[DIR_P00   *buffmax];
+		Dbuff.g[DIR_M00] = &bufferGs[DIR_M00   *buffmax];
+		Dbuff.g[DIR_0P0] = &bufferGs[DIR_0P0   *buffmax];
+		Dbuff.g[DIR_0M0] = &bufferGs[DIR_0M0   *buffmax];
+		Dbuff.g[DIR_00P] = &bufferGs[DIR_00P   *buffmax];
+		Dbuff.g[DIR_00M] = &bufferGs[DIR_00M   *buffmax];
 		//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 		//write Gs to buffer
-		(Dbuff.g[E])[k] = (G.g[W])[kw];
-		(Dbuff.g[W])[k] = (G.g[E])[kr];
-		(Dbuff.g[N])[k] = (G.g[S])[ks];
-		(Dbuff.g[S])[k] = (G.g[N])[kr];
-		(Dbuff.g[T])[k] = (G.g[B])[kb];
-		(Dbuff.g[B])[k] = (G.g[T])[kr];
+		(Dbuff.g[DIR_P00])[k] = (G.g[DIR_M00])[kw];
+		(Dbuff.g[DIR_M00])[k] = (G.g[DIR_P00])[kr];
+		(Dbuff.g[DIR_0P0])[k] = (G.g[DIR_0M0])[ks];
+		(Dbuff.g[DIR_0M0])[k] = (G.g[DIR_0P0])[kr];
+		(Dbuff.g[DIR_00P])[k] = (G.g[DIR_00M])[kb];
+		(Dbuff.g[DIR_00M])[k] = (G.g[DIR_00P])[kr];
 	}
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -998,7 +998,7 @@ extern "C" __global__ void getSendGsF3(
 
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void setRecvGsF3(
+__global__ void setRecvGsF3(
 	real* G6,
 	real* bufferGs,
 	int* recvIndex,
@@ -1034,39 +1034,39 @@ extern "C" __global__ void setRecvGsF3(
 		Distributions6 G;
 		if (isEvenTimestep)
 		{
-			G.g[E] = &G6[E   *size_Mat];
-			G.g[W] = &G6[W   *size_Mat];
-			G.g[N] = &G6[N   *size_Mat];
-			G.g[S] = &G6[S   *size_Mat];
-			G.g[T] = &G6[T   *size_Mat];
-			G.g[B] = &G6[B   *size_Mat];
+			G.g[DIR_P00] = &G6[DIR_P00   *size_Mat];
+			G.g[DIR_M00] = &G6[DIR_M00   *size_Mat];
+			G.g[DIR_0P0] = &G6[DIR_0P0   *size_Mat];
+			G.g[DIR_0M0] = &G6[DIR_0M0   *size_Mat];
+			G.g[DIR_00P] = &G6[DIR_00P   *size_Mat];
+			G.g[DIR_00M] = &G6[DIR_00M   *size_Mat];
 		}
 		else
 		{
-			G.g[W] = &G6[E   *size_Mat];
-			G.g[E] = &G6[W   *size_Mat];
-			G.g[S] = &G6[N   *size_Mat];
-			G.g[N] = &G6[S   *size_Mat];
-			G.g[B] = &G6[T   *size_Mat];
-			G.g[T] = &G6[B   *size_Mat];
+			G.g[DIR_M00] = &G6[DIR_P00   *size_Mat];
+			G.g[DIR_P00] = &G6[DIR_M00   *size_Mat];
+			G.g[DIR_0M0] = &G6[DIR_0P0   *size_Mat];
+			G.g[DIR_0P0] = &G6[DIR_0M0   *size_Mat];
+			G.g[DIR_00M] = &G6[DIR_00P   *size_Mat];
+			G.g[DIR_00P] = &G6[DIR_00M   *size_Mat];
 		}
 		//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 		//set Pointer for Buffer Gs
 		Distributions6 Dbuff;
-		Dbuff.g[E] = &bufferGs[E   *buffmax];
-		Dbuff.g[W] = &bufferGs[W   *buffmax];
-		Dbuff.g[N] = &bufferGs[N   *buffmax];
-		Dbuff.g[S] = &bufferGs[S   *buffmax];
-		Dbuff.g[T] = &bufferGs[T   *buffmax];
-		Dbuff.g[B] = &bufferGs[B   *buffmax];
+		Dbuff.g[DIR_P00] = &bufferGs[DIR_P00   *buffmax];
+		Dbuff.g[DIR_M00] = &bufferGs[DIR_M00   *buffmax];
+		Dbuff.g[DIR_0P0] = &bufferGs[DIR_0P0   *buffmax];
+		Dbuff.g[DIR_0M0] = &bufferGs[DIR_0M0   *buffmax];
+		Dbuff.g[DIR_00P] = &bufferGs[DIR_00P   *buffmax];
+		Dbuff.g[DIR_00M] = &bufferGs[DIR_00M   *buffmax];
 		//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 		//write buffer to Gs
-		(G.g[W])[kw] = (Dbuff.g[E])[k];
-		(G.g[E])[kr] = (Dbuff.g[W])[k];
-		(G.g[S])[ks] = (Dbuff.g[N])[k];
-		(G.g[N])[kr] = (Dbuff.g[S])[k];
-		(G.g[B])[kb] = (Dbuff.g[T])[k];
-		(G.g[T])[kr] = (Dbuff.g[B])[k];
+		(G.g[DIR_M00])[kw] = (Dbuff.g[DIR_P00])[k];
+		(G.g[DIR_P00])[kr] = (Dbuff.g[DIR_M00])[k];
+		(G.g[DIR_0M0])[ks] = (Dbuff.g[DIR_0P0])[k];
+		(G.g[DIR_0P0])[kr] = (Dbuff.g[DIR_0M0])[k];
+		(G.g[DIR_00M])[kb] = (Dbuff.g[DIR_00P])[k];
+		(G.g[DIR_00P])[kr] = (Dbuff.g[DIR_00M])[k];
 	}
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h b/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h
index 2d14f07bb6a25fb192ec96b425aa9c37a0aeb9f4..9868f6e1fb0484d54b72717dc9ed9f0a245753f5 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h
+++ b/src/gpu/VirtualFluids_GPU/GPU/GPU_Interface.h
@@ -21,7 +21,7 @@ class Parameter;
 //////////////////////////////////////////////////////////////////////////
 //Kernel
 //////////////////////////////////////////////////////////////////////////
-extern "C" void KernelCas27(unsigned int grid_nx, 
+void KernelCas27(unsigned int grid_nx, 
                             unsigned int grid_ny, 
                             unsigned int grid_nz, 
                             real s9,
@@ -33,7 +33,7 @@ extern "C" void KernelCas27(unsigned int grid_nx,
                             int size_Mat,
                             bool EvenOrOdd);
 
-extern "C" void KernelCasSP27(unsigned int numberOfThreads, 
+void KernelCasSP27(unsigned int numberOfThreads, 
                               real s9,
                               unsigned int* bcMatD,
                               unsigned int* neighborX,
@@ -43,7 +43,7 @@ extern "C" void KernelCasSP27(unsigned int numberOfThreads,
                               int size_Mat,
                               bool EvenOrOdd);
 
-extern "C" void KernelCasSPMS27(unsigned int numberOfThreads, 
+void KernelCasSPMS27(unsigned int numberOfThreads, 
                                 real s9,
                                 unsigned int* bcMatD,
                                 unsigned int* neighborX,
@@ -53,7 +53,7 @@ extern "C" void KernelCasSPMS27(unsigned int numberOfThreads,
                                 int size_Mat,
                                 bool EvenOrOdd);
 
-extern "C" void KernelCasSPMSOHM27( unsigned int numberOfThreads, 
+void KernelCasSPMSOHM27( unsigned int numberOfThreads, 
                                    real s9,
                                    unsigned int* bcMatD,
                                    unsigned int* neighborX,
@@ -63,7 +63,7 @@ extern "C" void KernelCasSPMSOHM27( unsigned int numberOfThreads,
                                    int size_Mat,
                                    bool EvenOrOdd);
 
-extern "C" void KernelKumCompSRTSP27(
+void KernelKumCompSRTSP27(
 	unsigned int numberOfThreads,
 	real omega,
 	unsigned int* bcMatD,
@@ -76,7 +76,7 @@ extern "C" void KernelKumCompSRTSP27(
 	real* forces,
 	bool EvenOrOdd);
 
-extern "C" void KernelCumulantD3Q27All4(unsigned int numberOfThreads,
+void KernelCumulantD3Q27All4(unsigned int numberOfThreads,
 									    real s9,
 									    unsigned int* bcMatD,
 									    unsigned int* neighborX,
@@ -88,7 +88,7 @@ extern "C" void KernelCumulantD3Q27All4(unsigned int numberOfThreads,
 									    real* forces,
 									    bool EvenOrOdd);
 
-extern "C" void KernelKumAA2016CompBulkSP27(unsigned int numberOfThreads, 
+void KernelKumAA2016CompBulkSP27(unsigned int numberOfThreads, 
 											real s9,
 											unsigned int* bcMatD,
 											unsigned int* neighborX,
@@ -101,7 +101,7 @@ extern "C" void KernelKumAA2016CompBulkSP27(unsigned int numberOfThreads,
 											real* forces,
 											bool EvenOrOdd);
 
-extern "C" void KernelKum1hSP27(    unsigned int numberOfThreads, 
+void KernelKum1hSP27(    unsigned int numberOfThreads, 
 									real omega,
 									real deltaPhi,
 									real angularVelocity,
@@ -116,7 +116,7 @@ extern "C" void KernelKum1hSP27(    unsigned int numberOfThreads,
 									int size_Mat,
 									bool EvenOrOdd);
 
-extern "C" void KernelCascadeSP27(unsigned int numberOfThreads, 
+void KernelCascadeSP27(unsigned int numberOfThreads, 
 								  real s9,
 								  unsigned int* bcMatD,
 								  unsigned int* neighborX,
@@ -126,7 +126,7 @@ extern "C" void KernelCascadeSP27(unsigned int numberOfThreads,
 								  int size_Mat,
 								  bool EvenOrOdd);
 
-extern "C" void KernelKumNewSP27(   unsigned int numberOfThreads, 
+void KernelKumNewSP27(   unsigned int numberOfThreads, 
 									real s9,
 									unsigned int* bcMatD,
 									unsigned int* neighborX,
@@ -137,7 +137,7 @@ extern "C" void KernelKumNewSP27(   unsigned int numberOfThreads,
 									bool EvenOrOdd);
 
 
-extern "C" void CumulantOnePreconditionedErrorDiffusionChimCompSP27(
+void CumulantOnePreconditionedErrorDiffusionChimCompSP27(
 	unsigned int numberOfThreads,
 	real s9,
 	unsigned int* bcMatD,
@@ -151,7 +151,7 @@ extern "C" void CumulantOnePreconditionedErrorDiffusionChimCompSP27(
 	real* forces,
 	bool EvenOrOdd);
 
-extern "C" void CumulantOnePreconditionedChimCompSP27(
+void CumulantOnePreconditionedChimCompSP27(
 	unsigned int numberOfThreads,
 	real s9,
 	unsigned int* bcMatD,
@@ -165,7 +165,7 @@ extern "C" void CumulantOnePreconditionedChimCompSP27(
 	real* forces,
 	bool EvenOrOdd);
 
-extern "C" void CumulantOneChimCompSP27(
+void CumulantOneChimCompSP27(
 	unsigned int numberOfThreads,
 	real s9,
 	unsigned int* bcMatD,
@@ -180,7 +180,7 @@ extern "C" void CumulantOneChimCompSP27(
 	bool EvenOrOdd);
 
 
-extern "C" void KernelKumIsoTestSP27(unsigned int numberOfThreads, 
+void KernelKumIsoTestSP27(unsigned int numberOfThreads, 
 									 real s9,
 									 unsigned int* bcMatD,
 									 unsigned int* neighborX,
@@ -193,7 +193,7 @@ extern "C" void KernelKumIsoTestSP27(unsigned int numberOfThreads,
 									 int size_Mat,
 									 bool EvenOrOdd);
 
-extern "C" void KernelKumCompSP27(  unsigned int numberOfThreads, 
+void KernelKumCompSP27(  unsigned int numberOfThreads, 
 									real s9,
 									unsigned int* bcMatD,
 									unsigned int* neighborX,
@@ -203,7 +203,7 @@ extern "C" void KernelKumCompSP27(  unsigned int numberOfThreads,
 									int size_Mat,
 									bool EvenOrOdd);
 
-extern "C" void KernelWaleBySoniMalavCumAA2016CompSP27(
+void KernelWaleBySoniMalavCumAA2016CompSP27(
 	unsigned int numberOfThreads,
 	real s9,
 	unsigned int* bcMatD,
@@ -222,7 +222,7 @@ extern "C" void KernelWaleBySoniMalavCumAA2016CompSP27(
 	real* forces,
 	bool EvenOrOdd);
 
-extern "C" void KernelPMCumOneCompSP27(unsigned int numberOfThreads, 
+void KernelPMCumOneCompSP27(unsigned int numberOfThreads, 
 									   real omega,
 									   unsigned int* neighborX,
 									   unsigned int* neighborY,
@@ -238,7 +238,7 @@ extern "C" void KernelPMCumOneCompSP27(unsigned int numberOfThreads,
 									   unsigned int* nodeIdsPorousMedia, 
 									   bool EvenOrOdd);
 
-extern "C" void KernelADincomp7(   unsigned int numberOfThreads, 
+void KernelADincomp7(   unsigned int numberOfThreads, 
 								   real diffusivity,
 								   unsigned int* bcMatD,
 								   unsigned int* neighborX,
@@ -249,7 +249,7 @@ extern "C" void KernelADincomp7(   unsigned int numberOfThreads,
 								   int size_Mat,
 								   bool EvenOrOdd);
 
-extern "C" void KernelADincomp27(   unsigned int numberOfThreads, 
+void KernelADincomp27(   unsigned int numberOfThreads, 
 									real diffusivity,
 									unsigned int* bcMatD,
 									unsigned int* neighborX,
@@ -260,7 +260,7 @@ extern "C" void KernelADincomp27(   unsigned int numberOfThreads,
 									int size_Mat,
 									bool EvenOrOdd);
 
-extern "C" void Init27(int myid,
+void Init27(int myid,
                        int numprocs,
                        real u0,
                        unsigned int* geoD,
@@ -276,7 +276,7 @@ extern "C" void Init27(int myid,
                        int level,
                        int maxlevel);
 
-extern "C" void InitNonEqPartSP27(unsigned int numberOfThreads,
+void InitNonEqPartSP27(unsigned int numberOfThreads,
                                   unsigned int* neighborX,
                                   unsigned int* neighborY,
                                   unsigned int* neighborZ,
@@ -292,7 +292,7 @@ extern "C" void InitNonEqPartSP27(unsigned int numberOfThreads,
                                   bool EvenOrOdd);
 
 
-extern "C" void InitThS7(  unsigned int numberOfThreads,
+void InitThS7(  unsigned int numberOfThreads,
                            unsigned int* neighborX,
                            unsigned int* neighborY,
                            unsigned int* neighborZ,
@@ -305,7 +305,7 @@ extern "C" void InitThS7(  unsigned int numberOfThreads,
                            real* DD7,
                            bool EvenOrOdd);
 
-extern "C" void InitADDev27( unsigned int numberOfThreads,
+void InitADDev27( unsigned int numberOfThreads,
                            unsigned int* neighborX,
                            unsigned int* neighborY,
                            unsigned int* neighborZ,
@@ -318,7 +318,7 @@ extern "C" void InitADDev27( unsigned int numberOfThreads,
                            real* DD27,
                            bool EvenOrOdd);
 
-extern "C" void PostProcessorF3_2018Fehlberg(
+void PostProcessorF3_2018Fehlberg(
 	unsigned int numberOfThreads,
 	real omega,
 	unsigned int* bcMatD,
@@ -336,7 +336,7 @@ extern "C" void PostProcessorF3_2018Fehlberg(
 	real* forces,
 	bool EvenOrOdd);
 
-extern "C" void CalcMac27( real* vxD,
+void CalcMac27( real* vxD,
                           real* vyD,
                           real* vzD,
                           real* rhoD,
@@ -351,7 +351,7 @@ extern "C" void CalcMac27( real* vxD,
                           real* DD,
                           bool isEvenTimestep);
 
-extern "C" void CalcMacSP27(real* vxD,
+void CalcMacSP27(real* vxD,
                             real* vyD,
                             real* vzD,
                             real* rhoD,
@@ -365,7 +365,7 @@ extern "C" void CalcMacSP27(real* vxD,
                             real* DD,
                             bool isEvenTimestep);
 
-extern "C" void CalcMacCompSP27(real* vxD,
+void CalcMacCompSP27(real* vxD,
 								real* vyD,
 								real* vzD,
 								real* rhoD,
@@ -379,7 +379,7 @@ extern "C" void CalcMacCompSP27(real* vxD,
 								real* DD,
 								bool isEvenTimestep);
 
-extern "C" void CalcMacThS7(  real* Conc,
+void CalcMacThS7(  real* Conc,
                               unsigned int* geoD,
                               unsigned int* neighborX,
                               unsigned int* neighborY,
@@ -389,7 +389,7 @@ extern "C" void CalcMacThS7(  real* Conc,
                               real* DD7,
                               bool isEvenTimestep);
 
-extern "C" void PlaneConcThS7(real* Conc,
+void PlaneConcThS7(real* Conc,
 							  int* kPC,
 							  unsigned int numberOfPointskPC,
 							  unsigned int* geoD,
@@ -401,7 +401,7 @@ extern "C" void PlaneConcThS7(real* Conc,
 							  real* DD7,
 							  bool isEvenTimestep);
 
-extern "C" void PlaneConcThS27(real* Conc,
+void PlaneConcThS27(real* Conc,
 							   int* kPC,
 							   unsigned int numberOfPointskPC,
 							   unsigned int* geoD,
@@ -413,7 +413,7 @@ extern "C" void PlaneConcThS27(real* Conc,
 							   real* DD27,
 							   bool isEvenTimestep);
 
-extern "C" void CalcConcentration27( unsigned int numberOfThreads,
+void CalcConcentration27( unsigned int numberOfThreads,
 	                                 real* Conc,
                                      unsigned int* geoD,
                                      unsigned int* neighborX,
@@ -423,7 +423,7 @@ extern "C" void CalcConcentration27( unsigned int numberOfThreads,
                                      real* DD27,
                                      bool isEvenTimestep);
 
-extern "C" void CalcMedSP27(  real* vxD,
+void CalcMedSP27(  real* vxD,
                               real* vyD,
                               real* vzD,
                               real* rhoD,
@@ -437,7 +437,7 @@ extern "C" void CalcMedSP27(  real* vxD,
                               real* DD,
                               bool isEvenTimestep);
 
-extern "C" void CalcMedCompSP27(real* vxD,
+void CalcMedCompSP27(real* vxD,
 								real* vyD,
 								real* vzD,
 								real* rhoD,
@@ -451,7 +451,7 @@ extern "C" void CalcMedCompSP27(real* vxD,
 								real* DD,
 								bool isEvenTimestep);
 
-extern "C" void CalcMedCompAD27(
+void CalcMedCompAD27(
 	real* vxD,
 	real* vyD,
 	real* vzD,
@@ -468,7 +468,7 @@ extern "C" void CalcMedCompAD27(
 	real* DD_AD,
 	bool isEvenTimestep);
 
-extern "C" void CalcMacMedSP27(  real* vxD,
+void CalcMacMedSP27(  real* vxD,
                                  real* vyD,
                                  real* vzD,
                                  real* rhoD,
@@ -482,7 +482,7 @@ extern "C" void CalcMacMedSP27(  real* vxD,
                                  unsigned int numberOfThreads, 
                                  bool isEvenTimestep);
 
-extern "C" void ResetMedianValuesSP27(
+void ResetMedianValuesSP27(
 	real* vxD,
 	real* vyD,
 	real* vzD,
@@ -492,7 +492,7 @@ extern "C" void ResetMedianValuesSP27(
 	unsigned int numberOfThreads,
 	bool isEvenTimestep);
 
-extern "C" void ResetMedianValuesAD27(
+void ResetMedianValuesAD27(
 	real* vxD,
 	real* vyD,
 	real* vzD,
@@ -503,7 +503,7 @@ extern "C" void ResetMedianValuesAD27(
 	unsigned int numberOfThreads,
 	bool isEvenTimestep);
 
-extern "C" void Calc2ndMomentsIncompSP27(real* kxyFromfcNEQ,
+void Calc2ndMomentsIncompSP27(real* kxyFromfcNEQ,
 										 real* kyzFromfcNEQ,
 										 real* kxzFromfcNEQ,
 										 real* kxxMyyFromfcNEQ,
@@ -517,7 +517,7 @@ extern "C" void Calc2ndMomentsIncompSP27(real* kxyFromfcNEQ,
 										 real* DD,
 										 bool isEvenTimestep);
 
-extern "C" void Calc2ndMomentsCompSP27(real* kxyFromfcNEQ,
+void Calc2ndMomentsCompSP27(real* kxyFromfcNEQ,
 									   real* kyzFromfcNEQ,
 									   real* kxzFromfcNEQ,
 									   real* kxxMyyFromfcNEQ,
@@ -531,7 +531,7 @@ extern "C" void Calc2ndMomentsCompSP27(real* kxyFromfcNEQ,
 									   real* DD,
 									   bool isEvenTimestep);
 
-extern "C" void Calc3rdMomentsIncompSP27(real* CUMbbb,
+void Calc3rdMomentsIncompSP27(real* CUMbbb,
 										 real* CUMabc,
 										 real* CUMbac,
 										 real* CUMbca,
@@ -547,7 +547,7 @@ extern "C" void Calc3rdMomentsIncompSP27(real* CUMbbb,
 										 real* DD,
 										 bool isEvenTimestep);
 
-extern "C" void Calc3rdMomentsCompSP27(real* CUMbbb,
+void Calc3rdMomentsCompSP27(real* CUMbbb,
 									   real* CUMabc,
 									   real* CUMbac,
 									   real* CUMbca,
@@ -563,7 +563,7 @@ extern "C" void Calc3rdMomentsCompSP27(real* CUMbbb,
 									   real* DD,
 									   bool isEvenTimestep);
 
-extern "C" void CalcHigherMomentsIncompSP27(real* CUMcbb,
+void CalcHigherMomentsIncompSP27(real* CUMcbb,
 											real* CUMbcb,
 											real* CUMbbc,
 											real* CUMcca,
@@ -582,7 +582,7 @@ extern "C" void CalcHigherMomentsIncompSP27(real* CUMcbb,
 											real* DD,
 											bool isEvenTimestep);
 
-extern "C" void CalcHigherMomentsCompSP27(real* CUMcbb,
+void CalcHigherMomentsCompSP27(real* CUMcbb,
 										  real* CUMbcb,
 										  real* CUMbbc,
 										  real* CUMcca,
@@ -601,7 +601,7 @@ extern "C" void CalcHigherMomentsCompSP27(real* CUMcbb,
 										  real* DD,
 										  bool isEvenTimestep);
 
-extern "C" void LBCalcMeasurePoints27(real* vxMP,
+void LBCalcMeasurePoints27(real* vxMP,
                                       real* vyMP,
                                       real* vzMP,
                                       real* rhoMP,
@@ -618,7 +618,7 @@ extern "C" void LBCalcMeasurePoints27(real* vxMP,
                                       unsigned int numberOfThreads, 
                                       bool isEvenTimestep);
 
-extern "C" void BcPress27(int nx, 
+void BcPress27(int nx, 
                           int ny, 
                           int tz, 
                           unsigned int grid_nx, 
@@ -631,7 +631,7 @@ extern "C" void BcPress27(int nx,
                           unsigned int size_Mat, 
                           bool isEvenTimestep);
 
-extern "C" void BcVel27(int nx, 
+void BcVel27(int nx, 
                         int ny, 
                         int nz, 
                         int itz, 
@@ -647,11 +647,11 @@ extern "C" void BcVel27(int nx,
                         real u0x, 
                         real om);
 
-extern "C" void QDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
+void QDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
 
-extern "C" void QDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
+void QDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
 
-extern "C" void QDevCompThinWalls27(unsigned int numberOfThreads,
+void QDevCompThinWalls27(unsigned int numberOfThreads,
 									real* DD, 
 									int* k_Q, 
 									real* QQ,
@@ -665,9 +665,9 @@ extern "C" void QDevCompThinWalls27(unsigned int numberOfThreads,
 									unsigned int size_Mat, 
 									bool isEvenTimestep);
 
-extern "C" void QDev3rdMomentsComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
+void QDev3rdMomentsComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
 
-extern "C" void QDevIncompHighNu27(  unsigned int numberOfThreads,
+void QDevIncompHighNu27(  unsigned int numberOfThreads,
 									 real* DD, 
 									 int* k_Q, 
 									 real* QQ,
@@ -679,7 +679,7 @@ extern "C" void QDevIncompHighNu27(  unsigned int numberOfThreads,
 									 unsigned int size_Mat, 
 									 bool isEvenTimestep);
 
-extern "C" void QDevCompHighNu27(unsigned int numberOfThreads,
+void QDevCompHighNu27(unsigned int numberOfThreads,
 								 real* DD, 
 								 int* k_Q, 
 								 real* QQ,
@@ -691,9 +691,9 @@ extern "C" void QDevCompHighNu27(unsigned int numberOfThreads,
 								 unsigned int size_Mat, 
 								 bool isEvenTimestep);
 
-extern "C" void QVelDevicePlainBB27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
+void QVelDevicePlainBB27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
 	
-extern "C" void QVelDeviceCouette27(unsigned int numberOfThreads,
+void QVelDeviceCouette27(unsigned int numberOfThreads,
 									real* vx,
 									real* vy,
 									real* vz,
@@ -708,7 +708,7 @@ extern "C" void QVelDeviceCouette27(unsigned int numberOfThreads,
 									unsigned int size_Mat, 
 									bool isEvenTimestep);
 
-extern "C" void QVelDevice1h27( unsigned int numberOfThreads,
+void QVelDevice1h27( unsigned int numberOfThreads,
 								int nx,
 								int ny,
 								real* vx,
@@ -730,9 +730,9 @@ extern "C" void QVelDevice1h27( unsigned int numberOfThreads,
 								unsigned int size_Mat, 
 								bool isEvenTimestep);
 
-extern "C" void QVelDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
+void QVelDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
 
-extern "C" void QVelDevCompPlusSlip27(unsigned int numberOfThreads,
+void QVelDevCompPlusSlip27(unsigned int numberOfThreads,
 									  real* vx,
 									  real* vy,
 									  real* vz,
@@ -747,9 +747,9 @@ extern "C" void QVelDevCompPlusSlip27(unsigned int numberOfThreads,
 									  unsigned int size_Mat, 
 									  bool isEvenTimestep);
 
-extern "C" void QVelDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
+void QVelDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
 
-extern "C" void QVelDevCompThinWalls27(unsigned int numberOfThreads,
+void QVelDevCompThinWalls27(unsigned int numberOfThreads,
 							           real* vx,
 							           real* vy,
 							           real* vz,
@@ -766,9 +766,9 @@ extern "C" void QVelDevCompThinWalls27(unsigned int numberOfThreads,
 							           unsigned int size_Mat, 
 							           bool isEvenTimestep);
 
-extern "C" void QVelDevCompZeroPress27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
+void QVelDevCompZeroPress27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
 
-extern "C" void QVelDevIncompHighNu27(  unsigned int numberOfThreads,
+void QVelDevIncompHighNu27(  unsigned int numberOfThreads,
 										real* vx,
 										real* vy,
 										real* vz,
@@ -783,7 +783,7 @@ extern "C" void QVelDevIncompHighNu27(  unsigned int numberOfThreads,
 										unsigned int size_Mat, 
 										bool isEvenTimestep);
 
-extern "C" void QVelDevCompHighNu27(unsigned int numberOfThreads,
+void QVelDevCompHighNu27(unsigned int numberOfThreads,
 									real* vx,
 									real* vy,
 									real* vz,
@@ -798,7 +798,7 @@ extern "C" void QVelDevCompHighNu27(unsigned int numberOfThreads,
 									unsigned int size_Mat, 
 									bool isEvenTimestep);
 
-extern "C" void QVeloDevEQ27(unsigned int numberOfThreads,
+void QVeloDevEQ27(unsigned int numberOfThreads,
 							 real* VeloX,
 							 real* VeloY,
 							 real* VeloZ,
@@ -812,7 +812,7 @@ extern "C" void QVeloDevEQ27(unsigned int numberOfThreads,
 							 unsigned int size_Mat, 
 							 bool isEvenTimestep);
 
-extern "C" void QVeloStreetDevEQ27(
+void QVeloStreetDevEQ27(
 	uint  numberOfThreads,
 	real* veloXfraction,
 	real* veloYfraction,
@@ -827,13 +827,17 @@ extern "C" void QVeloStreetDevEQ27(
 	uint  size_Mat,
 	bool  isEvenTimestep);
 
-extern "C" void QSlipDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
+void QSlipDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
 
-extern "C" void QSlipDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
+void QSlipDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
 
-extern "C" void QSlipDevCompTurbulentViscosity27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
+void BBSlipDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
 
-extern "C" void QSlipGeomDevComp27( unsigned int numberOfThreads,
+void QSlipDevCompTurbulentViscosity27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
+
+void QSlipPressureDevCompTurbulentViscosity27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
+
+void QSlipGeomDevComp27( unsigned int numberOfThreads,
 									real* DD, 
 									int* k_Q, 
 									real* QQ,
@@ -848,7 +852,7 @@ extern "C" void QSlipGeomDevComp27( unsigned int numberOfThreads,
 									unsigned int size_Mat, 
 									bool isEvenTimestep);
 
-extern "C" void QSlipNormDevComp27(unsigned int numberOfThreads,
+void QSlipNormDevComp27(unsigned int numberOfThreads,
 								   real* DD, 
 								   int* k_Q, 
 								   real* QQ,
@@ -863,13 +867,15 @@ extern "C" void QSlipNormDevComp27(unsigned int numberOfThreads,
 								   unsigned int size_Mat, 
 								   bool isEvenTimestep);
 
-extern "C" void QStressDevComp27(Parameter *para,  QforBoundaryConditions* boundaryCondition, const int level);
+void QStressDevComp27(Parameter *para,  QforBoundaryConditions* boundaryCondition, const int level);
 
-extern "C" void BBStressDev27(Parameter *para,  QforBoundaryConditions* boundaryCondition, const int level);
+void BBStressDev27(Parameter *para,  QforBoundaryConditions* boundaryCondition, const int level);
 
-extern "C" void QPressDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
+void BBStressPressureDev27(Parameter *para,  QforBoundaryConditions* boundaryCondition, const int level);
 
-extern "C" void QPressDevFixBackflow27(unsigned int numberOfThreads,
+void QPressDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
+
+void QPressDevFixBackflow27(unsigned int numberOfThreads,
                                        real* rhoBC,
                                        real* DD, 
                                        int* k_Q, 
@@ -881,7 +887,7 @@ extern "C" void QPressDevFixBackflow27(unsigned int numberOfThreads,
                                        unsigned int size_Mat, 
                                        bool isEvenTimestep);
 
-extern "C" void QPressDevDirDepBot27(unsigned int numberOfThreads,
+void QPressDevDirDepBot27(unsigned int numberOfThreads,
                                      real* rhoBC,
                                      real* DD, 
                                      int* k_Q, 
@@ -893,11 +899,13 @@ extern "C" void QPressDevDirDepBot27(unsigned int numberOfThreads,
                                      unsigned int size_Mat, 
                                      bool isEvenTimestep);
 
-extern "C" void QPressNoRhoDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
+void QPressNoRhoDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
+
+void QPressZeroRhoOutflowDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
 
-extern "C" void QInflowScaleByPressDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
+void QInflowScaleByPressDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
 
-extern "C" void QPressDevOld27(unsigned int numberOfThreads,
+void QPressDevOld27(unsigned int numberOfThreads,
                                real* rhoBC,
                                real* DD, 
                                int* k_Q, 
@@ -910,13 +918,13 @@ extern "C" void QPressDevOld27(unsigned int numberOfThreads,
                                unsigned int size_Mat, 
                                bool isEvenTimestep);
 
-extern "C" void QPressDevIncompNEQ27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
+void QPressDevIncompNEQ27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
 
-extern "C" void QPressDevNEQ27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
+void QPressDevNEQ27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
 
-extern "C" void QPressDevEQZ27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
+void QPressDevEQZ27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
 
-extern "C" void QPressDevZero27(unsigned int numberOfThreads,
+void QPressDevZero27(unsigned int numberOfThreads,
                                 real* DD, 
                                 int* k_Q, 
                                 unsigned int numberOfBCnodes, 
@@ -926,7 +934,7 @@ extern "C" void QPressDevZero27(unsigned int numberOfThreads,
                                 unsigned int size_Mat, 
                                 bool isEvenTimestep);
 
-extern "C" void QPressDevFake27(   unsigned int numberOfThreads,
+void QPressDevFake27(   unsigned int numberOfThreads,
 								   real* rhoBC,
 								   real* DD, 
 								   int* k_Q, 
@@ -939,9 +947,9 @@ extern "C" void QPressDevFake27(   unsigned int numberOfThreads,
 								   unsigned int size_Mat, 
 								   bool isEvenTimestep);
 
-extern "C" void BBDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
+void BBDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition);
 
-extern "C" void QPressDev27_IntBB(  unsigned int numberOfThreads,
+void QPressDev27_IntBB(  unsigned int numberOfThreads,
 									real* rho,
 									real* DD, 
 									int* k_Q, 
@@ -954,7 +962,7 @@ extern "C" void QPressDev27_IntBB(  unsigned int numberOfThreads,
 									unsigned int size_Mat, 
 									bool isEvenTimestep);
 
-extern "C" void QPressDevAntiBB27(  unsigned int numberOfThreads,
+void QPressDevAntiBB27(  unsigned int numberOfThreads,
 								  real* rhoBC,
 								  real* vx,
 								  real* vy,
@@ -970,7 +978,7 @@ extern "C" void QPressDevAntiBB27(  unsigned int numberOfThreads,
 								  unsigned int size_Mat, 
 								  bool isEvenTimestep);
 
-extern "C" void PressSchlaffer27(unsigned int numberOfThreads,
+void PressSchlaffer27(unsigned int numberOfThreads,
                                  real* rhoBC,
                                  real* DD,
                                  real* vx0,
@@ -987,7 +995,7 @@ extern "C" void PressSchlaffer27(unsigned int numberOfThreads,
                                  unsigned int size_Mat, 
                                  bool isEvenTimestep);
 
-extern "C" void VelSchlaffer27(  unsigned int numberOfThreads,
+void VelSchlaffer27(  unsigned int numberOfThreads,
                                  int t,
                                  real* DD,
                                  real* vz0,
@@ -1031,7 +1039,7 @@ extern "C" void QADDev7(unsigned int numberOfThreads,
 
 //////////////////////////////////////////////////////////////////////////
 //! \brief Advection Diffusion kernel
-extern "C" void FactorizedCentralMomentsAdvectionDiffusionDeviceKernel(
+void FactorizedCentralMomentsAdvectionDiffusionDeviceKernel(
 	uint numberOfThreads,
 	real omegaDiffusivity,
 	uint* typeOfGridNode,
@@ -1046,7 +1054,7 @@ extern "C" void FactorizedCentralMomentsAdvectionDiffusionDeviceKernel(
 
 //////////////////////////////////////////////////////////////////////////
 //! \brief defines the behavior of a slip-AD boundary condition
-extern "C" void ADSlipVelDevComp(
+void ADSlipVelDevComp(
 	uint numberOfThreads,
 	real * normalX,
 	real * normalY,
@@ -1063,7 +1071,7 @@ extern "C" void ADSlipVelDevComp(
 	uint size_Mat,
 	bool isEvenTimestep);
 	
-extern "C" void QADDirichletDev27( unsigned int numberOfThreads,
+void QADDirichletDev27( unsigned int numberOfThreads,
 								   real* DD, 
 								   real* DD27,
 								   real* temp,
@@ -1078,7 +1086,7 @@ extern "C" void QADDirichletDev27( unsigned int numberOfThreads,
 								   unsigned int size_Mat, 
 								   bool isEvenTimestep);
 
-extern "C" void QADBBDev27(  unsigned int numberOfThreads,
+void QADBBDev27(  unsigned int numberOfThreads,
 							 real* DD, 
 							 real* DD27,
 							 real* temp,
@@ -1093,7 +1101,7 @@ extern "C" void QADBBDev27(  unsigned int numberOfThreads,
 							 unsigned int size_Mat, 
 							 bool isEvenTimestep);
 
-extern "C" void QADVelDev7(unsigned int numberOfThreads,
+void QADVelDev7(unsigned int numberOfThreads,
                            real* DD, 
                            real* DD7,
                            real* temp,
@@ -1110,7 +1118,7 @@ extern "C" void QADVelDev7(unsigned int numberOfThreads,
                            bool isEvenTimestep);
 
 
-extern "C" void QADVelDev27(  unsigned int numberOfThreads,
+void QADVelDev27(  unsigned int numberOfThreads,
                               real* DD, 
                               real* DD27,
                               real* temp,
@@ -1126,7 +1134,7 @@ extern "C" void QADVelDev27(  unsigned int numberOfThreads,
                               unsigned int size_Mat, 
                               bool isEvenTimestep);
 
-extern "C" void QADPressDev7( unsigned int numberOfThreads,
+void QADPressDev7( unsigned int numberOfThreads,
                               real* DD, 
                               real* DD7,
                               real* temp,
@@ -1142,7 +1150,7 @@ extern "C" void QADPressDev7( unsigned int numberOfThreads,
                               unsigned int size_Mat, 
                               bool isEvenTimestep);
 
-extern "C" void QADPressDev27(unsigned int numberOfThreads,
+void QADPressDev27(unsigned int numberOfThreads,
                               real* DD, 
                               real* DD27,
                               real* temp,
@@ -1158,7 +1166,7 @@ extern "C" void QADPressDev27(unsigned int numberOfThreads,
                               unsigned int size_Mat, 
                               bool isEvenTimestep);
 
-extern "C" void QADPressNEQNeighborDev27(
+void QADPressNEQNeighborDev27(
 											unsigned int numberOfThreads,
 											real* DD,
 											real* DD27,
@@ -1172,7 +1180,7 @@ extern "C" void QADPressNEQNeighborDev27(
 											bool isEvenTimestep
 										);
 
-extern "C" void QNoSlipADincompDev7(unsigned int numberOfThreads,
+void QNoSlipADincompDev7(unsigned int numberOfThreads,
 									real* DD, 
 									real* DD7,
 									real* temp,
@@ -1187,7 +1195,7 @@ extern "C" void QNoSlipADincompDev7(unsigned int numberOfThreads,
 									unsigned int size_Mat, 
 									bool isEvenTimestep);
 
-extern "C" void QNoSlipADincompDev27(unsigned int numberOfThreads,
+void QNoSlipADincompDev27(unsigned int numberOfThreads,
 									 real* DD, 
 									 real* DD27,
 									 real* temp,
@@ -1202,7 +1210,7 @@ extern "C" void QNoSlipADincompDev27(unsigned int numberOfThreads,
 									 unsigned int size_Mat, 
 									 bool isEvenTimestep);
 
-extern "C" void QADVeloIncompDev7( unsigned int numberOfThreads,
+void QADVeloIncompDev7( unsigned int numberOfThreads,
 								   real* DD, 
 								   real* DD7,
 								   real* temp,
@@ -1219,7 +1227,7 @@ extern "C" void QADVeloIncompDev7( unsigned int numberOfThreads,
 								   bool isEvenTimestep);
 
 
-extern "C" void QADVeloIncompDev27( unsigned int numberOfThreads,
+void QADVeloIncompDev27( unsigned int numberOfThreads,
 									real* DD, 
 									real* DD27,
 									real* temp,
@@ -1235,7 +1243,7 @@ extern "C" void QADVeloIncompDev27( unsigned int numberOfThreads,
 									unsigned int size_Mat, 
 									bool isEvenTimestep);
 
-extern "C" void QADPressIncompDev7(  unsigned int numberOfThreads,
+void QADPressIncompDev7(  unsigned int numberOfThreads,
 									 real* DD, 
 									 real* DD7,
 									 real* temp,
@@ -1251,7 +1259,7 @@ extern "C" void QADPressIncompDev7(  unsigned int numberOfThreads,
 									 unsigned int size_Mat, 
 									 bool isEvenTimestep);
 
-extern "C" void QADPressIncompDev27(  unsigned int numberOfThreads,
+void QADPressIncompDev27(  unsigned int numberOfThreads,
 									  real* DD, 
 									  real* DD27,
 									  real* temp,
@@ -1267,7 +1275,7 @@ extern "C" void QADPressIncompDev27(  unsigned int numberOfThreads,
 									  unsigned int size_Mat, 
 									  bool isEvenTimestep);
 
-extern "C" void PropVelo(   unsigned int numberOfThreads,
+void PropVelo(   unsigned int numberOfThreads,
 							unsigned int* neighborX,
 							unsigned int* neighborY,
 							unsigned int* neighborZ,
@@ -1282,7 +1290,7 @@ extern "C" void PropVelo(   unsigned int numberOfThreads,
 							real* DD,
 							bool EvenOrOdd);
 
-extern "C" void ScaleCF27( real* DC, 
+void ScaleCF27( real* DC, 
                            real* DF, 
                            unsigned int* neighborCX,
                            unsigned int* neighborCY,
@@ -1305,7 +1313,7 @@ extern "C" void ScaleCF27( real* DC,
                            unsigned int nyF,
                            unsigned int numberOfThreads);
 
-extern "C" void ScaleFC27( real* DC, 
+void ScaleFC27( real* DC, 
                            real* DF, 
                            unsigned int* neighborCX,
                            unsigned int* neighborCY,
@@ -1328,7 +1336,7 @@ extern "C" void ScaleFC27( real* DC,
                            unsigned int nyF,
                            unsigned int numberOfThreads);
 
-extern "C" void ScaleCFEff27(real* DC, 
+void ScaleCFEff27(real* DC, 
                              real* DF, 
                              unsigned int* neighborCX,
                              unsigned int* neighborCY,
@@ -1352,7 +1360,7 @@ extern "C" void ScaleCFEff27(real* DC,
                              unsigned int numberOfThreads,
                              OffCF offCF);
 
-extern "C" void ScaleFCEff27(real* DC, 
+void ScaleFCEff27(real* DC, 
                              real* DF, 
                              unsigned int* neighborCX,
                              unsigned int* neighborCY,
@@ -1376,7 +1384,7 @@ extern "C" void ScaleFCEff27(real* DC,
                              unsigned int numberOfThreads,
                              OffFC offFC);
 
-extern "C" void ScaleCFLast27(real* DC, 
+void ScaleCFLast27(real* DC, 
                               real* DF, 
                               unsigned int* neighborCX,
                               unsigned int* neighborCY,
@@ -1400,7 +1408,7 @@ extern "C" void ScaleCFLast27(real* DC,
                               unsigned int numberOfThreads,
                               OffCF offCF);
 
-extern "C" void ScaleFCLast27(real* DC, 
+void ScaleFCLast27(real* DC, 
                               real* DF, 
                               unsigned int* neighborCX,
                               unsigned int* neighborCY,
@@ -1424,7 +1432,7 @@ extern "C" void ScaleFCLast27(real* DC,
                               unsigned int numberOfThreads,
                               OffFC offFC);
 
-extern "C" void ScaleCFpress27(real* DC, 
+void ScaleCFpress27(real* DC, 
                               real* DF, 
                               unsigned int* neighborCX,
                               unsigned int* neighborCY,
@@ -1448,7 +1456,7 @@ extern "C" void ScaleCFpress27(real* DC,
                               unsigned int numberOfThreads,
                               OffCF offCF);
 
-extern "C" void ScaleFCpress27(  real* DC, 
+void ScaleFCpress27(  real* DC, 
                                  real* DF, 
                                  unsigned int* neighborCX,
                                  unsigned int* neighborCY,
@@ -1472,7 +1480,7 @@ extern "C" void ScaleFCpress27(  real* DC,
                                  unsigned int numberOfThreads,
                                  OffFC offFC);
 
-extern "C" void ScaleCF_Fix_27(real* DC, 
+void ScaleCF_Fix_27(real* DC, 
                               real* DF, 
                               unsigned int* neighborCX,
                               unsigned int* neighborCY,
@@ -1496,7 +1504,7 @@ extern "C" void ScaleCF_Fix_27(real* DC,
                               unsigned int numberOfThreads,
                               OffCF offCF);
 
-extern "C" void ScaleCF_Fix_comp_27(   real* DC, 
+void ScaleCF_Fix_comp_27(   real* DC, 
 									   real* DF, 
 									   unsigned int* neighborCX,
 									   unsigned int* neighborCY,
@@ -1520,7 +1528,7 @@ extern "C" void ScaleCF_Fix_comp_27(   real* DC,
 									   unsigned int numberOfThreads,
 									   OffCF offCF);
 
-extern "C" void ScaleCF_0817_comp_27(  real* DC, 
+void ScaleCF_0817_comp_27(  real* DC, 
 									   real* DF, 
 									   unsigned int* neighborCX,
 									   unsigned int* neighborCY,
@@ -1545,7 +1553,7 @@ extern "C" void ScaleCF_0817_comp_27(  real* DC,
 									   OffCF offCF,
 									   CUstream_st* stream);
 
-extern "C" void ScaleCF_comp_D3Q27F3_2018(	real* DC,
+void ScaleCF_comp_D3Q27F3_2018(	real* DC,
 											real* DF,
 											real* G6, 
 											unsigned int* neighborCX,
@@ -1570,7 +1578,7 @@ extern "C" void ScaleCF_comp_D3Q27F3_2018(	real* DC,
 											unsigned int numberOfThreads,
 											OffCF offCF);
 
-extern "C" void ScaleCF_comp_D3Q27F3(real* DC,
+void ScaleCF_comp_D3Q27F3(real* DC,
 									 real* DF,
 									 real* G6, 
 									 unsigned int* neighborCX,
@@ -1596,7 +1604,7 @@ extern "C" void ScaleCF_comp_D3Q27F3(real* DC,
 									 OffCF offCF,
 									 CUstream_st *stream);
 
-extern "C" void ScaleCF_staggered_time_comp_27( real* DC, 
+void ScaleCF_staggered_time_comp_27( real* DC, 
 												real* DF, 
 												unsigned int* neighborCX,
 												unsigned int* neighborCY,
@@ -1620,7 +1628,7 @@ extern "C" void ScaleCF_staggered_time_comp_27( real* DC,
 												unsigned int numberOfThreads,
 												OffCF offCF);
 
-extern "C" void ScaleCF_RhoSq_comp_27(  real* DC, 
+void ScaleCF_RhoSq_comp_27(  real* DC, 
 										real* DF, 
 										unsigned int* neighborCX,
 										unsigned int* neighborCY,
@@ -1645,7 +1653,7 @@ extern "C" void ScaleCF_RhoSq_comp_27(  real* DC,
 										OffCF offCF,
                                         CUstream_st *stream);
 
-extern "C" void ScaleCF_RhoSq_3rdMom_comp_27( real* DC, 
+void ScaleCF_RhoSq_3rdMom_comp_27( real* DC, 
 											  real* DF, 
 											  unsigned int* neighborCX,
 											  unsigned int* neighborCY,
@@ -1670,7 +1678,7 @@ extern "C" void ScaleCF_RhoSq_3rdMom_comp_27( real* DC,
 											  OffCF offCF,
 											  CUstream_st *stream);
 
-extern "C" void ScaleCF_AA2016_comp_27( real* DC, 
+void ScaleCF_AA2016_comp_27( real* DC, 
 										real* DF, 
 										unsigned int* neighborCX,
 										unsigned int* neighborCY,
@@ -1695,7 +1703,7 @@ extern "C" void ScaleCF_AA2016_comp_27( real* DC,
 										OffCF offCF,
 										CUstream_st *stream);
 
-extern "C" void ScaleCF_NSPress_27(real* DC, 
+void ScaleCF_NSPress_27(real* DC, 
 								  real* DF, 
 								  unsigned int* neighborCX,
 								  unsigned int* neighborCY,
@@ -1719,7 +1727,7 @@ extern "C" void ScaleCF_NSPress_27(real* DC,
 								  unsigned int numberOfThreads,
 								  OffCF offCF);
 
-extern "C" void ScaleFC_Fix_27(  real* DC, 
+void ScaleFC_Fix_27(  real* DC, 
                                  real* DF, 
                                  unsigned int* neighborCX,
                                  unsigned int* neighborCY,
@@ -1743,7 +1751,7 @@ extern "C" void ScaleFC_Fix_27(  real* DC,
                                  unsigned int numberOfThreads,
                                  OffFC offFC);
 
-extern "C" void ScaleFC_Fix_comp_27(   real* DC, 
+void ScaleFC_Fix_comp_27(   real* DC, 
 									   real* DF, 
 									   unsigned int* neighborCX,
 									   unsigned int* neighborCY,
@@ -1767,7 +1775,7 @@ extern "C" void ScaleFC_Fix_comp_27(   real* DC,
 									   unsigned int numberOfThreads,
 									   OffFC offFC);
 
-extern "C" void ScaleFC_0817_comp_27(  real* DC, 
+void ScaleFC_0817_comp_27(  real* DC, 
 									   real* DF, 
 									   unsigned int* neighborCX,
 									   unsigned int* neighborCY,
@@ -1792,7 +1800,7 @@ extern "C" void ScaleFC_0817_comp_27(  real* DC,
 									   OffFC offFC,
 									   CUstream_st *stream);
 
-extern "C" void ScaleFC_comp_D3Q27F3_2018(real* DC,
+void ScaleFC_comp_D3Q27F3_2018(real* DC,
 										  real* DF,
 										  real* G6,
 										  unsigned int* neighborCX,
@@ -1817,7 +1825,7 @@ extern "C" void ScaleFC_comp_D3Q27F3_2018(real* DC,
 										  unsigned int numberOfThreads,
 										  OffFC offFC);
 
-extern "C" void ScaleFC_comp_D3Q27F3( real* DC,
+void ScaleFC_comp_D3Q27F3( real* DC,
 									  real* DF,
 									  real* G6,
 									  unsigned int* neighborCX,
@@ -1843,7 +1851,7 @@ extern "C" void ScaleFC_comp_D3Q27F3( real* DC,
 									  OffFC offFC,
 									  CUstream_st *stream);
 
-extern "C" void ScaleFC_staggered_time_comp_27( real* DC, 
+void ScaleFC_staggered_time_comp_27( real* DC, 
 												real* DF, 
 												unsigned int* neighborCX,
 												unsigned int* neighborCY,
@@ -1867,7 +1875,7 @@ extern "C" void ScaleFC_staggered_time_comp_27( real* DC,
 												unsigned int numberOfThreads,
 												OffFC offFC);
 
-extern "C" void ScaleFC_RhoSq_comp_27(  real* DC, 
+void ScaleFC_RhoSq_comp_27(  real* DC, 
 										real* DF, 
 										unsigned int* neighborCX,
 										unsigned int* neighborCY,
@@ -1892,7 +1900,7 @@ extern "C" void ScaleFC_RhoSq_comp_27(  real* DC,
 	                                    OffFC offFC,
                                         CUstream_st *stream);
 
-extern "C" void ScaleFC_RhoSq_3rdMom_comp_27( real* DC, 
+void ScaleFC_RhoSq_3rdMom_comp_27( real* DC, 
 											  real* DF, 
 											  unsigned int* neighborCX,
 											  unsigned int* neighborCY,
@@ -1917,7 +1925,7 @@ extern "C" void ScaleFC_RhoSq_3rdMom_comp_27( real* DC,
 											  OffFC offFC,
 											  CUstream_st *stream);
 
-extern "C" void ScaleFC_AA2016_comp_27( real* DC, 
+void ScaleFC_AA2016_comp_27( real* DC, 
 										real* DF, 
 										unsigned int* neighborCX,
 										unsigned int* neighborCY,
@@ -1942,7 +1950,7 @@ extern "C" void ScaleFC_AA2016_comp_27( real* DC,
 										OffFC offFC,
 										CUstream_st *stream);
 
-extern "C" void ScaleFC_NSPress_27(  real* DC, 
+void ScaleFC_NSPress_27(  real* DC, 
 									 real* DF, 
 									 unsigned int* neighborCX,
 									 unsigned int* neighborCY,
@@ -1966,7 +1974,7 @@ extern "C" void ScaleFC_NSPress_27(  real* DC,
 									 unsigned int numberOfThreads,
 									 OffFC offFC);
 
-extern "C" void ScaleCFThS7(  real* DC, 
+void ScaleCFThS7(  real* DC, 
                               real* DF, 
                               real* DD7C, 
                               real* DD7F,
@@ -1986,7 +1994,7 @@ extern "C" void ScaleCFThS7(  real* DC,
                               real diffusivity_fine,
                               unsigned int numberOfThreads);
 
-extern "C" void ScaleFCThS7(  real* DC, 
+void ScaleFCThS7(  real* DC, 
                               real* DF,
                               real* DD7C, 
                               real* DD7F,
@@ -2006,7 +2014,7 @@ extern "C" void ScaleFCThS7(  real* DC,
                               real diffusivity_coarse,
                               unsigned int numberOfThreads);
 
-extern "C" void ScaleCFThSMG7(   real* DC, 
+void ScaleCFThSMG7(   real* DC, 
                                  real* DF,
                                  real* DD7C, 
                                  real* DD7F,
@@ -2027,7 +2035,7 @@ extern "C" void ScaleCFThSMG7(   real* DC,
                                  unsigned int numberOfThreads,
                                  OffCF offCF);
 
-extern "C" void ScaleFCThSMG7(real* DC, 
+void ScaleFCThSMG7(real* DC, 
                               real* DF,
                               real* DD7C, 
                               real* DD7F,
@@ -2048,7 +2056,7 @@ extern "C" void ScaleFCThSMG7(real* DC,
                               unsigned int numberOfThreads,
                               OffFC offFC);
 
-extern "C" void ScaleCFThS27( real* DC, 
+void ScaleCFThS27( real* DC, 
                               real* DF, 
                               real* DD27C, 
                               real* DD27F,
@@ -2069,7 +2077,7 @@ extern "C" void ScaleCFThS27( real* DC,
 							  unsigned int numberOfThreads,
 							  OffCF offCF);
 
-extern "C" void ScaleFCThS27( real* DC, 
+void ScaleFCThS27( real* DC, 
                               real* DF,
                               real* DD27C, 
                               real* DD27F,
@@ -2090,7 +2098,7 @@ extern "C" void ScaleFCThS27( real* DC,
 							  unsigned int numberOfThreads,
 							  OffFC offFC);
 
-extern "C" void DragLiftPostD27(real* DD, 
+void DragLiftPostD27(real* DD, 
 								int* k_Q, 
 								real* QQ,
 								int numberOfBCnodes, 
@@ -2104,7 +2112,7 @@ extern "C" void DragLiftPostD27(real* DD,
 								bool isEvenTimestep,
 								unsigned int numberOfThreads);
 
-extern "C" void DragLiftPreD27( real* DD, 
+void DragLiftPreD27( real* DD, 
 								int* k_Q, 
 								real* QQ,
 								int numberOfBCnodes, 
@@ -2118,7 +2126,7 @@ extern "C" void DragLiftPreD27( real* DD,
 								bool isEvenTimestep,
 								unsigned int numberOfThreads);
 
-extern "C" void CalcCPtop27(real* DD, 
+void CalcCPtop27(real* DD, 
 							int* cpIndex, 
 							int nonCp, 
 							double *cpPress,
@@ -2129,7 +2137,7 @@ extern "C" void CalcCPtop27(real* DD,
 							bool isEvenTimestep,
 							unsigned int numberOfThreads);
 
-extern "C" void CalcCPbottom27(real* DD, 
+void CalcCPbottom27(real* DD, 
 							   int* cpIndex, 
 							   int nonCp, 
 							   double *cpPress,
@@ -2140,7 +2148,7 @@ extern "C" void CalcCPbottom27(real* DD,
 							   bool isEvenTimestep,
 							   unsigned int numberOfThreads);
 
-extern "C" void GetSendFsPreDev27(real* DD,
+void GetSendFsPreDev27(real* DD,
 								  real* bufferFs,
 								  int* sendIndex,
 								  int buffmax,
@@ -2152,7 +2160,7 @@ extern "C" void GetSendFsPreDev27(real* DD,
 								  unsigned int numberOfThreads, 
 	                              cudaStream_t stream = CU_STREAM_LEGACY);
 
-extern "C" void GetSendFsPostDev27(real* DD,
+void GetSendFsPostDev27(real* DD,
 								   real* bufferFs,
 								   int* sendIndex,
 								   int buffmax,
@@ -2164,7 +2172,7 @@ extern "C" void GetSendFsPostDev27(real* DD,
 								   unsigned int numberOfThreads, 
 	                               cudaStream_t stream = CU_STREAM_LEGACY);
 
-extern "C" void SetRecvFsPreDev27(real* DD,
+void SetRecvFsPreDev27(real* DD,
 								  real* bufferFs,
 								  int* recvIndex,
 								  int buffmax,
@@ -2175,7 +2183,7 @@ extern "C" void SetRecvFsPreDev27(real* DD,
 								  bool isEvenTimestep, unsigned int numberOfThreads, 
 	                              cudaStream_t stream = CU_STREAM_LEGACY);
 
-extern "C" void SetRecvFsPostDev27(real* DD,
+void SetRecvFsPostDev27(real* DD,
 								   real* bufferFs,
 								   int* recvIndex,
 								   int buffmax,
@@ -2187,7 +2195,7 @@ extern "C" void SetRecvFsPostDev27(real* DD,
 								   unsigned int numberOfThreads,
                                    cudaStream_t stream = CU_STREAM_LEGACY);
 
-extern "C" void getSendGsDevF3(
+void getSendGsDevF3(
 	real* G6,
 	real* bufferGs,
 	int* sendIndex,
@@ -2199,7 +2207,7 @@ extern "C" void getSendGsDevF3(
 	bool isEvenTimestep,
 	unsigned int numberOfThreads);
 
-extern "C" void setRecvGsDevF3(
+void setRecvGsDevF3(
 	real* G6,
 	real* bufferGs,
 	int* recvIndex,
@@ -2211,7 +2219,7 @@ extern "C" void setRecvGsDevF3(
 	bool isEvenTimestep,
 	unsigned int numberOfThreads);
 
-extern "C" void WallFuncDev27(unsigned int numberOfThreads,
+void WallFuncDev27(unsigned int numberOfThreads,
 							  real* vx,
 							  real* vy,
 							  real* vz,
@@ -2226,7 +2234,7 @@ extern "C" void WallFuncDev27(unsigned int numberOfThreads,
 							  unsigned int size_Mat, 
 							  bool isEvenTimestep);
 
-extern "C" void SetOutputWallVelocitySP27(unsigned int numberOfThreads,
+void SetOutputWallVelocitySP27(unsigned int numberOfThreads,
 										  real* vxD,
 										  real* vyD,
 										  real* vzD,
@@ -2245,7 +2253,7 @@ extern "C" void SetOutputWallVelocitySP27(unsigned int numberOfThreads,
 										  real* DD,
 										  bool isEvenTimestep);
 
-extern "C" void GetVelotoForce27(unsigned int numberOfThreads,
+void GetVelotoForce27(unsigned int numberOfThreads,
 								 real* DD, 
 								 int* bcIndex, 
 								 int nonAtBC, 
@@ -2258,7 +2266,7 @@ extern "C" void GetVelotoForce27(unsigned int numberOfThreads,
 								 unsigned int size_Mat, 
 								 bool isEvenTimestep);
 
-extern "C" void InitParticlesDevice(real* coordX,
+void InitParticlesDevice(real* coordX,
 									real* coordY,
 									real* coordZ, 
 									real* coordParticleXlocal,
@@ -2283,7 +2291,7 @@ extern "C" void InitParticlesDevice(real* coordX,
 									unsigned int size_Mat,
 									unsigned int numberOfThreads);
 
-extern "C" void MoveParticlesDevice(real* coordX,
+void MoveParticlesDevice(real* coordX,
 									real* coordY,
 									real* coordZ, 
 									real* coordParticleXlocal,
@@ -2312,16 +2320,16 @@ extern "C" void MoveParticlesDevice(real* coordX,
 									unsigned int numberOfThreads,
 									bool isEvenTimestep);
 
-extern "C" void initRandomDevice(curandState* state,
+void initRandomDevice(curandState* state,
 								 unsigned int size_Mat,
 								 unsigned int numberOfThreads);
 
-extern "C" void generateRandomValuesDevice(curandState* state,
+void generateRandomValuesDevice(curandState* state,
 										   unsigned int size_Mat,
 										   real* randArray,
 										   unsigned int numberOfThreads);
 
-extern "C" void CalcTurbulenceIntensityDevice(
+void CalcTurbulenceIntensityDevice(
    real* vxx,
    real* vyy,
    real* vzz,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh b/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh
index 2a1f168fce51b3e6caef1f18c05e09bd82387307..af5c7bbe62c5b534f75c80a3348cb79abc66c713 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh
+++ b/src/gpu/VirtualFluids_GPU/GPU/GPU_Kernels.cuh
@@ -11,10 +11,12 @@
 //random numbers
 #include <curand.h>
 #include <curand_kernel.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
 
 #include "LBM/LB.h"
 
-extern "C" __global__ void LB_Kernel_Casc27(real s9,
+__global__ void LB_Kernel_Casc27(real s9,
                                             unsigned int* bcMatD,
                                             unsigned int* neighborX,
                                             unsigned int* neighborY,
@@ -23,7 +25,7 @@ extern "C" __global__ void LB_Kernel_Casc27(real s9,
                                             int size_Mat,
                                             bool EvenOrOdd);
 
-extern "C" __global__ void LB_Kernel_Casc_SP_27(  real s9,
+__global__ void LB_Kernel_Casc_SP_27(  real s9,
                                                   unsigned int* bcMatD,
                                                   unsigned int* neighborX,
                                                   unsigned int* neighborY,
@@ -32,7 +34,7 @@ extern "C" __global__ void LB_Kernel_Casc_SP_27(  real s9,
                                                   int size_Mat,
                                                   bool EvenOrOdd);
 
-extern "C" __global__ void LB_Kernel_Casc_SP_MS_27(   real s9,
+__global__ void LB_Kernel_Casc_SP_MS_27(   real s9,
                                                       unsigned int* bcMatD,
                                                       unsigned int* neighborX,
                                                       unsigned int* neighborY,
@@ -41,7 +43,7 @@ extern "C" __global__ void LB_Kernel_Casc_SP_MS_27(   real s9,
                                                       int size_Mat,
                                                       bool EvenOrOdd);
 
-extern "C" __global__ void LB_Kernel_Casc_SP_MS_OHM_27(  real s9,
+__global__ void LB_Kernel_Casc_SP_MS_OHM_27(  real s9,
                                                          unsigned int* bcMatD,
                                                          unsigned int* neighborX,
                                                          unsigned int* neighborY,
@@ -50,7 +52,7 @@ extern "C" __global__ void LB_Kernel_Casc_SP_MS_OHM_27(  real s9,
                                                          int size_Mat,
                                                          bool EvenOrOdd);
 
-extern "C" __global__ void LB_Kernel_Kum_New_Comp_SRT_SP_27(
+__global__ void LB_Kernel_Kum_New_Comp_SRT_SP_27(
 	real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
@@ -62,7 +64,7 @@ extern "C" __global__ void LB_Kernel_Kum_New_Comp_SRT_SP_27(
 	real* forces,
 	bool EvenOrOdd);
 
-extern "C" __global__ void LB_Kernel_Cumulant_D3Q27All4(real omega,
+__global__ void LB_Kernel_Cumulant_D3Q27All4(real omega,
 														unsigned int* bcMatD,
 														unsigned int* neighborX,
 														unsigned int* neighborY,
@@ -74,7 +76,7 @@ extern "C" __global__ void LB_Kernel_Cumulant_D3Q27All4(real omega,
 														bool EvenOrOdd);
 
 
-extern "C" __global__ void LB_Kernel_Kum_AA2016_Comp_Bulk_SP_27(real omega,
+__global__ void LB_Kernel_Kum_AA2016_Comp_Bulk_SP_27(real omega,
 																unsigned int* bcMatD,
 																unsigned int* neighborX,
 																unsigned int* neighborY,
@@ -87,7 +89,7 @@ extern "C" __global__ void LB_Kernel_Kum_AA2016_Comp_Bulk_SP_27(real omega,
 
 
 
-extern "C" __global__ void LB_Kernel_Kum_1h_SP_27(  real omega,
+__global__ void LB_Kernel_Kum_1h_SP_27(  real omega,
 													real deltaPhi,
 													real angularVelocity,
 													unsigned int* bcMatD,
@@ -101,7 +103,7 @@ extern "C" __global__ void LB_Kernel_Kum_1h_SP_27(  real omega,
 													int size_Mat,
 													bool EvenOrOdd);
 
-extern "C" __global__ void LB_Kernel_Cascade_SP_27( real s9,
+__global__ void LB_Kernel_Cascade_SP_27( real s9,
 													unsigned int* bcMatD,
 													unsigned int* neighborX,
 													unsigned int* neighborY,
@@ -110,7 +112,7 @@ extern "C" __global__ void LB_Kernel_Cascade_SP_27( real s9,
 													int size_Mat,
 													bool EvenOrOdd);
 
-extern "C" __global__ void LB_Kernel_Kum_New_SP_27( real s9,
+__global__ void LB_Kernel_Kum_New_SP_27( real s9,
 													unsigned int* bcMatD,
 													unsigned int* neighborX,
 													unsigned int* neighborY,
@@ -119,7 +121,7 @@ extern "C" __global__ void LB_Kernel_Kum_New_SP_27( real s9,
 													int size_Mat,
 													bool EvenOrOdd);
 
-extern "C" __global__ void LB_Kernel_Kum_IsoTest_SP_27( real omega,
+__global__ void LB_Kernel_Kum_IsoTest_SP_27( real omega,
 														unsigned int* bcMatD,
 														unsigned int* neighborX,
 														unsigned int* neighborY,
@@ -131,7 +133,7 @@ extern "C" __global__ void LB_Kernel_Kum_IsoTest_SP_27( real omega,
 														int size_Mat,
 														bool EvenOrOdd);
 
-extern "C" __global__ void LB_Kernel_Kum_Comp_SP_27(real s9,
+__global__ void LB_Kernel_Kum_Comp_SP_27(real s9,
 													unsigned int* bcMatD,
 													unsigned int* neighborX,
 													unsigned int* neighborY,
@@ -140,7 +142,7 @@ extern "C" __global__ void LB_Kernel_Kum_Comp_SP_27(real s9,
 													int size_Mat,
 													bool EvenOrOdd);
 
-extern "C" __global__ void Cumulant_One_preconditioned_errorDiffusion_chim_Comp_SP_27(
+__global__ void Cumulant_One_preconditioned_errorDiffusion_chim_Comp_SP_27(
 	real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
@@ -152,7 +154,7 @@ extern "C" __global__ void Cumulant_One_preconditioned_errorDiffusion_chim_Comp_
 	real* forces,
 	bool EvenOrOdd);
 
-extern "C" __global__ void Cumulant_One_preconditioned_chim_Comp_SP_27(
+__global__ void Cumulant_One_preconditioned_chim_Comp_SP_27(
 	real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
@@ -164,7 +166,7 @@ extern "C" __global__ void Cumulant_One_preconditioned_chim_Comp_SP_27(
 	real* forces,
 	bool EvenOrOdd);
 
-extern "C" __global__ void Cumulant_One_chim_Comp_SP_27(
+__global__ void Cumulant_One_chim_Comp_SP_27(
 	real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
@@ -186,7 +188,7 @@ inline __device__ void backwardChimeraWithK(real &mfa, real &mfb, real &mfc, rea
 
 
 
-extern "C" __global__ void LB_Kernel_WaleBySoniMalav_Cum_AA2016_Comp_SP_27(
+__global__ void LB_Kernel_WaleBySoniMalav_Cum_AA2016_Comp_SP_27(
 	real omega_in,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
@@ -204,7 +206,7 @@ extern "C" __global__ void LB_Kernel_WaleBySoniMalav_Cum_AA2016_Comp_SP_27(
 	bool EvenOrOdd);
 
 
-extern "C" __global__ void LB_Kernel_PM_Cum_One_Comp_SP_27( real omega,
+__global__ void LB_Kernel_PM_Cum_One_Comp_SP_27( real omega,
 															unsigned int* neighborX,
 															unsigned int* neighborY,
 															unsigned int* neighborZ,
@@ -219,7 +221,7 @@ extern "C" __global__ void LB_Kernel_PM_Cum_One_Comp_SP_27( real omega,
 															unsigned int* nodeIdsPorousMedia,
 															bool EvenOrOdd);
 
-extern "C" __global__ void LB_Kernel_AD_Incomp_7( real diffusivity,
+__global__ void LB_Kernel_AD_Incomp_7( real diffusivity,
 												  unsigned int* bcMatD,
 												  unsigned int* neighborX,
 												  unsigned int* neighborY,
@@ -229,7 +231,7 @@ extern "C" __global__ void LB_Kernel_AD_Incomp_7( real diffusivity,
 												  int size_Mat,
 												  bool EvenOrOdd);
 
-extern "C" __global__ void LB_Kernel_AD_Incomp_27( real diffusivity,
+__global__ void LB_Kernel_AD_Incomp_27( real diffusivity,
 												   unsigned int* bcMatD,
 												   unsigned int* neighborX,
 												   unsigned int* neighborY,
@@ -239,7 +241,7 @@ extern "C" __global__ void LB_Kernel_AD_Incomp_27( real diffusivity,
 												   int size_Mat,
 												   bool EvenOrOdd);
 
-extern "C" __global__ void LBInit27( int myid,
+__global__ void LBInit27( int myid,
                                      int numprocs,
                                      real u0,
                                      unsigned int* geoD,
@@ -255,7 +257,7 @@ extern "C" __global__ void LBInit27( int myid,
                                      int lev,
                                      int maxlev);
 
-extern "C" __global__ void LBInitNonEqPartSP27(unsigned int* neighborX,
+__global__ void LBInitNonEqPartSP27(unsigned int* neighborX,
                                                unsigned int* neighborY,
                                                unsigned int* neighborZ,
                                                unsigned int* neighborWSB,
@@ -269,7 +271,7 @@ extern "C" __global__ void LBInitNonEqPartSP27(unsigned int* neighborX,
                                                real omega,
                                                bool EvenOrOdd);
 
-extern "C" __global__ void InitAD7( unsigned int* neighborX,
+__global__ void InitAD7( unsigned int* neighborX,
                                        unsigned int* neighborY,
                                        unsigned int* neighborZ,
                                        unsigned int* geoD,
@@ -281,7 +283,7 @@ extern "C" __global__ void InitAD7( unsigned int* neighborX,
                                        real* DD7,
                                        bool EvenOrOdd);
 
-extern "C" __global__ void InitAD27(unsigned int* neighborX,
+__global__ void InitAD27(unsigned int* neighborX,
                                        unsigned int* neighborY,
                                        unsigned int* neighborZ,
                                        unsigned int* geoD,
@@ -293,7 +295,7 @@ extern "C" __global__ void InitAD27(unsigned int* neighborX,
                                        real* DD27,
                                        bool EvenOrOdd);
 
-extern "C" __global__ void LB_PostProcessor_F3_2018_Fehlberg(
+__global__ void LB_PostProcessor_F3_2018_Fehlberg(
 	real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
@@ -310,7 +312,7 @@ extern "C" __global__ void LB_PostProcessor_F3_2018_Fehlberg(
 	real* forces,
 	bool EvenOrOdd);
 
-extern "C" __global__ void LBCalcMac27( real* vxD,
+__global__ void LBCalcMac27( real* vxD,
                                         real* vyD,
                                         real* vzD,
                                         real* rhoD,
@@ -322,7 +324,7 @@ extern "C" __global__ void LBCalcMac27( real* vxD,
                                         real* DD,
                                         bool isEvenTimestep);
 
-extern "C" __global__ void LBCalcMacSP27( real* vxD,
+__global__ void LBCalcMacSP27( real* vxD,
                                           real* vyD,
                                           real* vzD,
                                           real* rhoD,
@@ -335,7 +337,7 @@ extern "C" __global__ void LBCalcMacSP27( real* vxD,
                                           real* DD,
                                           bool isEvenTimestep);
 
-extern "C" __global__ void LBCalcMacCompSP27( real* vxD,
+__global__ void LBCalcMacCompSP27( real* vxD,
 											  real* vyD,
 											  real* vzD,
 											  real* rhoD,
@@ -348,7 +350,7 @@ extern "C" __global__ void LBCalcMacCompSP27( real* vxD,
 											  real* DD,
 											  bool isEvenTimestep);
 
-extern "C" __global__ void CalcConc7( real* Conc,
+__global__ void CalcConc7( real* Conc,
                                           unsigned int* geoD,
                                           unsigned int* neighborX,
                                           unsigned int* neighborY,
@@ -357,7 +359,7 @@ extern "C" __global__ void CalcConc7( real* Conc,
                                           real* DD7,
                                           bool isEvenTimestep);
 
-extern "C" __global__ void GetPlaneConc7(real* Conc,
+__global__ void GetPlaneConc7(real* Conc,
 								            int* kPC,
 								            unsigned int numberOfPointskPC,
 											unsigned int* geoD,
@@ -368,7 +370,7 @@ extern "C" __global__ void GetPlaneConc7(real* Conc,
 											real* DD7,
 											bool isEvenTimestep);
 
-extern "C" __global__ void GetPlaneConc27(real* Conc,
+__global__ void GetPlaneConc27(real* Conc,
 								             int* kPC,
 								             unsigned int numberOfPointskPC,
 											 unsigned int* geoD,
@@ -379,7 +381,7 @@ extern "C" __global__ void GetPlaneConc27(real* Conc,
 											 real* DD27,
 											 bool isEvenTimestep);
 
-extern "C" __global__ void CalcConc27(real* Conc,
+__global__ void CalcConc27(real* Conc,
                                           unsigned int* geoD,
                                           unsigned int* neighborX,
                                           unsigned int* neighborY,
@@ -388,7 +390,7 @@ extern "C" __global__ void CalcConc27(real* Conc,
                                           real* DD27,
                                           bool isEvenTimestep);
 
-extern "C" __global__ void LBCalcMedSP27( real* vxD,
+__global__ void LBCalcMedSP27( real* vxD,
                                           real* vyD,
                                           real* vzD,
                                           real* rhoD,
@@ -401,7 +403,7 @@ extern "C" __global__ void LBCalcMedSP27( real* vxD,
                                           real* DD,
                                           bool isEvenTimestep);
 
-extern "C" __global__ void LBCalcMedCompSP27( real* vxD,
+__global__ void LBCalcMedCompSP27( real* vxD,
 											  real* vyD,
 											  real* vzD,
 											  real* rhoD,
@@ -414,7 +416,7 @@ extern "C" __global__ void LBCalcMedCompSP27( real* vxD,
 											  real* DD,
 											  bool isEvenTimestep);
 
-extern "C" __global__ void LBCalcMedCompAD27(
+__global__ void LBCalcMedCompAD27(
 	real* vxD,
 	real* vyD,
 	real* vzD,
@@ -430,7 +432,7 @@ extern "C" __global__ void LBCalcMedCompAD27(
 	real* DD_AD,
 	bool isEvenTimestep);
 
-extern "C" __global__ void LBCalcMacMedSP27( real* vxD,
+__global__ void LBCalcMacMedSP27( real* vxD,
                                              real* vyD,
                                              real* vzD,
                                              real* rhoD,
@@ -443,7 +445,7 @@ extern "C" __global__ void LBCalcMacMedSP27( real* vxD,
                                              unsigned int size_Mat,
                                              bool isEvenTimestep);
 
-extern "C" __global__ void LBResetMedianValuesSP27(
+__global__ void LBResetMedianValuesSP27(
 	real* vxD,
 	real* vyD,
 	real* vzD,
@@ -452,7 +454,7 @@ extern "C" __global__ void LBResetMedianValuesSP27(
 	unsigned int size_Mat,
 	bool isEvenTimestep);
 
-extern "C" __global__ void LBResetMedianValuesAD27(
+__global__ void LBResetMedianValuesAD27(
 	real* vxD,
 	real* vyD,
 	real* vzD,
@@ -462,7 +464,7 @@ extern "C" __global__ void LBResetMedianValuesAD27(
 	unsigned int size_Mat,
 	bool isEvenTimestep);
 
-extern "C" __global__ void LBCalc2ndMomentsIncompSP27(  real* kxyFromfcNEQ,
+__global__ void LBCalc2ndMomentsIncompSP27(  real* kxyFromfcNEQ,
 														real* kyzFromfcNEQ,
 														real* kxzFromfcNEQ,
 														real* kxxMyyFromfcNEQ,
@@ -475,7 +477,7 @@ extern "C" __global__ void LBCalc2ndMomentsIncompSP27(  real* kxyFromfcNEQ,
 														real* DD,
 														bool isEvenTimestep);
 
-extern "C" __global__ void LBCalc2ndMomentsCompSP27(real* kxyFromfcNEQ,
+__global__ void LBCalc2ndMomentsCompSP27(real* kxyFromfcNEQ,
 													real* kyzFromfcNEQ,
 													real* kxzFromfcNEQ,
 													real* kxxMyyFromfcNEQ,
@@ -488,7 +490,7 @@ extern "C" __global__ void LBCalc2ndMomentsCompSP27(real* kxyFromfcNEQ,
 													real* DD,
 													bool isEvenTimestep);
 
-extern "C" __global__ void LBCalc3rdMomentsIncompSP27(  real* CUMbbb,
+__global__ void LBCalc3rdMomentsIncompSP27(  real* CUMbbb,
 														real* CUMabc,
 														real* CUMbac,
 														real* CUMbca,
@@ -503,7 +505,7 @@ extern "C" __global__ void LBCalc3rdMomentsIncompSP27(  real* CUMbbb,
 														int size_Mat,
 														bool EvenOrOdd);
 
-extern "C" __global__ void LBCalc3rdMomentsCompSP27(real* CUMbbb,
+__global__ void LBCalc3rdMomentsCompSP27(real* CUMbbb,
 													real* CUMabc,
 													real* CUMbac,
 													real* CUMbca,
@@ -518,7 +520,7 @@ extern "C" __global__ void LBCalc3rdMomentsCompSP27(real* CUMbbb,
 													int size_Mat,
 													bool EvenOrOdd);
 
-extern "C" __global__ void LBCalcHigherMomentsIncompSP27(   real* CUMcbb,
+__global__ void LBCalcHigherMomentsIncompSP27(   real* CUMcbb,
 															real* CUMbcb,
 															real* CUMbbc,
 															real* CUMcca,
@@ -536,7 +538,7 @@ extern "C" __global__ void LBCalcHigherMomentsIncompSP27(   real* CUMcbb,
 															int size_Mat,
 															bool EvenOrOdd);
 
-extern "C" __global__ void LBCalcHigherMomentsCompSP27( real* CUMcbb,
+__global__ void LBCalcHigherMomentsCompSP27( real* CUMcbb,
 														real* CUMbcb,
 														real* CUMbbc,
 														real* CUMcca,
@@ -554,7 +556,7 @@ extern "C" __global__ void LBCalcHigherMomentsCompSP27( real* CUMcbb,
 														int size_Mat,
 														bool EvenOrOdd);
 
-extern "C" __global__ void LBCalcMeasurePoints(real* vxMP,
+__global__ void LBCalcMeasurePoints(real* vxMP,
                                                real* vyMP,
                                                real* vzMP,
                                                real* rhoMP,
@@ -570,7 +572,7 @@ extern "C" __global__ void LBCalcMeasurePoints(real* vxMP,
                                                real* DD,
                                                bool isEvenTimestep);
 
-extern "C" __global__ void LB_BC_Press_East27( int nx,
+__global__ void LB_BC_Press_East27( int nx,
                                                int ny,
                                                int tz,
                                                unsigned int* bcMatD,
@@ -581,7 +583,7 @@ extern "C" __global__ void LB_BC_Press_East27( int nx,
                                                unsigned int size_Mat,
                                                bool isEvenTimestep) ;
 
-extern "C" __global__ void LB_BC_Vel_West_27( int nx,
+__global__ void LB_BC_Vel_West_27( int nx,
                                               int ny,
                                               int nz,
                                               int itz,
@@ -598,7 +600,7 @@ extern "C" __global__ void LB_BC_Vel_West_27( int nx,
                                               real om);
 
 //no Slip BCs
-extern "C" __global__ void QDevice27(real* distributions,
+__global__ void QDevice27(real* distributions,
                                      int* subgridDistanceIndices,
                                      real* subgridDistances,
                                      unsigned int numberOfBCnodes,
@@ -609,7 +611,7 @@ extern "C" __global__ void QDevice27(real* distributions,
                                      unsigned int numberOfLBnodes,
                                      bool isEvenTimestep);
 
-extern "C" __global__ void QDeviceComp27(
+__global__ void QDeviceComp27(
 										 real* distributions,
 										 int* subgridDistanceIndices,
 										 real* subgridDistances,
@@ -621,7 +623,7 @@ extern "C" __global__ void QDeviceComp27(
 										 unsigned int numberOfLBnodes,
 										 bool isEvenTimestep);
 
-extern "C" __global__ void QDeviceCompThinWallsPartOne27(real* DD,
+__global__ void QDeviceCompThinWallsPartOne27(real* DD,
 														 int* k_Q,
 														 real* QQ,
 														 unsigned int numberOfBCnodes,
@@ -632,7 +634,7 @@ extern "C" __global__ void QDeviceCompThinWallsPartOne27(real* DD,
 														 unsigned int size_Mat,
 														 bool isEvenTimestep);
 
-extern "C" __global__ void QDevice3rdMomentsComp27(	 real* distributions, 
+__global__ void QDevice3rdMomentsComp27(	 real* distributions, 
 													 int* subgridDistanceIndices, 
 													 real* subgridDistances,
 													 unsigned int numberOfBCnodes, 
@@ -643,7 +645,7 @@ extern "C" __global__ void QDevice3rdMomentsComp27(	 real* distributions,
 													 unsigned int numberOfLBnodes, 
 													 bool isEvenTimestep);
 
-extern "C" __global__ void QDeviceIncompHighNu27(real* DD,
+__global__ void QDeviceIncompHighNu27(real* DD,
 												 int* k_Q,
 												 real* QQ,
 												 unsigned int numberOfBCnodes,
@@ -651,10 +653,10 @@ extern "C" __global__ void QDeviceIncompHighNu27(real* DD,
 												 unsigned int* neighborX,
 												 unsigned int* neighborY,
 												 unsigned int* neighborZ,
-												 unsigned int size_Mat,
+												 unsigned int numberOfLBnodes,
 												 bool isEvenTimestep);
 
-extern "C" __global__ void QDeviceCompHighNu27(	 real* DD,
+__global__ void QDeviceCompHighNu27(	 real* DD,
 												 int* k_Q,
 												 real* QQ,
 												 unsigned int numberOfBCnodes,
@@ -666,7 +668,7 @@ extern "C" __global__ void QDeviceCompHighNu27(	 real* DD,
 												 bool isEvenTimestep);
 
 //Velocity BCs
-extern "C" __global__ void QVelDevPlainBB27(
+__global__ void QVelDevPlainBB27(
     real* velocityX,
     real* velocityY,
     real* velocityZ,
@@ -680,7 +682,7 @@ extern "C" __global__ void QVelDevPlainBB27(
     uint numberOfLBnodes,
     bool isEvenTimestep);
 
-extern "C" __global__ void QVelDevCouette27(real* vx,
+__global__ void QVelDevCouette27(real* vx,
 											real* vy,
 											real* vz,
 											real* DD,
@@ -694,7 +696,7 @@ extern "C" __global__ void QVelDevCouette27(real* vx,
 											unsigned int size_Mat,
 											bool isEvenTimestep);
 
-extern "C" __global__ void QVelDev1h27( int inx,
+__global__ void QVelDev1h27( int inx,
 										int iny,
 										real* vx,
 										real* vy,
@@ -715,7 +717,7 @@ extern "C" __global__ void QVelDev1h27( int inx,
 										unsigned int size_Mat,
 										bool isEvenTimestep);
 
-extern "C" __global__ void QVelDevice27(int inx,
+__global__ void QVelDevice27(int inx,
                                         int iny,
                                         real* vx,
                                         real* vy,
@@ -731,7 +733,7 @@ extern "C" __global__ void QVelDevice27(int inx,
                                         unsigned int size_Mat,
                                         bool isEvenTimestep);
 
-extern "C" __global__ void QVelDeviceCompPlusSlip27(real* vx,
+__global__ void QVelDeviceCompPlusSlip27(real* vx,
 													real* vy,
 													real* vz,
 													real* DD,
@@ -745,7 +747,7 @@ extern "C" __global__ void QVelDeviceCompPlusSlip27(real* vx,
 													unsigned int size_Mat,
 													bool isEvenTimestep);
 
-extern "C" __global__ void QVelDeviceComp27(real* velocityX,
+__global__ void QVelDeviceComp27(real* velocityX,
 											real* velocityY,
 											real* velocityZ,
 											real* distribution,
@@ -759,7 +761,7 @@ extern "C" __global__ void QVelDeviceComp27(real* velocityX,
 											unsigned int numberOfLBnodes,
 											bool isEvenTimestep);
 
-extern "C" __global__ void QVelDeviceCompThinWallsPartOne27(
+__global__ void QVelDeviceCompThinWallsPartOne27(
 	real* vx,
 	real* vy,
 	real* vz,
@@ -774,7 +776,7 @@ extern "C" __global__ void QVelDeviceCompThinWallsPartOne27(
 	uint size_Mat,
 	bool isEvenTimestep);
 
-extern "C" __global__ void QThinWallsPartTwo27(
+__global__ void QThinWallsPartTwo27(
 	real* DD,
 	int* k_Q,
 	real* QQ,
@@ -787,7 +789,7 @@ extern "C" __global__ void QThinWallsPartTwo27(
 	uint size_Mat,
 	bool isEvenTimestep);
 
-extern "C" __global__ void QVelDeviceCompZeroPress27(
+__global__ void QVelDeviceCompZeroPress27(
 	real* velocityX,
 	real* velocityY,
 	real* velocityZ,
@@ -802,7 +804,7 @@ extern "C" __global__ void QVelDeviceCompZeroPress27(
 	unsigned int numberOfLBnodes,
 	bool isEvenTimestep);
 
-extern "C" __global__ void QVelDeviceIncompHighNu27(real* vx,
+__global__ void QVelDeviceIncompHighNu27(real* vx,
 													real* vy,
 													real* vz,
 													real* DD,
@@ -816,7 +818,7 @@ extern "C" __global__ void QVelDeviceIncompHighNu27(real* vx,
 													unsigned int size_Mat,
 													bool isEvenTimestep);
 
-extern "C" __global__ void QVelDeviceCompHighNu27(	real* vx,
+__global__ void QVelDeviceCompHighNu27(	real* vx,
 													real* vy,
 													real* vz,
 													real* DD,
@@ -830,7 +832,7 @@ extern "C" __global__ void QVelDeviceCompHighNu27(	real* vx,
 													unsigned int size_Mat,
 													bool isEvenTimestep);
 
-extern "C" __global__ void QVeloDeviceEQ27(real* VeloX,
+__global__ void QVeloDeviceEQ27(real* VeloX,
 										   real* VeloY,
 										   real* VeloZ,
                                            real* DD,
@@ -843,7 +845,7 @@ extern "C" __global__ void QVeloDeviceEQ27(real* VeloX,
                                            unsigned int size_Mat,
                                            bool isEvenTimestep);
 
-extern "C" __global__ void QVeloStreetDeviceEQ27(
+__global__ void QVeloStreetDeviceEQ27(
 	real* veloXfraction,
 	real* veloYfraction,
 	int*  naschVelo,
@@ -858,7 +860,7 @@ extern "C" __global__ void QVeloStreetDeviceEQ27(
 	bool  isEvenTimestep);
 
 //Slip BCs
-extern "C" __global__ void QSlipDevice27(real* DD,
+__global__ void QSlipDevice27(real* DD,
                                          int* k_Q,
                                          real* QQ,
                                          unsigned int numberOfBCnodes,
@@ -869,7 +871,7 @@ extern "C" __global__ void QSlipDevice27(real* DD,
                                          unsigned int size_Mat,
                                          bool isEvenTimestep);
 
-extern "C" __global__ void QSlipDeviceComp27(real* DD,
+__global__ void QSlipDeviceComp27(real* DD,
 											 int* k_Q,
 											 real* QQ,
 											 unsigned int numberOfBCnodes,
@@ -880,19 +882,45 @@ extern "C" __global__ void QSlipDeviceComp27(real* DD,
 											 unsigned int size_Mat,
 											 bool isEvenTimestep);
 
-extern "C" __global__ void QSlipDeviceComp27TurbViscosity(real* DD,
-											 int* k_Q,
-											 real* QQ,
-											 unsigned int numberOfBCnodes,
-											 real om1,
-											 unsigned int* neighborX,
-											 unsigned int* neighborY,
-											 unsigned int* neighborZ,
-											 real* turbViscosity,
-											 unsigned int size_Mat,
-											 bool isEvenTimestep);
+//////////////////////////////////////////////////////////////////////////
+//! \brief Kernel declaration from the "Slip BCs" group that additionally
+//! receives a turbViscosity array. Declared with C++ linkage (no extern "C").
+//! Several parameters carry descriptive names here; the short names used by
+//! older declarations in this header are given in parentheses.
+//! \param distributions           (DD)
+//! \param subgridDistanceIndices  (k_Q)
+//! \param subgridDistances        (QQ)
+//! \param numberOfBCnodes         presumably the number of boundary nodes - TODO confirm
+//! \param omega                   (om1)
+//! \param neighborX,neighborY,neighborZ  presumably neighbor index arrays - TODO confirm
+//! \param turbViscosity           presumably a per-node turbulent viscosity - TODO confirm
+//! \param numberOfLBnodes         (size_Mat)
+//! \param isEvenTimestep          even/odd timestep flag
+__global__ void QSlipDeviceComp27TurbViscosity(
+	real* distributions,
+	int* subgridDistanceIndices,
+	real* subgridDistances,
+	unsigned int numberOfBCnodes,
+	real omega,
+	unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
+	real* turbViscosity,
+	unsigned int numberOfLBnodes,
+	bool isEvenTimestep);
+
+//! \brief Kernel declaration; its parameter list is identical to that of
+//! QSlipDeviceComp27TurbViscosity declared directly above.
+__global__ void QSlipPressureDeviceComp27TurbViscosity(
+	real* distributions,
+	int* subgridDistanceIndices,
+	real* subgridDistances,
+	unsigned int numberOfBCnodes,
+	real omega,
+	unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
+	real* turbViscosity,
+	unsigned int numberOfLBnodes,
+	bool isEvenTimestep);
 
-extern "C" __global__ void QSlipGeomDeviceComp27(real* DD,
+__global__ void QSlipGeomDeviceComp27(real* DD,
 												 int* k_Q,
 												 real* QQ,
 												 unsigned int numberOfBCnodes,
@@ -906,7 +934,7 @@ extern "C" __global__ void QSlipGeomDeviceComp27(real* DD,
 												 unsigned int size_Mat,
 												 bool isEvenTimestep);
 
-extern "C" __global__ void QSlipNormDeviceComp27(real* DD,
+__global__ void QSlipNormDeviceComp27(real* DD,
 												 int* k_Q,
 												 real* QQ,
 												 unsigned int numberOfBCnodes,
@@ -921,7 +949,7 @@ extern "C" __global__ void QSlipNormDeviceComp27(real* DD,
 												 bool isEvenTimestep);
 
 // Stress BCs (wall model)
-extern "C" __global__ void QStressDeviceComp27(real* DD,
+__global__ void QStressDeviceComp27(real* DD,
 											   int* k_Q,
 											 int* k_N,
 											 real* QQ,
@@ -953,7 +981,7 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
 											 unsigned int size_Mat,
 											 bool isEvenTimestep);
 
-extern "C" __global__ void BBStressDevice27( real* DD,
+__global__ void BBStressDevice27( real* DD,
 												int* k_Q,
 												int* k_N,
 												real* QQ,
@@ -983,8 +1011,38 @@ extern "C" __global__ void BBStressDevice27( real* DD,
 												unsigned int size_Mat,
 												bool isEvenTimestep);
 
+//! \brief Kernel declaration listed under "Stress BCs (wall model)"; neighbor arrays share one line.
+__global__ void BBStressPressureDevice27(
+	real* DD,
+	int* k_Q,
+	int* k_N,
+	real* QQ,
+	unsigned int numberOfBCnodes,
+	real* vx,
+	real* vy,
+	real* vz,
+	real* normalX,
+	real* normalY,
+	real* normalZ,
+	real* vx_el,
+	real* vy_el,
+	real* vz_el,
+	real* vx_w_mean,
+	real* vy_w_mean,
+	real* vz_w_mean,
+	int* samplingOffset,
+	real* z0,
+	bool hasWallModelMonitor,
+	real* u_star_monitor,
+	real* Fx_monitor,
+	real* Fy_monitor,
+	real* Fz_monitor,
+	unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
+	unsigned int size_Mat,
+	bool isEvenTimestep);
+
 //Pressure BCs
-extern "C" __global__ void QPressDevice27( real* rhoBC,
+__global__ void QPressDevice27( real* rhoBC,
                                            real* DD,
                                            int* k_Q,
                                            real* QQ,
@@ -996,7 +1054,7 @@ extern "C" __global__ void QPressDevice27( real* rhoBC,
                                            unsigned int size_Mat,
                                            bool isEvenTimestep);
 
-extern "C" __global__ void QPressDeviceAntiBB27(   real* rhoBC,
+__global__ void QPressDeviceAntiBB27(   real* rhoBC,
 												   real* vx,
 												   real* vy,
 												   real* vz,
@@ -1011,7 +1069,7 @@ extern "C" __global__ void QPressDeviceAntiBB27(   real* rhoBC,
 												   unsigned int size_Mat,
 												   bool isEvenTimestep);
 
-extern "C" __global__ void QPressDeviceFixBackflow27( real* rhoBC,
+__global__ void QPressDeviceFixBackflow27( real* rhoBC,
                                                       real* DD,
                                                       int* k_Q,
                                                       int numberOfBCnodes,
@@ -1022,7 +1080,7 @@ extern "C" __global__ void QPressDeviceFixBackflow27( real* rhoBC,
                                                       unsigned int size_Mat,
                                                       bool isEvenTimestep);
 
-extern "C" __global__ void QPressDeviceDirDepBot27(  real* rhoBC,
+__global__ void QPressDeviceDirDepBot27(  real* rhoBC,
                                                      real* DD,
                                                      int* k_Q,
                                                      int numberOfBCnodes,
@@ -1033,8 +1091,8 @@ extern "C" __global__ void QPressDeviceDirDepBot27(  real* rhoBC,
                                                      unsigned int size_Mat,
                                                      bool isEvenTimestep);
 
-extern "C" __global__ void QPressNoRhoDevice27(  real* rhoBC,
-												 real* DD,
+__global__ void QPressNoRhoDevice27(  real* rhoBC,
+												 real* distributions,
 												 int* k_Q,
 												 int* k_N,
 												 int numberOfBCnodes,
@@ -1042,10 +1100,25 @@ extern "C" __global__ void QPressNoRhoDevice27(  real* rhoBC,
 												 unsigned int* neighborX,
 												 unsigned int* neighborY,
 												 unsigned int* neighborZ,
-												 unsigned int size_Mat,
-												 bool isEvenTimestep);
+												 unsigned int numberOfLBnodes,
+												 bool isEvenTimestep,
+												 int direction);
+
+//! \brief Kernel declaration listed under "Pressure BCs"; neighbor arrays share one line.
+__global__ void QPressZeroRhoOutflowDevice27(
+	real* rhoBC,
+	real* distributions,
+	int* k_Q,
+	int* k_N,
+	int numberOfBCnodes,
+	real om1,
+	unsigned int* neighborX, unsigned int* neighborY, unsigned int* neighborZ,
+	unsigned int numberOfLBnodes,
+	bool isEvenTimestep,
+	int direction,
+	real densityCorrectionFactor);
 
-extern "C" __global__ void QInflowScaleByPressDevice27(  real* rhoBC,
+__global__ void QInflowScaleByPressDevice27(  real* rhoBC,
 														 real* DD,
 														 int* k_Q,
 														 int* k_N,
@@ -1057,7 +1130,7 @@ extern "C" __global__ void QInflowScaleByPressDevice27(  real* rhoBC,
 														 unsigned int size_Mat,
 														 bool isEvenTimestep);
 
-extern "C" __global__ void QPressDeviceOld27(real* rhoBC,
+__global__ void QPressDeviceOld27(real* rhoBC,
                                              real* DD,
                                              int* k_Q,
                                              int* k_N,
@@ -1069,7 +1142,7 @@ extern "C" __global__ void QPressDeviceOld27(real* rhoBC,
                                              unsigned int size_Mat,
                                              bool isEvenTimestep);
 
-extern "C" __global__ void QPressDeviceIncompNEQ27( real* rhoBC,
+__global__ void QPressDeviceIncompNEQ27( real* rhoBC,
 													real* DD,
 													int* k_Q,
 													int* k_N,
@@ -1081,7 +1154,7 @@ extern "C" __global__ void QPressDeviceIncompNEQ27( real* rhoBC,
 													unsigned int size_Mat,
 													bool isEvenTimestep);
 
-extern "C" __global__ void QPressDeviceNEQ27(real* rhoBC,
+__global__ void QPressDeviceNEQ27(real* rhoBC,
                                              real* distribution,
                                              int* bcNodeIndices,
                                              int* bcNeighborIndices,
@@ -1093,7 +1166,7 @@ extern "C" __global__ void QPressDeviceNEQ27(real* rhoBC,
                                              unsigned int size_Mat,
                                              bool isEvenTimestep);
 
-extern "C" __global__ void QPressDeviceEQZ27(real* rhoBC,
+__global__ void QPressDeviceEQZ27(real* rhoBC,
                                              real* DD,
                                              int* k_Q,
                                              int* k_N,
@@ -1106,7 +1179,7 @@ extern "C" __global__ void QPressDeviceEQZ27(real* rhoBC,
                                              unsigned int size_Mat,
                                              bool isEvenTimestep);
 
-extern "C" __global__ void QPressDeviceZero27(  real* DD,
+__global__ void QPressDeviceZero27(  real* DD,
 												int* k_Q,
 												unsigned int numberOfBCnodes,
 												unsigned int* neighborX,
@@ -1115,7 +1188,7 @@ extern "C" __global__ void QPressDeviceZero27(  real* DD,
 												unsigned int size_Mat,
 												bool isEvenTimestep);
 
-extern "C" __global__ void QPressDeviceFake27(real* rhoBC,
+__global__ void QPressDeviceFake27(real* rhoBC,
                                              real* DD,
                                              int* k_Q,
                                              int* k_N,
@@ -1127,7 +1200,7 @@ extern "C" __global__ void QPressDeviceFake27(real* rhoBC,
                                              unsigned int size_Mat,
                                              bool isEvenTimestep);
 
-extern "C" __global__ void BBDevice27(real* distributions,
+__global__ void BBDevice27(real* distributions,
                                      int* subgridDistanceIndices,
                                      real* subgridDistances,
                                      unsigned int numberOfBCnodes,
@@ -1137,7 +1210,7 @@ extern "C" __global__ void BBDevice27(real* distributions,
                                      unsigned int numberOfLBnodes,
                                      bool isEvenTimestep);
 
-extern "C" __global__ void QPressDevice27_IntBB(real* rho,
+__global__ void QPressDevice27_IntBB(real* rho,
 												real* DD,
 												int* k_Q,
 												real* QQ,
@@ -1151,7 +1224,7 @@ extern "C" __global__ void QPressDevice27_IntBB(real* rho,
 
 // TODO: https://git.rz.tu-bs.de/irmb/VirtualFluids_dev/-/issues/29
 //Schlaffer BCs
-extern "C" __global__ void PressSchlaff27(real* rhoBC,
+__global__ void PressSchlaff27(real* rhoBC,
                                           real* DD,
                                           real* vx0,
                                           real* vy0,
@@ -1168,7 +1241,7 @@ extern "C" __global__ void PressSchlaff27(real* rhoBC,
                                           bool isEvenTimestep);
 
 // TODO: https://git.rz.tu-bs.de/irmb/VirtualFluids_dev/-/issues/29
-extern "C" __global__ void VelSchlaff27(  int t,
+__global__ void VelSchlaff27(  int t,
                                           real* DD,
                                           real* vz0,
                                           real* deltaVz0,
@@ -1213,7 +1286,7 @@ extern "C" __global__ void QPrecursorDeviceCompZeroPress( 	int* k_Q,
 															unsigned long long size_Mat,
 															bool evenOrOdd);
 //Advection / Diffusion BCs
-extern "C" __global__ void QAD7( real* DD,
+__global__ void QAD7( real* DD,
                                  real* DD7,
                                  real* temp,
                                  real diffusivity,
@@ -1229,7 +1302,7 @@ extern "C" __global__ void QAD7( real* DD,
 
 //////////////////////////////////////////////////////////////////////////
 //! \brief \ref Advection_Diffusion_Device_Kernel : Factorized central moments for Advection Diffusion Equation
-extern "C" __global__ void Factorized_Central_Moments_Advection_Diffusion_Device_Kernel(
+__global__ void Factorized_Central_Moments_Advection_Diffusion_Device_Kernel(
 	real omegaDiffusivity,
 	uint* typeOfGridNode,
 	uint* neighborX,
@@ -1243,7 +1316,7 @@ extern "C" __global__ void Factorized_Central_Moments_Advection_Diffusion_Device
 
 //////////////////////////////////////////////////////////////////////////
 //! \brief \ref AD_SlipVelDeviceComp : device function for the slip-AD boundary condition
-extern "C" __global__ void AD_SlipVelDeviceComp(
+__global__ void AD_SlipVelDeviceComp(
 	real * normalX,
 	real * normalY,
 	real * normalZ,
@@ -1259,7 +1332,7 @@ extern "C" __global__ void AD_SlipVelDeviceComp(
 	uint size_Mat,
 	bool isEvenTimestep);
 
-extern "C" __global__ void QADDirichlet27(   real* DD,
+__global__ void QADDirichlet27(   real* DD,
 											 real* DD27,
 											 real* temp,
 											 real diffusivity,
@@ -1273,7 +1346,7 @@ extern "C" __global__ void QADDirichlet27(   real* DD,
 											 unsigned int size_Mat,
 											 bool isEvenTimestep);
 
-extern "C" __global__ void QADBB27(  real* DD,
+__global__ void QADBB27(  real* DD,
 									 real* DD27,
 									 real* temp,
 									 real diffusivity,
@@ -1287,7 +1360,7 @@ extern "C" __global__ void QADBB27(  real* DD,
 									 unsigned int size_Mat,
 									 bool isEvenTimestep);
 
-extern "C" __global__ void QADVel7( real* DD,
+__global__ void QADVel7( real* DD,
                                     real* DD7,
                                     real* temp,
                                     real* velo,
@@ -1302,7 +1375,7 @@ extern "C" __global__ void QADVel7( real* DD,
                                     unsigned int size_Mat,
                                     bool isEvenTimestep);
 
-extern "C" __global__ void QADVel27(real* DD,
+__global__ void QADVel27(real* DD,
                                     real* DD27,
                                     real* temp,
                                     real* velo,
@@ -1317,7 +1390,7 @@ extern "C" __global__ void QADVel27(real* DD,
                                     unsigned int size_Mat,
                                     bool isEvenTimestep);
 
-extern "C" __global__ void QADPress7(  real* DD,
+__global__ void QADPress7(  real* DD,
                                        real* DD7,
                                        real* temp,
                                        real* velo,
@@ -1332,7 +1405,7 @@ extern "C" __global__ void QADPress7(  real* DD,
                                        unsigned int size_Mat,
                                        bool isEvenTimestep);
 
-extern "C" __global__ void QADPress27( real* DD,
+__global__ void QADPress27( real* DD,
                                        real* DD27,
                                        real* temp,
                                        real* velo,
@@ -1347,7 +1420,7 @@ extern "C" __global__ void QADPress27( real* DD,
                                        unsigned int size_Mat,
                                        bool isEvenTimestep);
 
-extern "C" __global__ void QADPressNEQNeighbor27(
+__global__ void QADPressNEQNeighbor27(
 												 real* DD,
 												 real* DD27,
 												 int* k_Q,
@@ -1360,7 +1433,7 @@ extern "C" __global__ void QADPressNEQNeighbor27(
 												 bool isEvenTimestep
 												);
 
-extern "C" __global__ void QNoSlipADincomp7( real* DD,
+__global__ void QNoSlipADincomp7( real* DD,
 											 real* DD7,
 											 real* temp,
 											 real diffusivity,
@@ -1374,7 +1447,7 @@ extern "C" __global__ void QNoSlipADincomp7( real* DD,
 											 unsigned int size_Mat,
 											 bool isEvenTimestep);
 
-extern "C" __global__ void QNoSlipADincomp27( real* DD,
+__global__ void QNoSlipADincomp27( real* DD,
 											 real* DD27,
 											 real* temp,
 											 real diffusivity,
@@ -1388,7 +1461,7 @@ extern "C" __global__ void QNoSlipADincomp27( real* DD,
 											 unsigned int size_Mat,
 											 bool isEvenTimestep);
 
-extern "C" __global__ void QADVeloIncomp7(  real* DD,
+__global__ void QADVeloIncomp7(  real* DD,
 											real* DD7,
 											real* temp,
 											real* velo,
@@ -1403,7 +1476,7 @@ extern "C" __global__ void QADVeloIncomp7(  real* DD,
 											unsigned int size_Mat,
 											bool isEvenTimestep);
 
-extern "C" __global__ void QADVeloIncomp27( real* DD,
+__global__ void QADVeloIncomp27( real* DD,
 											real* DD27,
 											real* temp,
 											real* velo,
@@ -1418,7 +1491,7 @@ extern "C" __global__ void QADVeloIncomp27( real* DD,
 											unsigned int size_Mat,
 											bool isEvenTimestep);
 
-extern "C" __global__ void QADPressIncomp7(real* DD,
+__global__ void QADPressIncomp7(real* DD,
 										   real* DD7,
 										   real* temp,
 										   real* velo,
@@ -1433,7 +1506,7 @@ extern "C" __global__ void QADPressIncomp7(real* DD,
 										   unsigned int size_Mat,
 										   bool isEvenTimestep);
 
-extern "C" __global__ void QADPressIncomp27(   real* DD,
+__global__ void QADPressIncomp27(   real* DD,
 											   real* DD27,
 											   real* temp,
 											   real* velo,
@@ -1449,7 +1522,7 @@ extern "C" __global__ void QADPressIncomp27(   real* DD,
 											   bool isEvenTimestep);
 
 //Propeller BC
-extern "C" __global__ void PropellerBC(unsigned int* neighborX,
+__global__ void PropellerBC(unsigned int* neighborX,
                                        unsigned int* neighborY,
                                        unsigned int* neighborZ,
                                        real* rho,
@@ -1466,7 +1539,7 @@ extern "C" __global__ void PropellerBC(unsigned int* neighborX,
 
 
 //coarse to fine
-extern "C" __global__ void scaleCF27(real* DC,
+__global__ void scaleCF27(real* DC,
                                      real* DF,
                                     unsigned int* neighborCX,
                                     unsigned int* neighborCY,
@@ -1488,7 +1561,7 @@ extern "C" __global__ void scaleCF27(real* DC,
 										       unsigned int nxF,
 										       unsigned int nyF);
 
-extern "C" __global__ void scaleCFEff27(real* DC,
+__global__ void scaleCFEff27(real* DC,
                                         real* DF,
                                         unsigned int* neighborCX,
                                         unsigned int* neighborCY,
@@ -1511,7 +1584,7 @@ extern "C" __global__ void scaleCFEff27(real* DC,
                                         unsigned int nyF,
                                         OffCF offCF);
 
-extern "C" __global__ void scaleCFLast27( real* DC,
+__global__ void scaleCFLast27( real* DC,
                                           real* DF,
                                           unsigned int* neighborCX,
                                           unsigned int* neighborCY,
@@ -1534,7 +1607,7 @@ extern "C" __global__ void scaleCFLast27( real* DC,
                                           unsigned int nyF,
                                           OffCF offCF);
 
-extern "C" __global__ void scaleCFpress27(real* DC,
+__global__ void scaleCFpress27(real* DC,
                                           real* DF,
                                           unsigned int* neighborCX,
                                           unsigned int* neighborCY,
@@ -1557,7 +1630,7 @@ extern "C" __global__ void scaleCFpress27(real* DC,
                                           unsigned int nyF,
                                           OffCF offCF);
 
-extern "C" __global__ void scaleCF_Fix_27(real* DC,
+__global__ void scaleCF_Fix_27(real* DC,
                                           real* DF,
                                           unsigned int* neighborCX,
                                           unsigned int* neighborCY,
@@ -1580,7 +1653,7 @@ extern "C" __global__ void scaleCF_Fix_27(real* DC,
                                           unsigned int nyF,
                                           OffCF offCF);
 
-extern "C" __global__ void scaleCF_Fix_comp_27(   real* DC,
+__global__ void scaleCF_Fix_comp_27(   real* DC,
 												  real* DF,
 												  unsigned int* neighborCX,
 												  unsigned int* neighborCY,
@@ -1603,7 +1676,7 @@ extern "C" __global__ void scaleCF_Fix_comp_27(   real* DC,
 												  unsigned int nyF,
 												  OffCF offCF);
 
-extern "C" __global__ void scaleCF_0817_comp_27(  real* DC,
+__global__ void scaleCF_0817_comp_27(  real* DC,
 												  real* DF,
 												  unsigned int* neighborCX,
 												  unsigned int* neighborCY,
@@ -1626,7 +1699,7 @@ extern "C" __global__ void scaleCF_0817_comp_27(  real* DC,
 												  unsigned int nyF,
 												  OffCF offCF);
 
-extern "C" __global__ void scaleCF_comp_D3Q27F3_2018( real* DC,
+__global__ void scaleCF_comp_D3Q27F3_2018( real* DC,
 													  real* DF,
 													  real* G6,
 													  unsigned int* neighborCX,
@@ -1650,7 +1723,7 @@ extern "C" __global__ void scaleCF_comp_D3Q27F3_2018( real* DC,
 													  unsigned int nyF,
 													  OffCF offCF);
 
-extern "C" __global__ void scaleCF_comp_D3Q27F3( real* DC,
+__global__ void scaleCF_comp_D3Q27F3( real* DC,
 												 real* DF,
 												 real* G6,
 												 unsigned int* neighborCX,
@@ -1675,7 +1748,7 @@ extern "C" __global__ void scaleCF_comp_D3Q27F3( real* DC,
 												 OffCF offCF);
 
 
-extern "C" __global__ void scaleCF_staggered_time_comp_27(real* DC,
+__global__ void scaleCF_staggered_time_comp_27(real* DC,
 														  real* DF,
 														  unsigned int* neighborCX,
 														  unsigned int* neighborCY,
@@ -1698,7 +1771,7 @@ extern "C" __global__ void scaleCF_staggered_time_comp_27(real* DC,
 														  unsigned int nyF,
 														  OffCF offCF);
 
-extern "C" __global__ void scaleCF_RhoSq_comp_27( real* DC,
+__global__ void scaleCF_RhoSq_comp_27( real* DC,
 												  real* DF,
 												  unsigned int* neighborCX,
 												  unsigned int* neighborCY,
@@ -1721,7 +1794,7 @@ extern "C" __global__ void scaleCF_RhoSq_comp_27( real* DC,
 												  unsigned int nyF,
 												  OffCF offCF);
 
-extern "C" __global__ void scaleCF_RhoSq_3rdMom_comp_27(real* DC,
+__global__ void scaleCF_RhoSq_3rdMom_comp_27(real* DC,
 														real* DF,
 														unsigned int* neighborCX,
 														unsigned int* neighborCY,
@@ -1744,7 +1817,7 @@ extern "C" __global__ void scaleCF_RhoSq_3rdMom_comp_27(real* DC,
 														unsigned int nyF,
 														OffCF offCF);
 
-extern "C" __global__ void scaleCF_AA2016_comp_27(real* DC,
+__global__ void scaleCF_AA2016_comp_27(real* DC,
 												  real* DF,
 												  unsigned int* neighborCX,
 												  unsigned int* neighborCY,
@@ -1767,7 +1840,7 @@ extern "C" __global__ void scaleCF_AA2016_comp_27(real* DC,
 												  unsigned int nyF,
 												  OffCF offCF);
 
-extern "C" __global__ void scaleCF_NSPress_27(real* DC,
+__global__ void scaleCF_NSPress_27(real* DC,
 											  real* DF,
 											  unsigned int* neighborCX,
 											  unsigned int* neighborCY,
@@ -1790,7 +1863,7 @@ extern "C" __global__ void scaleCF_NSPress_27(real* DC,
 											  unsigned int nyF,
 											  OffCF offCF);
 
-extern "C" __global__ void scaleCFThSMG7( real* DC,
+__global__ void scaleCFThSMG7( real* DC,
                                           real* DF,
                                           real* DD7C,
                                           real* DD7F,
@@ -1810,7 +1883,7 @@ extern "C" __global__ void scaleCFThSMG7( real* DC,
                                           real diffusivity_fine,
                                           OffCF offCF);
 
-extern "C" __global__ void scaleCFThS7(real* DC,
+__global__ void scaleCFThS7(real* DC,
                                        real* DF,
                                        real* DD7C,
                                        real* DD7F,
@@ -1829,7 +1902,7 @@ extern "C" __global__ void scaleCFThS7(real* DC,
                                        real nu,
                                        real diffusivity_fine);
 
-extern "C" __global__ void scaleCFThS27(real* DC,
+__global__ void scaleCFThS27(real* DC,
                                         real* DF,
                                         real* DD27C,
                                         real* DD27F,
@@ -1850,7 +1923,7 @@ extern "C" __global__ void scaleCFThS27(real* DC,
 										OffCF offCF);
 
 //fine to coarse
-extern "C" __global__ void scaleFC27(real* DC,
+__global__ void scaleFC27(real* DC,
                                      real* DF,
                                     unsigned int* neighborCX,
                                     unsigned int* neighborCY,
@@ -1872,7 +1945,7 @@ extern "C" __global__ void scaleFC27(real* DC,
 										       unsigned int nxF,
                                      unsigned int nyF);
 
-extern "C" __global__ void scaleFCEff27(real* DC,
+__global__ void scaleFCEff27(real* DC,
                                         real* DF,
                                         unsigned int* neighborCX,
                                         unsigned int* neighborCY,
@@ -1895,7 +1968,7 @@ extern "C" __global__ void scaleFCEff27(real* DC,
                                         unsigned int nyF,
                                         OffFC offFC);
 
-extern "C" __global__ void scaleFCLast27( real* DC,
+__global__ void scaleFCLast27( real* DC,
                                           real* DF,
                                           unsigned int* neighborCX,
                                           unsigned int* neighborCY,
@@ -1918,7 +1991,7 @@ extern "C" __global__ void scaleFCLast27( real* DC,
                                           unsigned int nyF,
                                           OffFC offFC);
 
-extern "C" __global__ void scaleFCpress27( real* DC,
+__global__ void scaleFCpress27( real* DC,
                                           real* DF,
                                           unsigned int* neighborCX,
                                           unsigned int* neighborCY,
@@ -1941,7 +2014,7 @@ extern "C" __global__ void scaleFCpress27( real* DC,
                                           unsigned int nyF,
                                           OffFC offFC);
 
-extern "C" __global__ void scaleFC_Fix_27( real* DC,
+__global__ void scaleFC_Fix_27( real* DC,
                                           real* DF,
                                           unsigned int* neighborCX,
                                           unsigned int* neighborCY,
@@ -1964,7 +2037,7 @@ extern "C" __global__ void scaleFC_Fix_27( real* DC,
                                           unsigned int nyF,
                                           OffFC offFC);
 
-extern "C" __global__ void scaleFC_Fix_comp_27(   real* DC,
+__global__ void scaleFC_Fix_comp_27(   real* DC,
 												  real* DF,
 												  unsigned int* neighborCX,
 												  unsigned int* neighborCY,
@@ -1987,7 +2060,7 @@ extern "C" __global__ void scaleFC_Fix_comp_27(   real* DC,
 												  unsigned int nyF,
 												  OffFC offFC);
 
-extern "C" __global__ void scaleFC_0817_comp_27(  real* DC,
+__global__ void scaleFC_0817_comp_27(  real* DC,
 												  real* DF,
 												  unsigned int* neighborCX,
 												  unsigned int* neighborCY,
@@ -2010,7 +2083,7 @@ extern "C" __global__ void scaleFC_0817_comp_27(  real* DC,
 												  unsigned int nyF,
 												  OffFC offFC);
 
-extern "C" __global__ void scaleFC_comp_D3Q27F3_2018( real* DC,
+__global__ void scaleFC_comp_D3Q27F3_2018( real* DC,
 													  real* DF,
 													  real* G6,
 													  unsigned int* neighborCX,
@@ -2034,7 +2107,7 @@ extern "C" __global__ void scaleFC_comp_D3Q27F3_2018( real* DC,
 													  unsigned int nyF,
 													  OffFC offFC);
 
-extern "C" __global__ void scaleFC_comp_D3Q27F3( real* DC,
+__global__ void scaleFC_comp_D3Q27F3( real* DC,
 												 real* DF,
 												 real* G6,
 												 unsigned int* neighborCX,
@@ -2059,7 +2132,7 @@ extern "C" __global__ void scaleFC_comp_D3Q27F3( real* DC,
 												 OffFC offFC);
 
 
-extern "C" __global__ void scaleFC_staggered_time_comp_27(real* DC,
+__global__ void scaleFC_staggered_time_comp_27(real* DC,
 														  real* DF,
 														  unsigned int* neighborCX,
 														  unsigned int* neighborCY,
@@ -2082,7 +2155,7 @@ extern "C" __global__ void scaleFC_staggered_time_comp_27(real* DC,
 														  unsigned int nyF,
 														  OffFC offFC);
 
-extern "C" __global__ void scaleFC_RhoSq_comp_27( real* DC,
+__global__ void scaleFC_RhoSq_comp_27( real* DC,
 												  real* DF,
 												  unsigned int* neighborCX,
 												  unsigned int* neighborCY,
@@ -2105,7 +2178,7 @@ extern "C" __global__ void scaleFC_RhoSq_comp_27( real* DC,
 												  unsigned int nyF,
 												  OffFC offFC);
 
-extern "C" __global__ void scaleFC_RhoSq_3rdMom_comp_27(real* DC,
+__global__ void scaleFC_RhoSq_3rdMom_comp_27(real* DC,
 														real* DF,
 														unsigned int* neighborCX,
 														unsigned int* neighborCY,
@@ -2128,7 +2201,7 @@ extern "C" __global__ void scaleFC_RhoSq_3rdMom_comp_27(real* DC,
 														unsigned int nyF,
 														OffFC offFC);
 
-extern "C" __global__ void scaleFC_AA2016_comp_27(real* DC,
+__global__ void scaleFC_AA2016_comp_27(real* DC,
 												  real* DF,
 												  unsigned int* neighborCX,
 												  unsigned int* neighborCY,
@@ -2151,7 +2224,7 @@ extern "C" __global__ void scaleFC_AA2016_comp_27(real* DC,
 												  unsigned int nyF,
 												  OffFC offFC);
 
-extern "C" __global__ void scaleFC_NSPress_27(real* DC,
+__global__ void scaleFC_NSPress_27(real* DC,
 											  real* DF,
 											  unsigned int* neighborCX,
 											  unsigned int* neighborCY,
@@ -2174,7 +2247,7 @@ extern "C" __global__ void scaleFC_NSPress_27(real* DC,
 											  unsigned int nyF,
 											  OffFC offFC);
 
-extern "C" __global__ void scaleFCThSMG7( real* DC,
+__global__ void scaleFCThSMG7( real* DC,
                                           real* DF,
                                           real* DD7C,
                                           real* DD7F,
@@ -2194,7 +2267,7 @@ extern "C" __global__ void scaleFCThSMG7( real* DC,
                                           real diffusivity_coarse,
                                           OffFC offFC);
 
-extern "C" __global__ void scaleFCThS7(real* DC,
+__global__ void scaleFCThS7(real* DC,
                                        real* DF,
                                        real* DD7C,
                                        real* DD7F,
@@ -2213,7 +2286,7 @@ extern "C" __global__ void scaleFCThS7(real* DC,
                                        real nu,
                                        real diffusivity_coarse);
 
-extern "C" __global__ void scaleFCThS27(  real* DC,
+__global__ void scaleFCThS27(  real* DC,
                                           real* DF,
                                           real* DD27C,
                                           real* DD27F,
@@ -2233,7 +2306,7 @@ extern "C" __global__ void scaleFCThS27(  real* DC,
                                           real diffusivity_coarse,
 										  OffFC offFC);
 
-extern "C" __global__ void DragLiftPost27(  real* DD,
+__global__ void DragLiftPost27(  real* DD,
 											int* k_Q,
 											real* QQ,
 											int numberOfBCnodes,
@@ -2246,7 +2319,7 @@ extern "C" __global__ void DragLiftPost27(  real* DD,
 											unsigned int size_Mat,
 											bool isEvenTimestep);
 
-extern "C" __global__ void DragLiftPre27(   real* DD,
+__global__ void DragLiftPre27(   real* DD,
 											int* k_Q,
 											real* QQ,
 											int numberOfBCnodes,
@@ -2259,7 +2332,7 @@ extern "C" __global__ void DragLiftPre27(   real* DD,
 											unsigned int size_Mat,
 											bool isEvenTimestep);
 
-extern "C" __global__ void CalcCP27(real* DD,
+__global__ void CalcCP27(real* DD,
 									int* cpIndex,
 									int nonCp,
 									double *cpPress,
@@ -2269,7 +2342,7 @@ extern "C" __global__ void CalcCP27(real* DD,
 									unsigned int size_Mat,
 									bool isEvenTimestep);
 
-extern "C" __global__ void getSendFsPre27(real* DD,
+__global__ void getSendFsPre27(real* DD,
 										  real* bufferFs,
 										  int* sendIndex,
                                           int buffmax,
@@ -2279,7 +2352,7 @@ extern "C" __global__ void getSendFsPre27(real* DD,
                                           unsigned int size_Mat,
                                           bool isEvenTimestep);
 
-extern "C" __global__ void getSendFsPost27(real* DD,
+__global__ void getSendFsPost27(real* DD,
 										   real* bufferFs,
 										   int* sendIndex,
                                            int buffmax,
@@ -2289,7 +2362,7 @@ extern "C" __global__ void getSendFsPost27(real* DD,
                                            unsigned int size_Mat,
                                            bool isEvenTimestep);
 
-extern "C" __global__ void setRecvFsPre27(real* DD,
+__global__ void setRecvFsPre27(real* DD,
 										  real* bufferFs,
 										  int* recvIndex,
                                           int buffmax,
@@ -2299,7 +2372,7 @@ extern "C" __global__ void setRecvFsPre27(real* DD,
                                           unsigned int size_Mat,
                                           bool isEvenTimestep);
 
-extern "C" __global__ void setRecvFsPost27(real* DD,
+__global__ void setRecvFsPost27(real* DD,
 										   real* bufferFs,
 										   int* recvIndex,
                                            int buffmax,
@@ -2309,7 +2382,7 @@ extern "C" __global__ void setRecvFsPost27(real* DD,
                                            unsigned int size_Mat,
                                            bool isEvenTimestep);
 
-extern "C" __global__ void getSendGsF3(
+__global__ void getSendGsF3(
 	real* G6,
 	real* bufferGs,
 	int* sendIndex,
@@ -2320,7 +2393,7 @@ extern "C" __global__ void getSendGsF3(
 	unsigned int size_Mat,
 	bool isEvenTimestep);
 
-extern "C" __global__ void setRecvGsF3(
+__global__ void setRecvGsF3(
 	real* G6,
 	real* bufferGs,
 	int* recvIndex,
@@ -2331,7 +2404,7 @@ extern "C" __global__ void setRecvGsF3(
 	unsigned int size_Mat,
 	bool isEvenTimestep);
 
-extern "C" __global__ void WallFunction27( 	real* vx,
+__global__ void WallFunction27( 	real* vx,
 											real* vy,
 											real* vz,
 											real* DD,
@@ -2345,7 +2418,7 @@ extern "C" __global__ void WallFunction27( 	real* vx,
 											unsigned int size_Mat,
 											bool isEvenTimestep);
 
-extern "C" __global__ void LBSetOutputWallVelocitySP27( real* vxD,
+__global__ void LBSetOutputWallVelocitySP27( real* vxD,
 														real* vyD,
 														real* vzD,
 														real* vxWall,
@@ -2363,7 +2436,7 @@ extern "C" __global__ void LBSetOutputWallVelocitySP27( real* vxD,
 														real* DD,
 														bool isEvenTimestep);
 
-extern "C" __global__ void GetVeloforForcing27( real* DD,
+__global__ void GetVeloforForcing27( real* DD,
 												int* bcIndex,
 												int nonAtBC,
 												real* Vx,
@@ -2375,7 +2448,7 @@ extern "C" __global__ void GetVeloforForcing27( real* DD,
 												unsigned int size_Mat,
 												bool isEvenTimestep);
 
-extern "C" __global__ void InitParticles( real* coordX,
+__global__ void InitParticles( real* coordX,
 										  real* coordY,
 										  real* coordZ,
 										  real* coordParticleXlocal,
@@ -2399,7 +2472,7 @@ extern "C" __global__ void InitParticles( real* coordX,
 									      unsigned int numberOfParticles,
 										  unsigned int size_Mat);
 
-extern "C" __global__ void MoveParticles( real* coordX,
+__global__ void MoveParticles( real* coordX,
 										  real* coordY,
 										  real* coordZ,
 										  real* coordParticleXlocal,
@@ -2427,7 +2500,7 @@ extern "C" __global__ void MoveParticles( real* coordX,
 										  unsigned int size_Mat,
 										  bool isEvenTimestep);
 
-extern "C" __global__ void MoveParticlesWithoutBCs(   real* coordX,
+__global__ void MoveParticlesWithoutBCs(   real* coordX,
 													  real* coordY,
 													  real* coordZ,
 													  real* coordParticleXlocal,
@@ -2455,12 +2528,12 @@ extern "C" __global__ void MoveParticlesWithoutBCs(   real* coordX,
 													  unsigned int size_Mat,
 													  bool isEvenTimestep);
 
-extern "C" __global__ void initRandom(curandState* state);
+__global__ void initRandom(curandState* state);
 
-extern "C" __global__ void generateRandomValues(curandState* state,
+__global__ void generateRandomValues(curandState* state,
 												real* randArray);
 
-extern "C" __global__ void CalcTurbulenceIntensity(
+__global__ void CalcTurbulenceIntensity(
    real* vxx,
    real* vyy,
    real* vzz,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/GeometryUtils.h b/src/gpu/VirtualFluids_GPU/GPU/GeometryUtils.h
index da438ab0fdb04c8a83bd37700b4e4735970bcd7d..4dbf525e173c4acb00ff53e70f7485852bf956ac 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/GeometryUtils.h
+++ b/src/gpu/VirtualFluids_GPU/GPU/GeometryUtils.h
@@ -1,7 +1,7 @@
 #ifndef _GEOMETRYUTILS_H
 #define _GEOMETRYUTILS_H
 
-__inline__ __host__ __device__ void getNeighborIndicesOfBSW(  uint k, //index of BSW node
+__inline__ __host__ __device__ void getNeighborIndicesOfBSW(  uint k, //index of DIR_MMM node
                                         uint &ke, uint &kn, uint &kt, uint &kne, uint &kte,uint &ktn, uint &ktne,
                                         uint* neighborX, uint* neighborY, uint* neighborZ)
 {
diff --git a/src/gpu/VirtualFluids_GPU/GPU/Init27.cu b/src/gpu/VirtualFluids_GPU/GPU/Init27.cu
index b27df37882b684e3fc8cf3b09e39a6195baed5de..6d497d2a1ab7ec305bec4f1ad1ed2e2d63c4dc27 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/Init27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/Init27.cu
@@ -7,7 +7,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LBInit27( int myid,
+__global__ void LBInit27( int myid,
                                      int numprocs,
                                      real u0,
                                      unsigned int* geoD,
@@ -24,33 +24,33 @@ extern "C" __global__ void LBInit27( int myid,
                                      int maxlev)
 {
    Distributions27 D;
-   D.f[E   ] = &DD[E   *size_Mat];
-   D.f[W   ] = &DD[W   *size_Mat];
-   D.f[N   ] = &DD[N   *size_Mat];
-   D.f[S   ] = &DD[S   *size_Mat];
-   D.f[T   ] = &DD[T   *size_Mat];
-   D.f[B   ] = &DD[B   *size_Mat];
-   D.f[NE  ] = &DD[NE  *size_Mat];
-   D.f[SW  ] = &DD[SW  *size_Mat];
-   D.f[SE  ] = &DD[SE  *size_Mat];
-   D.f[NW  ] = &DD[NW  *size_Mat];
-   D.f[TE  ] = &DD[TE  *size_Mat];
-   D.f[BW  ] = &DD[BW  *size_Mat];
-   D.f[BE  ] = &DD[BE  *size_Mat];
-   D.f[TW  ] = &DD[TW  *size_Mat];
-   D.f[TN  ] = &DD[TN  *size_Mat];
-   D.f[BS  ] = &DD[BS  *size_Mat];
-   D.f[BN  ] = &DD[BN  *size_Mat];
-   D.f[TS  ] = &DD[TS  *size_Mat];
-   D.f[REST] = &DD[REST*size_Mat];
-   D.f[TNE ] = &DD[TNE *size_Mat];
-   D.f[TSW ] = &DD[TSW *size_Mat];
-   D.f[TSE ] = &DD[TSE *size_Mat];
-   D.f[TNW ] = &DD[TNW *size_Mat];
-   D.f[BNE ] = &DD[BNE *size_Mat];
-   D.f[BSW ] = &DD[BSW *size_Mat];
-   D.f[BSE ] = &DD[BSE *size_Mat];
-   D.f[BNW ] = &DD[BNW *size_Mat];
+   D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+   D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+   D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+   D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+   D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+   D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+   D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+   D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+   D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+   D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+   D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+   D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+   D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+   D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+   D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+   D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+   D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+   D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+   D.f[DIR_000] = &DD[DIR_000*size_Mat];
+   D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+   D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+   D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+   D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+   D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+   D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+   D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+   D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
    ////////////////////////////////////////////////////////////////////////////////
    unsigned int  k;                   // Zugriff auf arrays im device
    //
@@ -141,33 +141,33 @@ extern "C" __global__ void LBInit27( int myid,
 
    real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
-   (D.f[REST])[kzero] =   c8o27* (drho-cu_sq);
-   (D.f[E   ])[ke   ] =   c2o27* (drho+c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cu_sq);
-   (D.f[W   ])[kw   ] =   c2o27* (drho+c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cu_sq);
-   (D.f[N   ])[kn   ] =   c2o27* (drho+c3o1*(    vx2     )+c9o2*(     vx2    )*(     vx2    )-cu_sq);
-   (D.f[S   ])[ks   ] =   c2o27* (drho+c3o1*(   -vx2     )+c9o2*(    -vx2    )*(    -vx2    )-cu_sq);
-   (D.f[T   ])[kt   ] =   c2o27* (drho+c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cu_sq);
-   (D.f[B   ])[kb   ] =   c2o27* (drho+c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cu_sq);
-   (D.f[NE  ])[kne  ] =   c1o54* (drho+c3o1*( vx1+vx2    )+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq);
-   (D.f[SW  ])[ksw  ] =   c1o54* (drho+c3o1*(-vx1-vx2    )+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq);
-   (D.f[SE  ])[kse  ] =   c1o54* (drho+c3o1*( vx1-vx2    )+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq);
-   (D.f[NW  ])[knw  ] =   c1o54* (drho+c3o1*(-vx1+vx2    )+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq);
-   (D.f[TE  ])[kte  ] =   c1o54* (drho+c3o1*( vx1    +vx3)+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq);
-   (D.f[BW  ])[kbw  ] =   c1o54* (drho+c3o1*(-vx1    -vx3)+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq);
-   (D.f[BE  ])[kbe  ] =   c1o54* (drho+c3o1*( vx1    -vx3)+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq);
-   (D.f[TW  ])[ktw  ] =   c1o54* (drho+c3o1*(-vx1    +vx3)+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq);
-   (D.f[TN  ])[ktn  ] =   c1o54* (drho+c3o1*(     vx2+vx3)+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq);
-   (D.f[BS  ])[kbs  ] =   c1o54* (drho+c3o1*(    -vx2-vx3)+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq);
-   (D.f[BN  ])[kbn  ] =   c1o54* (drho+c3o1*(     vx2-vx3)+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq);
-   (D.f[TS  ])[kts  ] =   c1o54* (drho+c3o1*(    -vx2+vx3)+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq);
-   (D.f[TNE ])[ktne ] =   c1o216*(drho+c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq);
-   (D.f[BSW ])[kbsw ] =   c1o216*(drho+c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq);
-   (D.f[BNE ])[kbne ] =   c1o216*(drho+c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq);
-   (D.f[TSW ])[ktsw ] =   c1o216*(drho+c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq);
-   (D.f[TSE ])[ktse ] =   c1o216*(drho+c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq);
-   (D.f[BNW ])[kbnw ] =   c1o216*(drho+c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq);
-   (D.f[BSE ])[kbse ] =   c1o216*(drho+c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq);
-   (D.f[TNW ])[ktnw ] =   c1o216*(drho+c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq);
+   (D.f[DIR_000])[kzero] =   c8o27* (drho-cu_sq);
+   (D.f[DIR_P00   ])[ke   ] =   c2o27* (drho+c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cu_sq);
+   (D.f[DIR_M00   ])[kw   ] =   c2o27* (drho+c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cu_sq);
+   (D.f[DIR_0P0   ])[kn   ] =   c2o27* (drho+c3o1*(    vx2     )+c9o2*(     vx2    )*(     vx2    )-cu_sq);
+   (D.f[DIR_0M0   ])[ks   ] =   c2o27* (drho+c3o1*(   -vx2     )+c9o2*(    -vx2    )*(    -vx2    )-cu_sq);
+   (D.f[DIR_00P   ])[kt   ] =   c2o27* (drho+c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cu_sq);
+   (D.f[DIR_00M   ])[kb   ] =   c2o27* (drho+c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cu_sq);
+   (D.f[DIR_PP0  ])[kne  ] =   c1o54* (drho+c3o1*( vx1+vx2    )+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq);
+   (D.f[DIR_MM0  ])[ksw  ] =   c1o54* (drho+c3o1*(-vx1-vx2    )+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq);
+   (D.f[DIR_PM0  ])[kse  ] =   c1o54* (drho+c3o1*( vx1-vx2    )+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq);
+   (D.f[DIR_MP0  ])[knw  ] =   c1o54* (drho+c3o1*(-vx1+vx2    )+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq);
+   (D.f[DIR_P0P  ])[kte  ] =   c1o54* (drho+c3o1*( vx1    +vx3)+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq);
+   (D.f[DIR_M0M  ])[kbw  ] =   c1o54* (drho+c3o1*(-vx1    -vx3)+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq);
+   (D.f[DIR_P0M  ])[kbe  ] =   c1o54* (drho+c3o1*( vx1    -vx3)+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq);
+   (D.f[DIR_M0P  ])[ktw  ] =   c1o54* (drho+c3o1*(-vx1    +vx3)+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq);
+   (D.f[DIR_0PP  ])[ktn  ] =   c1o54* (drho+c3o1*(     vx2+vx3)+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq);
+   (D.f[DIR_0MM  ])[kbs  ] =   c1o54* (drho+c3o1*(    -vx2-vx3)+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq);
+   (D.f[DIR_0PM  ])[kbn  ] =   c1o54* (drho+c3o1*(     vx2-vx3)+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq);
+   (D.f[DIR_0MP  ])[kts  ] =   c1o54* (drho+c3o1*(    -vx2+vx3)+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq);
+   (D.f[DIR_PPP ])[ktne ] =   c1o216*(drho+c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq);
+   (D.f[DIR_MMM ])[kbsw ] =   c1o216*(drho+c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq);
+   (D.f[DIR_PPM ])[kbne ] =   c1o216*(drho+c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq);
+   (D.f[DIR_MMP ])[ktsw ] =   c1o216*(drho+c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq);
+   (D.f[DIR_PMP ])[ktse ] =   c1o216*(drho+c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq);
+   (D.f[DIR_MPM ])[kbnw ] =   c1o216*(drho+c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq);
+   (D.f[DIR_PMM ])[kbse ] =   c1o216*(drho+c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq);
+   (D.f[DIR_MPP ])[ktnw ] =   c1o216*(drho+c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq);
 
 }
 ////////////////////////////////////////////////////////////////////////////////
@@ -182,7 +182,7 @@ extern "C" __global__ void LBInit27( int myid,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LBInitNonEqPartSP27( unsigned int* neighborX,
+__global__ void LBInitNonEqPartSP27( unsigned int* neighborX,
                                                 unsigned int* neighborY,
                                                 unsigned int* neighborZ,
                                                 unsigned int* neighborWSB,
@@ -218,63 +218,63 @@ extern "C" __global__ void LBInitNonEqPartSP27( unsigned int* neighborX,
             Distributions27 D;
             if (EvenOrOdd==true)
             {
-                D.f[E   ] = &DD[E   *size_Mat];
-                D.f[W   ] = &DD[W   *size_Mat];
-                D.f[N   ] = &DD[N   *size_Mat];
-                D.f[S   ] = &DD[S   *size_Mat];
-                D.f[T   ] = &DD[T   *size_Mat];
-                D.f[B   ] = &DD[B   *size_Mat];
-                D.f[NE  ] = &DD[NE  *size_Mat];
-                D.f[SW  ] = &DD[SW  *size_Mat];
-                D.f[SE  ] = &DD[SE  *size_Mat];
-                D.f[NW  ] = &DD[NW  *size_Mat];
-                D.f[TE  ] = &DD[TE  *size_Mat];
-                D.f[BW  ] = &DD[BW  *size_Mat];
-                D.f[BE  ] = &DD[BE  *size_Mat];
-                D.f[TW  ] = &DD[TW  *size_Mat];
-                D.f[TN  ] = &DD[TN  *size_Mat];
-                D.f[BS  ] = &DD[BS  *size_Mat];
-                D.f[BN  ] = &DD[BN  *size_Mat];
-                D.f[TS  ] = &DD[TS  *size_Mat];
-                D.f[REST] = &DD[REST*size_Mat];
-                D.f[TNE ] = &DD[TNE *size_Mat];
-                D.f[TSW ] = &DD[TSW *size_Mat];
-                D.f[TSE ] = &DD[TSE *size_Mat];
-                D.f[TNW ] = &DD[TNW *size_Mat];
-                D.f[BNE ] = &DD[BNE *size_Mat];
-                D.f[BSW ] = &DD[BSW *size_Mat];
-                D.f[BSE ] = &DD[BSE *size_Mat];
-                D.f[BNW ] = &DD[BNW *size_Mat];
+                D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+                D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+                D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+                D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+                D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+                D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+                D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+                D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+                D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+                D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+                D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+                D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+                D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+                D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+                D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+                D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+                D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+                D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+                D.f[DIR_000] = &DD[DIR_000*size_Mat];
+                D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+                D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+                D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+                D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+                D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+                D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+                D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+                D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
             }
             else
             {
-                D.f[W   ] = &DD[E   *size_Mat];
-                D.f[E   ] = &DD[W   *size_Mat];
-                D.f[S   ] = &DD[N   *size_Mat];
-                D.f[N   ] = &DD[S   *size_Mat];
-                D.f[B   ] = &DD[T   *size_Mat];
-                D.f[T   ] = &DD[B   *size_Mat];
-                D.f[SW  ] = &DD[NE  *size_Mat];
-                D.f[NE  ] = &DD[SW  *size_Mat];
-                D.f[NW  ] = &DD[SE  *size_Mat];
-                D.f[SE  ] = &DD[NW  *size_Mat];
-                D.f[BW  ] = &DD[TE  *size_Mat];
-                D.f[TE  ] = &DD[BW  *size_Mat];
-                D.f[TW  ] = &DD[BE  *size_Mat];
-                D.f[BE  ] = &DD[TW  *size_Mat];
-                D.f[BS  ] = &DD[TN  *size_Mat];
-                D.f[TN  ] = &DD[BS  *size_Mat];
-                D.f[TS  ] = &DD[BN  *size_Mat];
-                D.f[BN  ] = &DD[TS  *size_Mat];
-                D.f[REST] = &DD[REST*size_Mat];
-                D.f[BSW ] = &DD[TNE *size_Mat];
-                D.f[BNE ] = &DD[TSW *size_Mat];
-                D.f[BNW ] = &DD[TSE *size_Mat];
-                D.f[BSE ] = &DD[TNW *size_Mat];
-                D.f[TSW ] = &DD[BNE *size_Mat];
-                D.f[TNE ] = &DD[BSW *size_Mat];
-                D.f[TNW ] = &DD[BSE *size_Mat];
-                D.f[TSE ] = &DD[BNW *size_Mat];
+                D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+                D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+                D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+                D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+                D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+                D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+                D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+                D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+                D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+                D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+                D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+                D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+                D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+                D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+                D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+                D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+                D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+                D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+                D.f[DIR_000] = &DD[DIR_000*size_Mat];
+                D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+                D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+                D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+                D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+                D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+                D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+                D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+                D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
             }
             //////////////////////////////////////////////////////////////////////////
             real drho = rho[k];//0.0f;//
@@ -396,63 +396,63 @@ extern "C" __global__ void LBInitNonEqPartSP27( unsigned int* neighborX,
             //////////////////////////////////////////////////////////////////////////
             real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
             
-            (D.f[REST])[kzero] =   c8o27* (drho-cu_sq);
-            (D.f[E   ])[ke   ] =   c2o27* (drho+c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cu_sq);
-            (D.f[W   ])[kw   ] =   c2o27* (drho+c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cu_sq);
-            (D.f[N   ])[kn   ] =   c2o27* (drho+c3o1*(    vx2     )+c9o2*(     vx2    )*(     vx2    )-cu_sq);
-            (D.f[S   ])[ks   ] =   c2o27* (drho+c3o1*(   -vx2     )+c9o2*(    -vx2    )*(    -vx2    )-cu_sq);
-            (D.f[T   ])[kt   ] =   c2o27* (drho+c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cu_sq);
-            (D.f[B   ])[kb   ] =   c2o27* (drho+c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cu_sq);
-            (D.f[NE  ])[kne  ] =   c1o54* (drho+c3o1*( vx1+vx2    )+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq);
-            (D.f[SW  ])[ksw  ] =   c1o54* (drho+c3o1*(-vx1-vx2    )+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq);
-            (D.f[SE  ])[kse  ] =   c1o54* (drho+c3o1*( vx1-vx2    )+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq);
-            (D.f[NW  ])[knw  ] =   c1o54* (drho+c3o1*(-vx1+vx2    )+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq);
-            (D.f[TE  ])[kte  ] =   c1o54* (drho+c3o1*( vx1    +vx3)+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq);
-            (D.f[BW  ])[kbw  ] =   c1o54* (drho+c3o1*(-vx1    -vx3)+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq);
-            (D.f[BE  ])[kbe  ] =   c1o54* (drho+c3o1*( vx1    -vx3)+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq);
-            (D.f[TW  ])[ktw  ] =   c1o54* (drho+c3o1*(-vx1    +vx3)+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq);
-            (D.f[TN  ])[ktn  ] =   c1o54* (drho+c3o1*(     vx2+vx3)+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq);
-            (D.f[BS  ])[kbs  ] =   c1o54* (drho+c3o1*(    -vx2-vx3)+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq);
-            (D.f[BN  ])[kbn  ] =   c1o54* (drho+c3o1*(     vx2-vx3)+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq);
-            (D.f[TS  ])[kts  ] =   c1o54* (drho+c3o1*(    -vx2+vx3)+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq);
-            (D.f[TNE ])[ktne ] =   c1o216*(drho+c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq);
-            (D.f[BSW ])[kbsw ] =   c1o216*(drho+c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq);
-            (D.f[BNE ])[kbne ] =   c1o216*(drho+c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq);
-            (D.f[TSW ])[ktsw ] =   c1o216*(drho+c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq);
-            (D.f[TSE ])[ktse ] =   c1o216*(drho+c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq);
-            (D.f[BNW ])[kbnw ] =   c1o216*(drho+c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq);
-            (D.f[BSE ])[kbse ] =   c1o216*(drho+c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq);
-            (D.f[TNW ])[ktnw ] =   c1o216*(drho+c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq);
+            (D.f[DIR_000])[kzero] =   c8o27* (drho-cu_sq);
+            (D.f[DIR_P00   ])[ke   ] =   c2o27* (drho+c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cu_sq);
+            (D.f[DIR_M00   ])[kw   ] =   c2o27* (drho+c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cu_sq);
+            (D.f[DIR_0P0   ])[kn   ] =   c2o27* (drho+c3o1*(    vx2     )+c9o2*(     vx2    )*(     vx2    )-cu_sq);
+            (D.f[DIR_0M0   ])[ks   ] =   c2o27* (drho+c3o1*(   -vx2     )+c9o2*(    -vx2    )*(    -vx2    )-cu_sq);
+            (D.f[DIR_00P   ])[kt   ] =   c2o27* (drho+c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cu_sq);
+            (D.f[DIR_00M   ])[kb   ] =   c2o27* (drho+c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cu_sq);
+            (D.f[DIR_PP0  ])[kne  ] =   c1o54* (drho+c3o1*( vx1+vx2    )+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq);
+            (D.f[DIR_MM0  ])[ksw  ] =   c1o54* (drho+c3o1*(-vx1-vx2    )+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq);
+            (D.f[DIR_PM0  ])[kse  ] =   c1o54* (drho+c3o1*( vx1-vx2    )+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq);
+            (D.f[DIR_MP0  ])[knw  ] =   c1o54* (drho+c3o1*(-vx1+vx2    )+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq);
+            (D.f[DIR_P0P  ])[kte  ] =   c1o54* (drho+c3o1*( vx1    +vx3)+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq);
+            (D.f[DIR_M0M  ])[kbw  ] =   c1o54* (drho+c3o1*(-vx1    -vx3)+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq);
+            (D.f[DIR_P0M  ])[kbe  ] =   c1o54* (drho+c3o1*( vx1    -vx3)+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq);
+            (D.f[DIR_M0P  ])[ktw  ] =   c1o54* (drho+c3o1*(-vx1    +vx3)+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq);
+            (D.f[DIR_0PP  ])[ktn  ] =   c1o54* (drho+c3o1*(     vx2+vx3)+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq);
+            (D.f[DIR_0MM  ])[kbs  ] =   c1o54* (drho+c3o1*(    -vx2-vx3)+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq);
+            (D.f[DIR_0PM  ])[kbn  ] =   c1o54* (drho+c3o1*(     vx2-vx3)+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq);
+            (D.f[DIR_0MP  ])[kts  ] =   c1o54* (drho+c3o1*(    -vx2+vx3)+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq);
+            (D.f[DIR_PPP ])[ktne ] =   c1o216*(drho+c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq);
+            (D.f[DIR_MMM ])[kbsw ] =   c1o216*(drho+c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq);
+            (D.f[DIR_PPM ])[kbne ] =   c1o216*(drho+c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq);
+            (D.f[DIR_MMP ])[ktsw ] =   c1o216*(drho+c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq);
+            (D.f[DIR_PMP ])[ktse ] =   c1o216*(drho+c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq);
+            (D.f[DIR_MPM ])[kbnw ] =   c1o216*(drho+c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq);
+            (D.f[DIR_PMM ])[kbse ] =   c1o216*(drho+c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq);
+            (D.f[DIR_MPP ])[ktnw ] =   c1o216*(drho+c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq);
 
             //////////////////////////////////////////////////////////////////////////
 
-            (D.f[REST])[kzero] += f_ZERO;
-            (D.f[E   ])[ke   ] += f_E   ;
-            (D.f[W   ])[kw   ] += f_E   ;
-            (D.f[N   ])[kn   ] += f_N   ;
-            (D.f[S   ])[ks   ] += f_N   ;
-            (D.f[T   ])[kt   ] += f_T   ;
-            (D.f[B   ])[kb   ] += f_T   ;
-            (D.f[NE  ])[kne  ] += f_NE  ;
-            (D.f[SW  ])[ksw  ] += f_NE  ;
-            (D.f[SE  ])[kse  ] += f_SE  ;
-            (D.f[NW  ])[knw  ] += f_SE  ;
-            (D.f[TE  ])[kte  ] += f_TE  ;
-            (D.f[BW  ])[kbw  ] += f_TE  ;
-            (D.f[BE  ])[kbe  ] += f_BE  ;
-            (D.f[TW  ])[ktw  ] += f_BE  ;
-            (D.f[TN  ])[ktn  ] += f_TN  ;
-            (D.f[BS  ])[kbs  ] += f_TN  ;
-            (D.f[BN  ])[kbn  ] += f_BN  ;
-            (D.f[TS  ])[kts  ] += f_BN  ;
-            (D.f[TNE ])[ktne ] += f_TNE ;
-            (D.f[BSW ])[kbsw ] += f_TNE ;
-            (D.f[BNE ])[kbne ] += f_TSW ;
-            (D.f[TSW ])[ktsw ] += f_TSW ;
-            (D.f[TSE ])[ktse ] += f_TSE ;
-            (D.f[BNW ])[kbnw ] += f_TSE ;
-            (D.f[BSE ])[kbse ] += f_TNW ;
-            (D.f[TNW ])[ktnw ] += f_TNW ;
+            (D.f[DIR_000])[kzero] += f_ZERO;
+            (D.f[DIR_P00   ])[ke   ] += f_E   ;
+            (D.f[DIR_M00   ])[kw   ] += f_E   ;
+            (D.f[DIR_0P0   ])[kn   ] += f_N   ;
+            (D.f[DIR_0M0   ])[ks   ] += f_N   ;
+            (D.f[DIR_00P   ])[kt   ] += f_T   ;
+            (D.f[DIR_00M   ])[kb   ] += f_T   ;
+            (D.f[DIR_PP0  ])[kne  ] += f_NE  ;
+            (D.f[DIR_MM0  ])[ksw  ] += f_NE  ;
+            (D.f[DIR_PM0  ])[kse  ] += f_SE  ;
+            (D.f[DIR_MP0  ])[knw  ] += f_SE  ;
+            (D.f[DIR_P0P  ])[kte  ] += f_TE  ;
+            (D.f[DIR_M0M  ])[kbw  ] += f_TE  ;
+            (D.f[DIR_P0M  ])[kbe  ] += f_BE  ;
+            (D.f[DIR_M0P  ])[ktw  ] += f_BE  ;
+            (D.f[DIR_0PP  ])[ktn  ] += f_TN  ;
+            (D.f[DIR_0MM  ])[kbs  ] += f_TN  ;
+            (D.f[DIR_0PM  ])[kbn  ] += f_BN  ;
+            (D.f[DIR_0MP  ])[kts  ] += f_BN  ;
+            (D.f[DIR_PPP ])[ktne ] += f_TNE ;
+            (D.f[DIR_MMM ])[kbsw ] += f_TNE ;
+            (D.f[DIR_PPM ])[kbne ] += f_TSW ;
+            (D.f[DIR_MMP ])[ktsw ] += f_TSW ;
+            (D.f[DIR_PMP ])[ktse ] += f_TSE ;
+            (D.f[DIR_MPM ])[kbnw ] += f_TSE ;
+            (D.f[DIR_PMM ])[kbse ] += f_TNW ;
+            (D.f[DIR_MPP ])[ktnw ] += f_TNW ;
 
             //////////////////////////////////////////////////////////////////////////
         }
@@ -460,9 +460,9 @@ extern "C" __global__ void LBInitNonEqPartSP27( unsigned int* neighborX,
 	    {
 		    //////////////////////////////////////////////////////////////////////////
 		    Distributions27 D;
-		    D.f[REST] = &DD[REST*size_Mat];
+		    D.f[DIR_000] = &DD[DIR_000*size_Mat];
 		    //////////////////////////////////////////////////////////////////////////
-		    (D.f[REST])[k] = c96o1;
+		    (D.f[DIR_000])[k] = c96o1;
 		    //////////////////////////////////////////////////////////////////////////
 	    }
    }
diff --git a/src/gpu/VirtualFluids_GPU/GPU/InitAdvectionDiffusion27.cu b/src/gpu/VirtualFluids_GPU/GPU/InitAdvectionDiffusion27.cu
index 38e409730a8316f9f8b620d931d25aadc80d4b01..c091aa8b9a29017ddc0f6ea6584e805d7afc4859 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/InitAdvectionDiffusion27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/InitAdvectionDiffusion27.cu
@@ -38,7 +38,7 @@
 using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 
-extern "C" __global__ void InitAD27(
+__global__ void InitAD27(
 	uint* neighborX,
 	uint* neighborY,
 	uint* neighborZ,
@@ -77,63 +77,63 @@ extern "C" __global__ void InitAD27(
 		Distributions27 distAD;
 		if (isEvenTimestep)
 		{
-			distAD.f[E   ] = &distributionsAD[E   *size_Mat];
-			distAD.f[W   ] = &distributionsAD[W   *size_Mat];
-			distAD.f[N   ] = &distributionsAD[N   *size_Mat];
-			distAD.f[S   ] = &distributionsAD[S   *size_Mat];
-			distAD.f[T   ] = &distributionsAD[T   *size_Mat];
-			distAD.f[B   ] = &distributionsAD[B   *size_Mat];
-			distAD.f[NE  ] = &distributionsAD[NE  *size_Mat];
-			distAD.f[SW  ] = &distributionsAD[SW  *size_Mat];
-			distAD.f[SE  ] = &distributionsAD[SE  *size_Mat];
-			distAD.f[NW  ] = &distributionsAD[NW  *size_Mat];
-			distAD.f[TE  ] = &distributionsAD[TE  *size_Mat];
-			distAD.f[BW  ] = &distributionsAD[BW  *size_Mat];
-			distAD.f[BE  ] = &distributionsAD[BE  *size_Mat];
-			distAD.f[TW  ] = &distributionsAD[TW  *size_Mat];
-			distAD.f[TN  ] = &distributionsAD[TN  *size_Mat];
-			distAD.f[BS  ] = &distributionsAD[BS  *size_Mat];
-			distAD.f[BN  ] = &distributionsAD[BN  *size_Mat];
-			distAD.f[TS  ] = &distributionsAD[TS  *size_Mat];
-			distAD.f[REST] = &distributionsAD[REST*size_Mat];
-			distAD.f[TNE ] = &distributionsAD[TNE *size_Mat];
-			distAD.f[TSW ] = &distributionsAD[TSW *size_Mat];
-			distAD.f[TSE ] = &distributionsAD[TSE *size_Mat];
-			distAD.f[TNW ] = &distributionsAD[TNW *size_Mat];
-			distAD.f[BNE ] = &distributionsAD[BNE *size_Mat];
-			distAD.f[BSW ] = &distributionsAD[BSW *size_Mat];
-			distAD.f[BSE ] = &distributionsAD[BSE *size_Mat];
-			distAD.f[BNW ] = &distributionsAD[BNW *size_Mat];
+			distAD.f[DIR_P00   ] = &distributionsAD[DIR_P00   *size_Mat];
+			distAD.f[DIR_M00   ] = &distributionsAD[DIR_M00   *size_Mat];
+			distAD.f[DIR_0P0   ] = &distributionsAD[DIR_0P0   *size_Mat];
+			distAD.f[DIR_0M0   ] = &distributionsAD[DIR_0M0   *size_Mat];
+			distAD.f[DIR_00P   ] = &distributionsAD[DIR_00P   *size_Mat];
+			distAD.f[DIR_00M   ] = &distributionsAD[DIR_00M   *size_Mat];
+			distAD.f[DIR_PP0  ] = &distributionsAD[DIR_PP0  *size_Mat];
+			distAD.f[DIR_MM0  ] = &distributionsAD[DIR_MM0  *size_Mat];
+			distAD.f[DIR_PM0  ] = &distributionsAD[DIR_PM0  *size_Mat];
+			distAD.f[DIR_MP0  ] = &distributionsAD[DIR_MP0  *size_Mat];
+			distAD.f[DIR_P0P  ] = &distributionsAD[DIR_P0P  *size_Mat];
+			distAD.f[DIR_M0M  ] = &distributionsAD[DIR_M0M  *size_Mat];
+			distAD.f[DIR_P0M  ] = &distributionsAD[DIR_P0M  *size_Mat];
+			distAD.f[DIR_M0P  ] = &distributionsAD[DIR_M0P  *size_Mat];
+			distAD.f[DIR_0PP  ] = &distributionsAD[DIR_0PP  *size_Mat];
+			distAD.f[DIR_0MM  ] = &distributionsAD[DIR_0MM  *size_Mat];
+			distAD.f[DIR_0PM  ] = &distributionsAD[DIR_0PM  *size_Mat];
+			distAD.f[DIR_0MP  ] = &distributionsAD[DIR_0MP  *size_Mat];
+			distAD.f[DIR_000] = &distributionsAD[DIR_000*size_Mat];
+			distAD.f[DIR_PPP ] = &distributionsAD[DIR_PPP *size_Mat];
+			distAD.f[DIR_MMP ] = &distributionsAD[DIR_MMP *size_Mat];
+			distAD.f[DIR_PMP ] = &distributionsAD[DIR_PMP *size_Mat];
+			distAD.f[DIR_MPP ] = &distributionsAD[DIR_MPP *size_Mat];
+			distAD.f[DIR_PPM ] = &distributionsAD[DIR_PPM *size_Mat];
+			distAD.f[DIR_MMM ] = &distributionsAD[DIR_MMM *size_Mat];
+			distAD.f[DIR_PMM ] = &distributionsAD[DIR_PMM *size_Mat];
+			distAD.f[DIR_MPM ] = &distributionsAD[DIR_MPM *size_Mat];
 		}
 		else
 		{
-			distAD.f[W   ] = &distributionsAD[E   *size_Mat];
-			distAD.f[E   ] = &distributionsAD[W   *size_Mat];
-			distAD.f[S   ] = &distributionsAD[N   *size_Mat];
-			distAD.f[N   ] = &distributionsAD[S   *size_Mat];
-			distAD.f[B   ] = &distributionsAD[T   *size_Mat];
-			distAD.f[T   ] = &distributionsAD[B   *size_Mat];
-			distAD.f[SW  ] = &distributionsAD[NE  *size_Mat];
-			distAD.f[NE  ] = &distributionsAD[SW  *size_Mat];
-			distAD.f[NW  ] = &distributionsAD[SE  *size_Mat];
-			distAD.f[SE  ] = &distributionsAD[NW  *size_Mat];
-			distAD.f[BW  ] = &distributionsAD[TE  *size_Mat];
-			distAD.f[TE  ] = &distributionsAD[BW  *size_Mat];
-			distAD.f[TW  ] = &distributionsAD[BE  *size_Mat];
-			distAD.f[BE  ] = &distributionsAD[TW  *size_Mat];
-			distAD.f[BS  ] = &distributionsAD[TN  *size_Mat];
-			distAD.f[TN  ] = &distributionsAD[BS  *size_Mat];
-			distAD.f[TS  ] = &distributionsAD[BN  *size_Mat];
-			distAD.f[BN  ] = &distributionsAD[TS  *size_Mat];
-			distAD.f[REST] = &distributionsAD[REST*size_Mat];
-			distAD.f[BSW ] = &distributionsAD[TNE *size_Mat];
-			distAD.f[BNE ] = &distributionsAD[TSW *size_Mat];
-			distAD.f[BNW ] = &distributionsAD[TSE *size_Mat];
-			distAD.f[BSE ] = &distributionsAD[TNW *size_Mat];
-			distAD.f[TSW ] = &distributionsAD[BNE *size_Mat];
-			distAD.f[TNE ] = &distributionsAD[BSW *size_Mat];
-			distAD.f[TNW ] = &distributionsAD[BSE *size_Mat];
-			distAD.f[TSE ] = &distributionsAD[BNW *size_Mat];
+			distAD.f[DIR_M00   ] = &distributionsAD[DIR_P00   *size_Mat];
+			distAD.f[DIR_P00   ] = &distributionsAD[DIR_M00   *size_Mat];
+			distAD.f[DIR_0M0   ] = &distributionsAD[DIR_0P0   *size_Mat];
+			distAD.f[DIR_0P0   ] = &distributionsAD[DIR_0M0   *size_Mat];
+			distAD.f[DIR_00M   ] = &distributionsAD[DIR_00P   *size_Mat];
+			distAD.f[DIR_00P   ] = &distributionsAD[DIR_00M   *size_Mat];
+			distAD.f[DIR_MM0  ] = &distributionsAD[DIR_PP0  *size_Mat];
+			distAD.f[DIR_PP0  ] = &distributionsAD[DIR_MM0  *size_Mat];
+			distAD.f[DIR_MP0  ] = &distributionsAD[DIR_PM0  *size_Mat];
+			distAD.f[DIR_PM0  ] = &distributionsAD[DIR_MP0  *size_Mat];
+			distAD.f[DIR_M0M  ] = &distributionsAD[DIR_P0P  *size_Mat];
+			distAD.f[DIR_P0P  ] = &distributionsAD[DIR_M0M  *size_Mat];
+			distAD.f[DIR_M0P  ] = &distributionsAD[DIR_P0M  *size_Mat];
+			distAD.f[DIR_P0M  ] = &distributionsAD[DIR_M0P  *size_Mat];
+			distAD.f[DIR_0MM  ] = &distributionsAD[DIR_0PP  *size_Mat];
+			distAD.f[DIR_0PP  ] = &distributionsAD[DIR_0MM  *size_Mat];
+			distAD.f[DIR_0MP  ] = &distributionsAD[DIR_0PM  *size_Mat];
+			distAD.f[DIR_0PM  ] = &distributionsAD[DIR_0MP  *size_Mat];
+			distAD.f[DIR_000] = &distributionsAD[DIR_000*size_Mat];
+			distAD.f[DIR_MMM ] = &distributionsAD[DIR_PPP *size_Mat];
+			distAD.f[DIR_PPM ] = &distributionsAD[DIR_MMP *size_Mat];
+			distAD.f[DIR_MPM ] = &distributionsAD[DIR_PMP *size_Mat];
+			distAD.f[DIR_PMM ] = &distributionsAD[DIR_MPP *size_Mat];
+			distAD.f[DIR_MMP ] = &distributionsAD[DIR_PPM *size_Mat];
+			distAD.f[DIR_PPP ] = &distributionsAD[DIR_MMM *size_Mat];
+			distAD.f[DIR_MPP ] = &distributionsAD[DIR_PMM *size_Mat];
+			distAD.f[DIR_PMP ] = &distributionsAD[DIR_MPM *size_Mat];
 		}
 		//////////////////////////////////////////////////////////////////////////
 		//! - Set local velocities and concetration
@@ -177,33 +177,33 @@ extern "C" __global__ void InitAD27(
 		//!
 		real cu_sq = c3o2*(vx1*vx1 + vx2*vx2 + vx3*vx3);
 
-		(distAD.f[REST])[kzero] = c8o27  * conc * (c1o1 - cu_sq);
-		(distAD.f[E   ])[ke   ] = c2o27  * conc * (c1o1 + c3o1 * ( vx1            ) + c9o2 * ( vx1            ) * ( vx1            ) - cu_sq);
-		(distAD.f[W   ])[kw   ] = c2o27  * conc * (c1o1 + c3o1 * (-vx1            ) + c9o2 * (-vx1            ) * (-vx1            ) - cu_sq);
-		(distAD.f[N   ])[kn   ] = c2o27  * conc * (c1o1 + c3o1 * (       vx2      ) + c9o2 * (       vx2      ) * (       vx2      ) - cu_sq);
-		(distAD.f[S   ])[ks   ] = c2o27  * conc * (c1o1 + c3o1 * (     - vx2      ) + c9o2 * (     - vx2      ) * (     - vx2      ) - cu_sq);
-		(distAD.f[T   ])[kt   ] = c2o27  * conc * (c1o1 + c3o1 * (             vx3) + c9o2 * (             vx3) * (             vx3) - cu_sq);
-		(distAD.f[B   ])[kb   ] = c2o27  * conc * (c1o1 + c3o1 * (           - vx3) + c9o2 * (           - vx3) * (           - vx3) - cu_sq);
-		(distAD.f[NE  ])[kne  ] = c1o54  * conc * (c1o1 + c3o1 * ( vx1 + vx2      ) + c9o2 * ( vx1 + vx2      ) * ( vx1 + vx2      ) - cu_sq);
-		(distAD.f[SW  ])[ksw  ] = c1o54  * conc * (c1o1 + c3o1 * (-vx1 - vx2      ) + c9o2 * (-vx1 - vx2      ) * (-vx1 - vx2      ) - cu_sq);
-		(distAD.f[SE  ])[kse  ] = c1o54  * conc * (c1o1 + c3o1 * ( vx1 - vx2      ) + c9o2 * ( vx1 - vx2      ) * ( vx1 - vx2      ) - cu_sq);
-		(distAD.f[NW  ])[knw  ] = c1o54  * conc * (c1o1 + c3o1 * (-vx1 + vx2      ) + c9o2 * (-vx1 + vx2      ) * (-vx1 + vx2      ) - cu_sq);
-		(distAD.f[TE  ])[kte  ] = c1o54  * conc * (c1o1 + c3o1 * ( vx1       + vx3) + c9o2 * ( vx1       + vx3) * ( vx1       + vx3) - cu_sq);
-		(distAD.f[BW  ])[kbw  ] = c1o54  * conc * (c1o1 + c3o1 * (-vx1       - vx3) + c9o2 * (-vx1       - vx3) * (-vx1       - vx3) - cu_sq);
-		(distAD.f[BE  ])[kbe  ] = c1o54  * conc * (c1o1 + c3o1 * ( vx1       - vx3) + c9o2 * ( vx1       - vx3) * ( vx1       - vx3) - cu_sq);
-		(distAD.f[TW  ])[ktw  ] = c1o54  * conc * (c1o1 + c3o1 * (-vx1       + vx3) + c9o2 * (-vx1       + vx3) * (-vx1       + vx3) - cu_sq);
-		(distAD.f[TN  ])[ktn  ] = c1o54  * conc * (c1o1 + c3o1 * (       vx2 + vx3) + c9o2 * (       vx2 + vx3) * (       vx2 + vx3) - cu_sq);
-		(distAD.f[BS  ])[kbs  ] = c1o54  * conc * (c1o1 + c3o1 * (     - vx2 - vx3) + c9o2 * (     - vx2 - vx3) * (     - vx2 - vx3) - cu_sq);
-		(distAD.f[BN  ])[kbn  ] = c1o54  * conc * (c1o1 + c3o1 * (       vx2 - vx3) + c9o2 * (       vx2 - vx3) * (       vx2 - vx3) - cu_sq);
-		(distAD.f[TS  ])[kts  ] = c1o54  * conc * (c1o1 + c3o1 * (     - vx2 + vx3) + c9o2 * (     - vx2 + vx3) * (     - vx2 + vx3) - cu_sq);
-		(distAD.f[TNE ])[ktne ] = c1o216 * conc * (c1o1 + c3o1 * ( vx1 + vx2 + vx3) + c9o2 * ( vx1 + vx2 + vx3) * ( vx1 + vx2 + vx3) - cu_sq);
-		(distAD.f[BSW ])[kbsw ] = c1o216 * conc * (c1o1 + c3o1 * (-vx1 - vx2 - vx3) + c9o2 * (-vx1 - vx2 - vx3) * (-vx1 - vx2 - vx3) - cu_sq);
-		(distAD.f[BNE ])[kbne ] = c1o216 * conc * (c1o1 + c3o1 * ( vx1 + vx2 - vx3) + c9o2 * ( vx1 + vx2 - vx3) * ( vx1 + vx2 - vx3) - cu_sq);
-		(distAD.f[TSW ])[ktsw ] = c1o216 * conc * (c1o1 + c3o1 * (-vx1 - vx2 + vx3) + c9o2 * (-vx1 - vx2 + vx3) * (-vx1 - vx2 + vx3) - cu_sq);
-		(distAD.f[TSE ])[ktse ] = c1o216 * conc * (c1o1 + c3o1 * ( vx1 - vx2 + vx3) + c9o2 * ( vx1 - vx2 + vx3) * ( vx1 - vx2 + vx3) - cu_sq);
-		(distAD.f[BNW ])[kbnw ] = c1o216 * conc * (c1o1 + c3o1 * (-vx1 + vx2 - vx3) + c9o2 * (-vx1 + vx2 - vx3) * (-vx1 + vx2 - vx3) - cu_sq);
-		(distAD.f[BSE ])[kbse ] = c1o216 * conc * (c1o1 + c3o1 * ( vx1 - vx2 - vx3) + c9o2 * ( vx1 - vx2 - vx3) * ( vx1 - vx2 - vx3) - cu_sq);
-		(distAD.f[TNW ])[ktnw ] = c1o216 * conc * (c1o1 + c3o1 * (-vx1 + vx2 + vx3) + c9o2 * (-vx1 + vx2 + vx3) * (-vx1 + vx2 + vx3) - cu_sq);
+		(distAD.f[DIR_000])[kzero] = c8o27  * conc * (c1o1 - cu_sq);
+		(distAD.f[DIR_P00   ])[ke   ] = c2o27  * conc * (c1o1 + c3o1 * ( vx1            ) + c9o2 * ( vx1            ) * ( vx1            ) - cu_sq);
+		(distAD.f[DIR_M00   ])[kw   ] = c2o27  * conc * (c1o1 + c3o1 * (-vx1            ) + c9o2 * (-vx1            ) * (-vx1            ) - cu_sq);
+		(distAD.f[DIR_0P0   ])[kn   ] = c2o27  * conc * (c1o1 + c3o1 * (       vx2      ) + c9o2 * (       vx2      ) * (       vx2      ) - cu_sq);
+		(distAD.f[DIR_0M0   ])[ks   ] = c2o27  * conc * (c1o1 + c3o1 * (     - vx2      ) + c9o2 * (     - vx2      ) * (     - vx2      ) - cu_sq);
+		(distAD.f[DIR_00P   ])[kt   ] = c2o27  * conc * (c1o1 + c3o1 * (             vx3) + c9o2 * (             vx3) * (             vx3) - cu_sq);
+		(distAD.f[DIR_00M   ])[kb   ] = c2o27  * conc * (c1o1 + c3o1 * (           - vx3) + c9o2 * (           - vx3) * (           - vx3) - cu_sq);
+		(distAD.f[DIR_PP0  ])[kne  ] = c1o54  * conc * (c1o1 + c3o1 * ( vx1 + vx2      ) + c9o2 * ( vx1 + vx2      ) * ( vx1 + vx2      ) - cu_sq);
+		(distAD.f[DIR_MM0  ])[ksw  ] = c1o54  * conc * (c1o1 + c3o1 * (-vx1 - vx2      ) + c9o2 * (-vx1 - vx2      ) * (-vx1 - vx2      ) - cu_sq);
+		(distAD.f[DIR_PM0  ])[kse  ] = c1o54  * conc * (c1o1 + c3o1 * ( vx1 - vx2      ) + c9o2 * ( vx1 - vx2      ) * ( vx1 - vx2      ) - cu_sq);
+		(distAD.f[DIR_MP0  ])[knw  ] = c1o54  * conc * (c1o1 + c3o1 * (-vx1 + vx2      ) + c9o2 * (-vx1 + vx2      ) * (-vx1 + vx2      ) - cu_sq);
+		(distAD.f[DIR_P0P  ])[kte  ] = c1o54  * conc * (c1o1 + c3o1 * ( vx1       + vx3) + c9o2 * ( vx1       + vx3) * ( vx1       + vx3) - cu_sq);
+		(distAD.f[DIR_M0M  ])[kbw  ] = c1o54  * conc * (c1o1 + c3o1 * (-vx1       - vx3) + c9o2 * (-vx1       - vx3) * (-vx1       - vx3) - cu_sq);
+		(distAD.f[DIR_P0M  ])[kbe  ] = c1o54  * conc * (c1o1 + c3o1 * ( vx1       - vx3) + c9o2 * ( vx1       - vx3) * ( vx1       - vx3) - cu_sq);
+		(distAD.f[DIR_M0P  ])[ktw  ] = c1o54  * conc * (c1o1 + c3o1 * (-vx1       + vx3) + c9o2 * (-vx1       + vx3) * (-vx1       + vx3) - cu_sq);
+		(distAD.f[DIR_0PP  ])[ktn  ] = c1o54  * conc * (c1o1 + c3o1 * (       vx2 + vx3) + c9o2 * (       vx2 + vx3) * (       vx2 + vx3) - cu_sq);
+		(distAD.f[DIR_0MM  ])[kbs  ] = c1o54  * conc * (c1o1 + c3o1 * (     - vx2 - vx3) + c9o2 * (     - vx2 - vx3) * (     - vx2 - vx3) - cu_sq);
+		(distAD.f[DIR_0PM  ])[kbn  ] = c1o54  * conc * (c1o1 + c3o1 * (       vx2 - vx3) + c9o2 * (       vx2 - vx3) * (       vx2 - vx3) - cu_sq);
+		(distAD.f[DIR_0MP  ])[kts  ] = c1o54  * conc * (c1o1 + c3o1 * (     - vx2 + vx3) + c9o2 * (     - vx2 + vx3) * (     - vx2 + vx3) - cu_sq);
+		(distAD.f[DIR_PPP ])[ktne ] = c1o216 * conc * (c1o1 + c3o1 * ( vx1 + vx2 + vx3) + c9o2 * ( vx1 + vx2 + vx3) * ( vx1 + vx2 + vx3) - cu_sq);
+		(distAD.f[DIR_MMM ])[kbsw ] = c1o216 * conc * (c1o1 + c3o1 * (-vx1 - vx2 - vx3) + c9o2 * (-vx1 - vx2 - vx3) * (-vx1 - vx2 - vx3) - cu_sq);
+		(distAD.f[DIR_PPM ])[kbne ] = c1o216 * conc * (c1o1 + c3o1 * ( vx1 + vx2 - vx3) + c9o2 * ( vx1 + vx2 - vx3) * ( vx1 + vx2 - vx3) - cu_sq);
+		(distAD.f[DIR_MMP ])[ktsw ] = c1o216 * conc * (c1o1 + c3o1 * (-vx1 - vx2 + vx3) + c9o2 * (-vx1 - vx2 + vx3) * (-vx1 - vx2 + vx3) - cu_sq);
+		(distAD.f[DIR_PMP ])[ktse ] = c1o216 * conc * (c1o1 + c3o1 * ( vx1 - vx2 + vx3) + c9o2 * ( vx1 - vx2 + vx3) * ( vx1 - vx2 + vx3) - cu_sq);
+		(distAD.f[DIR_MPM ])[kbnw ] = c1o216 * conc * (c1o1 + c3o1 * (-vx1 + vx2 - vx3) + c9o2 * (-vx1 + vx2 - vx3) * (-vx1 + vx2 - vx3) - cu_sq);
+		(distAD.f[DIR_PMM ])[kbse ] = c1o216 * conc * (c1o1 + c3o1 * ( vx1 - vx2 - vx3) + c9o2 * ( vx1 - vx2 - vx3) * ( vx1 - vx2 - vx3) - cu_sq);
+		(distAD.f[DIR_MPP ])[ktnw ] = c1o216 * conc * (c1o1 + c3o1 * (-vx1 + vx2 + vx3) + c9o2 * (-vx1 + vx2 + vx3) * (-vx1 + vx2 + vx3) - cu_sq);
 	}
 }
 
@@ -229,7 +229,7 @@ extern "C" __global__ void InitAD27(
 // DEPRECATED (2022)
 
 // ////////////////////////////////////////////////////////////////////////////////
-// extern "C" __global__ void InitAD27(unsigned int* neighborX,
+// __global__ void InitAD27(unsigned int* neighborX,
 //                                        unsigned int* neighborY,
 //                                        unsigned int* neighborZ,
 //                                        unsigned int* geoD,
@@ -263,63 +263,63 @@ extern "C" __global__ void InitAD27(
 //          Distributions27 D27;
 //          if (EvenOrOdd==true)
 //          {
-//             D27.f[E   ] = &DD27[E   *size_Mat];
-//             D27.f[W   ] = &DD27[W   *size_Mat];
-//             D27.f[N   ] = &DD27[N   *size_Mat];
-//             D27.f[S   ] = &DD27[S   *size_Mat];
-//             D27.f[T   ] = &DD27[T   *size_Mat];
-//             D27.f[B   ] = &DD27[B   *size_Mat];
-//             D27.f[NE  ] = &DD27[NE  *size_Mat];
-//             D27.f[SW  ] = &DD27[SW  *size_Mat];
-//             D27.f[SE  ] = &DD27[SE  *size_Mat];
-//             D27.f[NW  ] = &DD27[NW  *size_Mat];
-//             D27.f[TE  ] = &DD27[TE  *size_Mat];
-//             D27.f[BW  ] = &DD27[BW  *size_Mat];
-//             D27.f[BE  ] = &DD27[BE  *size_Mat];
-//             D27.f[TW  ] = &DD27[TW  *size_Mat];
-//             D27.f[TN  ] = &DD27[TN  *size_Mat];
-//             D27.f[BS  ] = &DD27[BS  *size_Mat];
-//             D27.f[BN  ] = &DD27[BN  *size_Mat];
-//             D27.f[TS  ] = &DD27[TS  *size_Mat];
-//             D27.f[REST] = &DD27[REST*size_Mat];
-//             D27.f[TNE ] = &DD27[TNE *size_Mat];
-//             D27.f[TSW ] = &DD27[TSW *size_Mat];
-//             D27.f[TSE ] = &DD27[TSE *size_Mat];
-//             D27.f[TNW ] = &DD27[TNW *size_Mat];
-//             D27.f[BNE ] = &DD27[BNE *size_Mat];
-//             D27.f[BSW ] = &DD27[BSW *size_Mat];
-//             D27.f[BSE ] = &DD27[BSE *size_Mat];
-//             D27.f[BNW ] = &DD27[BNW *size_Mat];
+//             D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
+//             D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
+//             D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
+//             D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
+//             D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
+//             D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
+//             D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
+//             D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
+//             D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
+//             D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
+//             D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
+//             D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
+//             D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
+//             D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
+//             D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
+//             D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
+//             D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
+//             D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
+//             D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
+//             D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
+//             D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
+//             D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
+//             D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
+//             D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
+//             D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
+//             D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
+//             D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
 //          }
 //          else
 //          {
-//             D27.f[W   ] = &DD27[E   *size_Mat];
-//             D27.f[E   ] = &DD27[W   *size_Mat];
-//             D27.f[S   ] = &DD27[N   *size_Mat];
-//             D27.f[N   ] = &DD27[S   *size_Mat];
-//             D27.f[B   ] = &DD27[T   *size_Mat];
-//             D27.f[T   ] = &DD27[B   *size_Mat];
-//             D27.f[SW  ] = &DD27[NE  *size_Mat];
-//             D27.f[NE  ] = &DD27[SW  *size_Mat];
-//             D27.f[NW  ] = &DD27[SE  *size_Mat];
-//             D27.f[SE  ] = &DD27[NW  *size_Mat];
-//             D27.f[BW  ] = &DD27[TE  *size_Mat];
-//             D27.f[TE  ] = &DD27[BW  *size_Mat];
-//             D27.f[TW  ] = &DD27[BE  *size_Mat];
-//             D27.f[BE  ] = &DD27[TW  *size_Mat];
-//             D27.f[BS  ] = &DD27[TN  *size_Mat];
-//             D27.f[TN  ] = &DD27[BS  *size_Mat];
-//             D27.f[TS  ] = &DD27[BN  *size_Mat];
-//             D27.f[BN  ] = &DD27[TS  *size_Mat];
-//             D27.f[REST] = &DD27[REST*size_Mat];
-//             D27.f[BSW ] = &DD27[TNE *size_Mat];
-//             D27.f[BNE ] = &DD27[TSW *size_Mat];
-//             D27.f[BNW ] = &DD27[TSE *size_Mat];
-//             D27.f[BSE ] = &DD27[TNW *size_Mat];
-//             D27.f[TSW ] = &DD27[BNE *size_Mat];
-//             D27.f[TNE ] = &DD27[BSW *size_Mat];
-//             D27.f[TNW ] = &DD27[BSE *size_Mat];
-//             D27.f[TSE ] = &DD27[BNW *size_Mat];
+//             D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
+//             D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
+//             D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
+//             D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
+//             D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
+//             D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
+//             D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
+//             D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
+//             D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
+//             D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
+//             D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
+//             D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
+//             D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
+//             D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
+//             D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
+//             D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
+//             D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
+//             D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
+//             D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
+//             D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
+//             D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
+//             D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
+//             D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
+//             D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
+//             D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
+//             D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
+//             D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
 //          }
 //          //////////////////////////////////////////////////////////////////////////
 //          real ConcD = Conc[k];
@@ -390,33 +390,33 @@ extern "C" __global__ void InitAD27(
 //          ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 //          real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
-//          (D27.f[REST])[kzero] =   c8o27* ConcD*(c1o1-cu_sq);
-//          (D27.f[E   ])[ke   ] =   c2o27* ConcD*(c1o1+c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cu_sq);
-//          (D27.f[W   ])[kw   ] =   c2o27* ConcD*(c1o1+c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cu_sq);
-//          (D27.f[N   ])[kn   ] =   c2o27* ConcD*(c1o1+c3o1*(    vx2     )+c9o2*(     vx2    )*(     vx2    )-cu_sq);
-//          (D27.f[S   ])[ks   ] =   c2o27* ConcD*(c1o1+c3o1*(   -vx2     )+c9o2*(    -vx2    )*(    -vx2    )-cu_sq);
-//          (D27.f[T   ])[kt   ] =   c2o27* ConcD*(c1o1+c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cu_sq);
-//          (D27.f[B   ])[kb   ] =   c2o27* ConcD*(c1o1+c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cu_sq);
-//          (D27.f[NE  ])[kne  ] =   c1o54* ConcD*(c1o1+c3o1*( vx1+vx2    )+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq);
-//          (D27.f[SW  ])[ksw  ] =   c1o54* ConcD*(c1o1+c3o1*(-vx1-vx2    )+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq);
-//          (D27.f[SE  ])[kse  ] =   c1o54* ConcD*(c1o1+c3o1*( vx1-vx2    )+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq);
-//          (D27.f[NW  ])[knw  ] =   c1o54* ConcD*(c1o1+c3o1*(-vx1+vx2    )+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq);
-//          (D27.f[TE  ])[kte  ] =   c1o54* ConcD*(c1o1+c3o1*( vx1    +vx3)+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq);
-//          (D27.f[BW  ])[kbw  ] =   c1o54* ConcD*(c1o1+c3o1*(-vx1    -vx3)+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq);
-//          (D27.f[BE  ])[kbe  ] =   c1o54* ConcD*(c1o1+c3o1*( vx1    -vx3)+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq);
-//          (D27.f[TW  ])[ktw  ] =   c1o54* ConcD*(c1o1+c3o1*(-vx1    +vx3)+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq);
-//          (D27.f[TN  ])[ktn  ] =   c1o54* ConcD*(c1o1+c3o1*(     vx2+vx3)+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq);
-//          (D27.f[BS  ])[kbs  ] =   c1o54* ConcD*(c1o1+c3o1*(    -vx2-vx3)+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq);
-//          (D27.f[BN  ])[kbn  ] =   c1o54* ConcD*(c1o1+c3o1*(     vx2-vx3)+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq);
-//          (D27.f[TS  ])[kts  ] =   c1o54* ConcD*(c1o1+c3o1*(    -vx2+vx3)+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq);
-//          (D27.f[TNE ])[ktne ] =   c1o216*ConcD*(c1o1+c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq);
-//          (D27.f[BSW ])[kbsw ] =   c1o216*ConcD*(c1o1+c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq);
-//          (D27.f[BNE ])[kbne ] =   c1o216*ConcD*(c1o1+c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq);
-//          (D27.f[TSW ])[ktsw ] =   c1o216*ConcD*(c1o1+c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq);
-//          (D27.f[TSE ])[ktse ] =   c1o216*ConcD*(c1o1+c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq);
-//          (D27.f[BNW ])[kbnw ] =   c1o216*ConcD*(c1o1+c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq);
-//          (D27.f[BSE ])[kbse ] =   c1o216*ConcD*(c1o1+c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq);
-//          (D27.f[TNW ])[ktnw ] =   c1o216*ConcD*(c1o1+c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq);
+//          (D27.f[DIR_000])[kzero] =   c8o27* ConcD*(c1o1-cu_sq);
+//          (D27.f[DIR_P00   ])[ke   ] =   c2o27* ConcD*(c1o1+c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cu_sq);
+//          (D27.f[DIR_M00   ])[kw   ] =   c2o27* ConcD*(c1o1+c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cu_sq);
+//          (D27.f[DIR_0P0   ])[kn   ] =   c2o27* ConcD*(c1o1+c3o1*(    vx2     )+c9o2*(     vx2    )*(     vx2    )-cu_sq);
+//          (D27.f[DIR_0M0   ])[ks   ] =   c2o27* ConcD*(c1o1+c3o1*(   -vx2     )+c9o2*(    -vx2    )*(    -vx2    )-cu_sq);
+//          (D27.f[DIR_00P   ])[kt   ] =   c2o27* ConcD*(c1o1+c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cu_sq);
+//          (D27.f[DIR_00M   ])[kb   ] =   c2o27* ConcD*(c1o1+c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cu_sq);
+//          (D27.f[DIR_PP0  ])[kne  ] =   c1o54* ConcD*(c1o1+c3o1*( vx1+vx2    )+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq);
+//          (D27.f[DIR_MM0  ])[ksw  ] =   c1o54* ConcD*(c1o1+c3o1*(-vx1-vx2    )+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq);
+//          (D27.f[DIR_PM0  ])[kse  ] =   c1o54* ConcD*(c1o1+c3o1*( vx1-vx2    )+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq);
+//          (D27.f[DIR_MP0  ])[knw  ] =   c1o54* ConcD*(c1o1+c3o1*(-vx1+vx2    )+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq);
+//          (D27.f[DIR_P0P  ])[kte  ] =   c1o54* ConcD*(c1o1+c3o1*( vx1    +vx3)+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq);
+//          (D27.f[DIR_M0M  ])[kbw  ] =   c1o54* ConcD*(c1o1+c3o1*(-vx1    -vx3)+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq);
+//          (D27.f[DIR_P0M  ])[kbe  ] =   c1o54* ConcD*(c1o1+c3o1*( vx1    -vx3)+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq);
+//          (D27.f[DIR_M0P  ])[ktw  ] =   c1o54* ConcD*(c1o1+c3o1*(-vx1    +vx3)+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq);
+//          (D27.f[DIR_0PP  ])[ktn  ] =   c1o54* ConcD*(c1o1+c3o1*(     vx2+vx3)+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq);
+//          (D27.f[DIR_0MM  ])[kbs  ] =   c1o54* ConcD*(c1o1+c3o1*(    -vx2-vx3)+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq);
+//          (D27.f[DIR_0PM  ])[kbn  ] =   c1o54* ConcD*(c1o1+c3o1*(     vx2-vx3)+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq);
+//          (D27.f[DIR_0MP  ])[kts  ] =   c1o54* ConcD*(c1o1+c3o1*(    -vx2+vx3)+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq);
+//          (D27.f[DIR_PPP ])[ktne ] =   c1o216*ConcD*(c1o1+c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq);
+//          (D27.f[DIR_MMM ])[kbsw ] =   c1o216*ConcD*(c1o1+c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq);
+//          (D27.f[DIR_PPM ])[kbne ] =   c1o216*ConcD*(c1o1+c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq);
+//          (D27.f[DIR_MMP ])[ktsw ] =   c1o216*ConcD*(c1o1+c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq);
+//          (D27.f[DIR_PMP ])[ktse ] =   c1o216*ConcD*(c1o1+c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq);
+//          (D27.f[DIR_MPM ])[kbnw ] =   c1o216*ConcD*(c1o1+c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq);
+//          (D27.f[DIR_PMM ])[kbse ] =   c1o216*ConcD*(c1o1+c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq);
+//          (D27.f[DIR_MPP ])[ktnw ] =   c1o216*ConcD*(c1o1+c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq);
 //          ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 //       }
 //    }
@@ -440,7 +440,7 @@ extern "C" __global__ void InitAD27(
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void InitAD7( unsigned int* neighborX,
+__global__ void InitAD7( unsigned int* neighborX,
                                     unsigned int* neighborY,
                                     unsigned int* neighborZ,
                                     unsigned int* geoD,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/KernelUtilities.h b/src/gpu/VirtualFluids_GPU/GPU/KernelUtilities.h
index 34082639e496aee4e3335dcc753aa7bc9e105cd0..2f6a11aa17398b65858508c3f94b241c16551b37 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/KernelUtilities.h
+++ b/src/gpu/VirtualFluids_GPU/GPU/KernelUtilities.h
@@ -44,95 +44,95 @@ __inline__ __device__ void getPointersToDistributions(Distributions27 &dist, rea
 {
     if (isEvenTimestep)
     {
-        dist.f[E   ] = &distributionArray[E   *numberOfLBnodes];
-        dist.f[W   ] = &distributionArray[W   *numberOfLBnodes];
-        dist.f[N   ] = &distributionArray[N   *numberOfLBnodes];
-        dist.f[S   ] = &distributionArray[S   *numberOfLBnodes];
-        dist.f[T   ] = &distributionArray[T   *numberOfLBnodes];
-        dist.f[B   ] = &distributionArray[B   *numberOfLBnodes];
-        dist.f[NE  ] = &distributionArray[NE  *numberOfLBnodes];
-        dist.f[SW  ] = &distributionArray[SW  *numberOfLBnodes];
-        dist.f[SE  ] = &distributionArray[SE  *numberOfLBnodes];
-        dist.f[NW  ] = &distributionArray[NW  *numberOfLBnodes];
-        dist.f[TE  ] = &distributionArray[TE  *numberOfLBnodes];
-        dist.f[BW  ] = &distributionArray[BW  *numberOfLBnodes];
-        dist.f[BE  ] = &distributionArray[BE  *numberOfLBnodes];
-        dist.f[TW  ] = &distributionArray[TW  *numberOfLBnodes];
-        dist.f[TN  ] = &distributionArray[TN  *numberOfLBnodes];
-        dist.f[BS  ] = &distributionArray[BS  *numberOfLBnodes];
-        dist.f[BN  ] = &distributionArray[BN  *numberOfLBnodes];
-        dist.f[TS  ] = &distributionArray[TS  *numberOfLBnodes];
-        dist.f[REST] = &distributionArray[REST*numberOfLBnodes];
-        dist.f[TNE ] = &distributionArray[TNE *numberOfLBnodes];
-        dist.f[TSW ] = &distributionArray[TSW *numberOfLBnodes];
-        dist.f[TSE ] = &distributionArray[TSE *numberOfLBnodes];
-        dist.f[TNW ] = &distributionArray[TNW *numberOfLBnodes];
-        dist.f[BNE ] = &distributionArray[BNE *numberOfLBnodes];
-        dist.f[BSW ] = &distributionArray[BSW *numberOfLBnodes];
-        dist.f[BSE ] = &distributionArray[BSE *numberOfLBnodes];
-        dist.f[BNW ] = &distributionArray[BNW *numberOfLBnodes];
+        dist.f[DIR_P00] = &distributionArray[DIR_P00 * numberOfLBnodes]; // isEvenTimestep: slot d points at slice d (offset d * numberOfLBnodes)
+        dist.f[DIR_M00] = &distributionArray[DIR_M00 * numberOfLBnodes];
+        dist.f[DIR_0P0] = &distributionArray[DIR_0P0 * numberOfLBnodes];
+        dist.f[DIR_0M0] = &distributionArray[DIR_0M0 * numberOfLBnodes];
+        dist.f[DIR_00P] = &distributionArray[DIR_00P * numberOfLBnodes];
+        dist.f[DIR_00M] = &distributionArray[DIR_00M * numberOfLBnodes];
+        dist.f[DIR_PP0] = &distributionArray[DIR_PP0 * numberOfLBnodes];
+        dist.f[DIR_MM0] = &distributionArray[DIR_MM0 * numberOfLBnodes];
+        dist.f[DIR_PM0] = &distributionArray[DIR_PM0 * numberOfLBnodes];
+        dist.f[DIR_MP0] = &distributionArray[DIR_MP0 * numberOfLBnodes];
+        dist.f[DIR_P0P] = &distributionArray[DIR_P0P * numberOfLBnodes];
+        dist.f[DIR_M0M] = &distributionArray[DIR_M0M * numberOfLBnodes];
+        dist.f[DIR_P0M] = &distributionArray[DIR_P0M * numberOfLBnodes];
+        dist.f[DIR_M0P] = &distributionArray[DIR_M0P * numberOfLBnodes];
+        dist.f[DIR_0PP] = &distributionArray[DIR_0PP * numberOfLBnodes];
+        dist.f[DIR_0MM] = &distributionArray[DIR_0MM * numberOfLBnodes];
+        dist.f[DIR_0PM] = &distributionArray[DIR_0PM * numberOfLBnodes];
+        dist.f[DIR_0MP] = &distributionArray[DIR_0MP * numberOfLBnodes];
+        dist.f[DIR_000] = &distributionArray[DIR_000 * numberOfLBnodes];
+        dist.f[DIR_PPP] = &distributionArray[DIR_PPP * numberOfLBnodes];
+        dist.f[DIR_MMP] = &distributionArray[DIR_MMP * numberOfLBnodes];
+        dist.f[DIR_PMP] = &distributionArray[DIR_PMP * numberOfLBnodes];
+        dist.f[DIR_MPP] = &distributionArray[DIR_MPP * numberOfLBnodes];
+        dist.f[DIR_PPM] = &distributionArray[DIR_PPM * numberOfLBnodes];
+        dist.f[DIR_MMM] = &distributionArray[DIR_MMM * numberOfLBnodes];
+        dist.f[DIR_PMM] = &distributionArray[DIR_PMM * numberOfLBnodes];
+        dist.f[DIR_MPM] = &distributionArray[DIR_MPM * numberOfLBnodes];
     }
     else
     {
-         dist.f[W   ] = &distributionArray[E   *numberOfLBnodes];
-         dist.f[E   ] = &distributionArray[W   *numberOfLBnodes];
-         dist.f[S   ] = &distributionArray[N   *numberOfLBnodes];
-         dist.f[N   ] = &distributionArray[S   *numberOfLBnodes];
-         dist.f[B   ] = &distributionArray[T   *numberOfLBnodes];
-         dist.f[T   ] = &distributionArray[B   *numberOfLBnodes];
-         dist.f[SW  ] = &distributionArray[NE  *numberOfLBnodes];
-         dist.f[NE  ] = &distributionArray[SW  *numberOfLBnodes];
-         dist.f[NW  ] = &distributionArray[SE  *numberOfLBnodes];
-         dist.f[SE  ] = &distributionArray[NW  *numberOfLBnodes];
-         dist.f[BW  ] = &distributionArray[TE  *numberOfLBnodes];
-         dist.f[TE  ] = &distributionArray[BW  *numberOfLBnodes];
-         dist.f[TW  ] = &distributionArray[BE  *numberOfLBnodes];
-         dist.f[BE  ] = &distributionArray[TW  *numberOfLBnodes];
-         dist.f[BS  ] = &distributionArray[TN  *numberOfLBnodes];
-         dist.f[TN  ] = &distributionArray[BS  *numberOfLBnodes];
-         dist.f[TS  ] = &distributionArray[BN  *numberOfLBnodes];
-         dist.f[BN  ] = &distributionArray[TS  *numberOfLBnodes];
-         dist.f[REST] = &distributionArray[REST*numberOfLBnodes];
-         dist.f[TNE ] = &distributionArray[BSW *numberOfLBnodes];
-         dist.f[TSW ] = &distributionArray[BNE *numberOfLBnodes];
-         dist.f[TSE ] = &distributionArray[BNW *numberOfLBnodes];
-         dist.f[TNW ] = &distributionArray[BSE *numberOfLBnodes];
-         dist.f[BNE ] = &distributionArray[TSW *numberOfLBnodes];
-         dist.f[BSW ] = &distributionArray[TNE *numberOfLBnodes];
-         dist.f[BSE ] = &distributionArray[TNW *numberOfLBnodes];
-         dist.f[BNW ] = &distributionArray[TSE *numberOfLBnodes];
+        dist.f[DIR_M00] = &distributionArray[DIR_P00 * numberOfLBnodes]; // otherwise: slot d points at the slice of the direction with P and M exchanged
+        dist.f[DIR_P00] = &distributionArray[DIR_M00 * numberOfLBnodes];
+        dist.f[DIR_0M0] = &distributionArray[DIR_0P0 * numberOfLBnodes];
+        dist.f[DIR_0P0] = &distributionArray[DIR_0M0 * numberOfLBnodes];
+        dist.f[DIR_00M] = &distributionArray[DIR_00P * numberOfLBnodes];
+        dist.f[DIR_00P] = &distributionArray[DIR_00M * numberOfLBnodes];
+        dist.f[DIR_MM0] = &distributionArray[DIR_PP0 * numberOfLBnodes];
+        dist.f[DIR_PP0] = &distributionArray[DIR_MM0 * numberOfLBnodes];
+        dist.f[DIR_MP0] = &distributionArray[DIR_PM0 * numberOfLBnodes];
+        dist.f[DIR_PM0] = &distributionArray[DIR_MP0 * numberOfLBnodes];
+        dist.f[DIR_M0M] = &distributionArray[DIR_P0P * numberOfLBnodes];
+        dist.f[DIR_P0P] = &distributionArray[DIR_M0M * numberOfLBnodes];
+        dist.f[DIR_M0P] = &distributionArray[DIR_P0M * numberOfLBnodes];
+        dist.f[DIR_P0M] = &distributionArray[DIR_M0P * numberOfLBnodes];
+        dist.f[DIR_0MM] = &distributionArray[DIR_0PP * numberOfLBnodes];
+        dist.f[DIR_0PP] = &distributionArray[DIR_0MM * numberOfLBnodes];
+        dist.f[DIR_0MP] = &distributionArray[DIR_0PM * numberOfLBnodes];
+        dist.f[DIR_0PM] = &distributionArray[DIR_0MP * numberOfLBnodes];
+        dist.f[DIR_000] = &distributionArray[DIR_000 * numberOfLBnodes];
+        dist.f[DIR_PPP] = &distributionArray[DIR_MMM * numberOfLBnodes];
+        dist.f[DIR_MMP] = &distributionArray[DIR_PPM * numberOfLBnodes];
+        dist.f[DIR_PMP] = &distributionArray[DIR_MPM * numberOfLBnodes];
+        dist.f[DIR_MPP] = &distributionArray[DIR_PMM * numberOfLBnodes];
+        dist.f[DIR_PPM] = &distributionArray[DIR_MMP * numberOfLBnodes];
+        dist.f[DIR_MMM] = &distributionArray[DIR_PPP * numberOfLBnodes];
+        dist.f[DIR_PMM] = &distributionArray[DIR_MPP * numberOfLBnodes];
+        dist.f[DIR_MPM] = &distributionArray[DIR_PMP * numberOfLBnodes];
     }
 }
 
 __inline__ __device__ void getPointersToSubgridDistances(SubgridDistances27& subgridD, real* subgridDistances, const unsigned int numberOfSubgridIndices)
 {
-    subgridD.q[E   ] = &subgridDistances[E    *numberOfSubgridIndices];
-    subgridD.q[W   ] = &subgridDistances[W    *numberOfSubgridIndices];
-    subgridD.q[N   ] = &subgridDistances[N    *numberOfSubgridIndices];
-    subgridD.q[S   ] = &subgridDistances[S    *numberOfSubgridIndices];
-    subgridD.q[T   ] = &subgridDistances[T    *numberOfSubgridIndices];
-    subgridD.q[B   ] = &subgridDistances[B    *numberOfSubgridIndices];
-    subgridD.q[NE  ] = &subgridDistances[NE   *numberOfSubgridIndices];
-    subgridD.q[SW  ] = &subgridDistances[SW   *numberOfSubgridIndices];
-    subgridD.q[SE  ] = &subgridDistances[SE   *numberOfSubgridIndices];
-    subgridD.q[NW  ] = &subgridDistances[NW   *numberOfSubgridIndices];
-    subgridD.q[TE  ] = &subgridDistances[TE   *numberOfSubgridIndices];
-    subgridD.q[BW  ] = &subgridDistances[BW   *numberOfSubgridIndices];
-    subgridD.q[BE  ] = &subgridDistances[BE   *numberOfSubgridIndices];
-    subgridD.q[TW  ] = &subgridDistances[TW   *numberOfSubgridIndices];
-    subgridD.q[TN  ] = &subgridDistances[TN   *numberOfSubgridIndices];
-    subgridD.q[BS  ] = &subgridDistances[BS   *numberOfSubgridIndices];
-    subgridD.q[BN  ] = &subgridDistances[BN   *numberOfSubgridIndices];
-    subgridD.q[TS  ] = &subgridDistances[TS   *numberOfSubgridIndices];
-    subgridD.q[REST] = &subgridDistances[REST *numberOfSubgridIndices];
-    subgridD.q[TNE ] = &subgridDistances[TNE  *numberOfSubgridIndices];
-    subgridD.q[TSW ] = &subgridDistances[TSW  *numberOfSubgridIndices];
-    subgridD.q[TSE ] = &subgridDistances[TSE  *numberOfSubgridIndices];
-    subgridD.q[TNW ] = &subgridDistances[TNW  *numberOfSubgridIndices];
-    subgridD.q[BNE ] = &subgridDistances[BNE  *numberOfSubgridIndices];
-    subgridD.q[BSW ] = &subgridDistances[BSW  *numberOfSubgridIndices];
-    subgridD.q[BSE ] = &subgridDistances[BSE  *numberOfSubgridIndices];
-    subgridD.q[BNW ] = &subgridDistances[BNW  *numberOfSubgridIndices];
+    subgridD.q[DIR_P00] = &subgridDistances[DIR_P00 * numberOfSubgridIndices]; // q[d] points at slice d of subgridDistances (offset d * numberOfSubgridIndices)
+    subgridD.q[DIR_M00] = &subgridDistances[DIR_M00 * numberOfSubgridIndices];
+    subgridD.q[DIR_0P0] = &subgridDistances[DIR_0P0 * numberOfSubgridIndices];
+    subgridD.q[DIR_0M0] = &subgridDistances[DIR_0M0 * numberOfSubgridIndices];
+    subgridD.q[DIR_00P] = &subgridDistances[DIR_00P * numberOfSubgridIndices];
+    subgridD.q[DIR_00M] = &subgridDistances[DIR_00M * numberOfSubgridIndices];
+    subgridD.q[DIR_PP0] = &subgridDistances[DIR_PP0 * numberOfSubgridIndices];
+    subgridD.q[DIR_MM0] = &subgridDistances[DIR_MM0 * numberOfSubgridIndices];
+    subgridD.q[DIR_PM0] = &subgridDistances[DIR_PM0 * numberOfSubgridIndices];
+    subgridD.q[DIR_MP0] = &subgridDistances[DIR_MP0 * numberOfSubgridIndices];
+    subgridD.q[DIR_P0P] = &subgridDistances[DIR_P0P * numberOfSubgridIndices];
+    subgridD.q[DIR_M0M] = &subgridDistances[DIR_M0M * numberOfSubgridIndices];
+    subgridD.q[DIR_P0M] = &subgridDistances[DIR_P0M * numberOfSubgridIndices];
+    subgridD.q[DIR_M0P] = &subgridDistances[DIR_M0P * numberOfSubgridIndices];
+    subgridD.q[DIR_0PP] = &subgridDistances[DIR_0PP * numberOfSubgridIndices];
+    subgridD.q[DIR_0MM] = &subgridDistances[DIR_0MM * numberOfSubgridIndices];
+    subgridD.q[DIR_0PM] = &subgridDistances[DIR_0PM * numberOfSubgridIndices];
+    subgridD.q[DIR_0MP] = &subgridDistances[DIR_0MP * numberOfSubgridIndices];
+    subgridD.q[DIR_000] = &subgridDistances[DIR_000 * numberOfSubgridIndices];
+    subgridD.q[DIR_PPP] = &subgridDistances[DIR_PPP * numberOfSubgridIndices];
+    subgridD.q[DIR_MMP] = &subgridDistances[DIR_MMP * numberOfSubgridIndices];
+    subgridD.q[DIR_PMP] = &subgridDistances[DIR_PMP * numberOfSubgridIndices];
+    subgridD.q[DIR_MPP] = &subgridDistances[DIR_MPP * numberOfSubgridIndices];
+    subgridD.q[DIR_PPM] = &subgridDistances[DIR_PPM * numberOfSubgridIndices];
+    subgridD.q[DIR_MMM] = &subgridDistances[DIR_MMM * numberOfSubgridIndices];
+    subgridD.q[DIR_PMM] = &subgridDistances[DIR_PMM * numberOfSubgridIndices];
+    subgridD.q[DIR_MPM] = &subgridDistances[DIR_MPM * numberOfSubgridIndices];
 }
 
 __inline__ __device__ real getEquilibriumForBC(const real& drho, const real& velocity, const real& cu_sq, const real weight)
@@ -148,6 +148,13 @@ __inline__ __device__ real getInterpolatedDistributionForVeloBC(const real& q, c
            + (q * (f + fInverse) - c6o1 * weight * velocity) / (c1o1 + q);
 }
 
+// Bounce-back distribution for a velocity BC: returns f reduced by the term c6o1 * weight * velocity.
+__inline__ __device__ real getBounceBackDistributionForVeloBC(const real& f, const real& velocity, const real weight)
+{
+    const real velocityCorrection = c6o1 * weight * velocity;
+    return f - velocityCorrection;
+}
+
 __inline__ __device__ real getInterpolatedDistributionForNoSlipBC(const real& q, const real& f, const real& fInverse, const real& feq, 
                                                                   const real& omega)
 {
diff --git a/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu b/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu
index d659c88ffb1abc977d58e46f13b31f37ec485fee..2942f7e273f123baeabd066ce31bbb4add31c15d 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/LBMKernel.cu
@@ -18,7 +18,7 @@
 
 #include "Parameter/Parameter.h"
 //////////////////////////////////////////////////////////////////////////
-extern "C" void KernelCas27( unsigned int grid_nx,
+void KernelCas27( unsigned int grid_nx,
                              unsigned int grid_ny,
                              unsigned int grid_nz,
                              real s9,
@@ -44,7 +44,7 @@ extern "C" void KernelCas27( unsigned int grid_nx,
      getLastCudaError("LB_Kernel_Casc27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void KernelCasSP27( unsigned int numberOfThreads,
+void KernelCasSP27( unsigned int numberOfThreads,
                                real s9,
                                unsigned int* bcMatD,
                                unsigned int* neighborX,
@@ -67,7 +67,7 @@ extern "C" void KernelCasSP27( unsigned int numberOfThreads,
       getLastCudaError("LB_Kernel_Casc_SP_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void KernelCasSPMS27( unsigned int numberOfThreads,
+void KernelCasSPMS27( unsigned int numberOfThreads,
                                  real s9,
                                  unsigned int* bcMatD,
                                  unsigned int* neighborX,
@@ -90,7 +90,7 @@ extern "C" void KernelCasSPMS27( unsigned int numberOfThreads,
       getLastCudaError("LB_Kernel_Casc_SP_MS_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void KernelCasSPMSOHM27( unsigned int numberOfThreads,
+void KernelCasSPMSOHM27( unsigned int numberOfThreads,
                                     real s9,
                                     unsigned int* bcMatD,
                                     unsigned int* neighborX,
@@ -113,7 +113,7 @@ extern "C" void KernelCasSPMSOHM27( unsigned int numberOfThreads,
       getLastCudaError("LB_Kernel_Casc_SP_MS_OHM_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void KernelKumCompSRTSP27(
+void KernelKumCompSRTSP27(
 	unsigned int numberOfThreads,
 	real omega,
 	unsigned int* bcMatD,
@@ -142,7 +142,7 @@ extern "C" void KernelKumCompSRTSP27(
       getLastCudaError("LB_Kernel_Kum_New_Comp_SRT_SP_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void KernelKum1hSP27(    unsigned int numberOfThreads,
+void KernelKum1hSP27(    unsigned int numberOfThreads,
 									real omega,
 									real deltaPhi,
 									real angularVelocity,
@@ -175,7 +175,7 @@ extern "C" void KernelKum1hSP27(    unsigned int numberOfThreads,
 		getLastCudaError("LB_Kernel_Kum_New_SP_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void KernelCascadeSP27(  unsigned int numberOfThreads,
+void KernelCascadeSP27(  unsigned int numberOfThreads,
 									real s9,
 									unsigned int* bcMatD,
 									unsigned int* neighborX,
@@ -198,7 +198,7 @@ extern "C" void KernelCascadeSP27(  unsigned int numberOfThreads,
 		getLastCudaError("LB_Kernel_Cascade_SP_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void KernelKumNewSP27(   unsigned int numberOfThreads,
+void KernelKumNewSP27(   unsigned int numberOfThreads,
 									real s9,
 									unsigned int* bcMatD,
 									unsigned int* neighborX,
@@ -222,7 +222,7 @@ extern "C" void KernelKumNewSP27(   unsigned int numberOfThreads,
 		getLastCudaError("LB_Kernel_Kum_New_SP_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void KernelKumNewCompSP27(unsigned int numberOfThreads,
+void KernelKumNewCompSP27(unsigned int numberOfThreads,
 									real s9,
 									unsigned int* bcMatD,
 									unsigned int* neighborX,
@@ -255,7 +255,7 @@ extern "C" void KernelKumNewCompSP27(unsigned int numberOfThreads,
 }
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" void CumulantOnePreconditionedErrorDiffusionChimCompSP27(unsigned int numberOfThreads,
+void CumulantOnePreconditionedErrorDiffusionChimCompSP27(unsigned int numberOfThreads,
 																	real s9,
 																	unsigned int* bcMatD,
 																	unsigned int* neighborX,
@@ -288,7 +288,7 @@ extern "C" void CumulantOnePreconditionedErrorDiffusionChimCompSP27(unsigned int
 		getLastCudaError("Cumulant_One_preconditioned_chim_Comp_SP_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void CumulantOnePreconditionedChimCompSP27(  unsigned int numberOfThreads,
+void CumulantOnePreconditionedChimCompSP27(  unsigned int numberOfThreads,
 														real s9,
 														unsigned int* bcMatD,
 														unsigned int* neighborX,
@@ -321,7 +321,7 @@ extern "C" void CumulantOnePreconditionedChimCompSP27(  unsigned int numberOfThr
 		getLastCudaError("Cumulant_One_preconditioned_chim_Comp_SP_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void CumulantOneChimCompSP27(unsigned int numberOfThreads,
+void CumulantOneChimCompSP27(unsigned int numberOfThreads,
 										real s9,
 										unsigned int* bcMatD,
 										unsigned int* neighborX,
@@ -354,7 +354,7 @@ extern "C" void CumulantOneChimCompSP27(unsigned int numberOfThreads,
 		getLastCudaError("Cumulant_One_chim_Comp_SP_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void KernelKumIsoTestSP27(unsigned int numberOfThreads,
+void KernelKumIsoTestSP27(unsigned int numberOfThreads,
 									 real s9,
 									 unsigned int* bcMatD,
 									 unsigned int* neighborX,
@@ -384,7 +384,7 @@ extern "C" void KernelKumIsoTestSP27(unsigned int numberOfThreads,
 	getLastCudaError("LB_Kernel_Kum_IsoTest_SP_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void KernelKumCompSP27(  unsigned int numberOfThreads,
+void KernelKumCompSP27(  unsigned int numberOfThreads,
 									real s9,
 									unsigned int* bcMatD,
 									unsigned int* neighborX,
@@ -408,7 +408,7 @@ extern "C" void KernelKumCompSP27(  unsigned int numberOfThreads,
 		getLastCudaError("LB_Kernel_Kum_Comp_SP_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void KernelPMCumOneCompSP27(unsigned int numberOfThreads,
+void KernelPMCumOneCompSP27(unsigned int numberOfThreads,
 									   real omega,
 									   unsigned int* neighborX,
 									   unsigned int* neighborY,
@@ -444,7 +444,7 @@ extern "C" void KernelPMCumOneCompSP27(unsigned int numberOfThreads,
 	getLastCudaError("LB_Kernel_PM_Cum_One_Comp_SP_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void KernelWaleBySoniMalavCumAA2016CompSP27(
+void KernelWaleBySoniMalavCumAA2016CompSP27(
 	unsigned int numberOfThreads,
 	real s9,
 	unsigned int* bcMatD,
@@ -489,7 +489,7 @@ extern "C" void KernelWaleBySoniMalavCumAA2016CompSP27(
 	getLastCudaError("LB_Kernel_WaleBySoniMalav_Cum_AA2016_Comp_SP_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void KernelADincomp7(   unsigned int numberOfThreads,
+void KernelADincomp7(   unsigned int numberOfThreads,
 								   real diffusivity,
 								   unsigned int* bcMatD,
 								   unsigned int* neighborX,
@@ -514,7 +514,7 @@ extern "C" void KernelADincomp7(   unsigned int numberOfThreads,
       getLastCudaError("LB_Kernel_AD_Incomp_7 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void KernelADincomp27( unsigned int numberOfThreads,
+void KernelADincomp27( unsigned int numberOfThreads,
 								  real diffusivity,
 								  unsigned int* bcMatD,
 								  unsigned int* neighborX,
@@ -539,7 +539,7 @@ extern "C" void KernelADincomp27( unsigned int numberOfThreads,
 	getLastCudaError("LB_Kernel_AD_Incomp_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void Init27( int myid,
+void Init27( int myid,
                         int numprocs,
                         real u0,
                         unsigned int* geoD,
@@ -576,7 +576,7 @@ extern "C" void Init27( int myid,
 	getLastCudaError("LBInit27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void InitNonEqPartSP27( unsigned int numberOfThreads,
+void InitNonEqPartSP27( unsigned int numberOfThreads,
                                    unsigned int* neighborX,
                                    unsigned int* neighborY,
                                    unsigned int* neighborZ,
@@ -609,7 +609,7 @@ extern "C" void InitNonEqPartSP27( unsigned int numberOfThreads,
 	getLastCudaError("LBInitNonEqPartSP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void InitThS7(     unsigned int numberOfThreads,
+void InitThS7(     unsigned int numberOfThreads,
                               unsigned int* neighborX,
                               unsigned int* neighborY,
                               unsigned int* neighborZ,
@@ -638,7 +638,7 @@ extern "C" void InitThS7(     unsigned int numberOfThreads,
 	getLastCudaError("InitAD7 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void InitADDev27( unsigned int numberOfThreads,
+void InitADDev27( unsigned int numberOfThreads,
                            unsigned int* neighborX,
                            unsigned int* neighborY,
                            unsigned int* neighborZ,
@@ -667,7 +667,7 @@ extern "C" void InitADDev27( unsigned int numberOfThreads,
 	getLastCudaError("InitAD27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void PostProcessorF3_2018Fehlberg(
+void PostProcessorF3_2018Fehlberg(
 	unsigned int numberOfThreads,
 	real omega,
 	unsigned int* bcMatD,
@@ -705,7 +705,7 @@ extern "C" void PostProcessorF3_2018Fehlberg(
 	getLastCudaError("LB_PostProcessor_F3_2018_Fehlberg execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void CalcMac27( real* vxD,
+void CalcMac27( real* vxD,
                            real* vyD,
                            real* vzD,
                            real* rhoD,
@@ -737,7 +737,7 @@ extern "C" void CalcMac27( real* vxD,
 	getLastCudaError("LBCalcMac27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void CalcMacSP27( real* vxD,
+void CalcMacSP27( real* vxD,
                              real* vyD,
                              real* vzD,
                              real* rhoD,
@@ -768,7 +768,7 @@ extern "C" void CalcMacSP27( real* vxD,
 	getLastCudaError("LBCalcMacSP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void CalcMacCompSP27( real* vxD,
+void CalcMacCompSP27( real* vxD,
 								 real* vyD,
 								 real* vzD,
 								 real* rhoD,
@@ -799,7 +799,7 @@ extern "C" void CalcMacCompSP27( real* vxD,
 	getLastCudaError("LBCalcMacSP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void CalcMacThS7(  real* Conc,
+void CalcMacThS7(  real* Conc,
                               unsigned int* geoD,
                               unsigned int* neighborX,
                               unsigned int* neighborY,
@@ -822,7 +822,7 @@ extern "C" void CalcMacThS7(  real* Conc,
 	getLastCudaError("CalcConc7 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void PlaneConcThS7(real* Conc,
+void PlaneConcThS7(real* Conc,
 							  int* kPC,
 							  unsigned int numberOfPointskPC,
 							  unsigned int* geoD,
@@ -849,7 +849,7 @@ extern "C" void PlaneConcThS7(real* Conc,
 	getLastCudaError("GetPlaneConc7 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void PlaneConcThS27(real* Conc,
+void PlaneConcThS27(real* Conc,
 							   int* kPC,
 							   unsigned int numberOfPointskPC,
 							   unsigned int* geoD,
@@ -876,7 +876,7 @@ extern "C" void PlaneConcThS27(real* Conc,
 	getLastCudaError("GetPlaneConc27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void CalcConcentration27( unsigned int numberOfThreads,
+void CalcConcentration27( unsigned int numberOfThreads,
                                      real* Conc,
                                      unsigned int* geoD,
                                      unsigned int* neighborX,
@@ -899,7 +899,7 @@ extern "C" void CalcConcentration27( unsigned int numberOfThreads,
 	getLastCudaError("CalcConc27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void CalcMedSP27(  real* vxD,
+void CalcMedSP27(  real* vxD,
                               real* vyD,
                               real* vzD,
                               real* rhoD,
@@ -930,7 +930,7 @@ extern "C" void CalcMedSP27(  real* vxD,
 	getLastCudaError("LBCalcMedSP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void CalcMedCompSP27(  real* vxD,
+void CalcMedCompSP27(  real* vxD,
 								  real* vyD,
 								  real* vzD,
 								  real* rhoD,
@@ -961,7 +961,7 @@ extern "C" void CalcMedCompSP27(  real* vxD,
 	getLastCudaError("LBCalcMedSP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void CalcMedCompAD27(
+void CalcMedCompAD27(
 	real* vxD,
 	real* vyD,
 	real* vzD,
@@ -998,7 +998,7 @@ extern "C" void CalcMedCompAD27(
 	getLastCudaError("LBCalcMedAD27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void CalcMacMedSP27(  real* vxD,
+void CalcMacMedSP27(  real* vxD,
                                  real* vyD,
                                  real* vzD,
                                  real* rhoD,
@@ -1029,7 +1029,7 @@ extern "C" void CalcMacMedSP27(  real* vxD,
 	getLastCudaError("LBCalcMacMedSP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ResetMedianValuesSP27(
+void ResetMedianValuesSP27(
 	real* vxD,
 	real* vyD,
 	real* vzD,
@@ -1053,7 +1053,7 @@ extern "C" void ResetMedianValuesSP27(
 	getLastCudaError("LBResetMedianValuesSP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ResetMedianValuesAD27(
+void ResetMedianValuesAD27(
 	real* vxD,
 	real* vyD,
 	real* vzD,
@@ -1078,7 +1078,7 @@ extern "C" void ResetMedianValuesAD27(
 	getLastCudaError("LBResetMedianValuesAD27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void Calc2ndMomentsIncompSP27(real* kxyFromfcNEQ,
+void Calc2ndMomentsIncompSP27(real* kxyFromfcNEQ,
 										 real* kyzFromfcNEQ,
 										 real* kxzFromfcNEQ,
 										 real* kxxMyyFromfcNEQ,
@@ -1109,7 +1109,7 @@ extern "C" void Calc2ndMomentsIncompSP27(real* kxyFromfcNEQ,
 	getLastCudaError("LBCalc2ndMomentsIncompSP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void Calc2ndMomentsCompSP27( real* kxyFromfcNEQ,
+void Calc2ndMomentsCompSP27( real* kxyFromfcNEQ,
 										real* kyzFromfcNEQ,
 										real* kxzFromfcNEQ,
 										real* kxxMyyFromfcNEQ,
@@ -1140,7 +1140,7 @@ extern "C" void Calc2ndMomentsCompSP27( real* kxyFromfcNEQ,
 	getLastCudaError("LBCalc2ndMomentsCompSP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void Calc3rdMomentsIncompSP27(real* CUMbbb,
+void Calc3rdMomentsIncompSP27(real* CUMbbb,
 										 real* CUMabc,
 										 real* CUMbac,
 										 real* CUMbca,
@@ -1175,7 +1175,7 @@ extern "C" void Calc3rdMomentsIncompSP27(real* CUMbbb,
 	getLastCudaError("LBCalc3rdMomentsIncompSP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void Calc3rdMomentsCompSP27( real* CUMbbb,
+void Calc3rdMomentsCompSP27( real* CUMbbb,
 										real* CUMabc,
 										real* CUMbac,
 										real* CUMbca,
@@ -1210,7 +1210,7 @@ extern "C" void Calc3rdMomentsCompSP27( real* CUMbbb,
 	getLastCudaError("LBCalc3rdMomentsCompSP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void CalcHigherMomentsIncompSP27(real* CUMcbb,
+void CalcHigherMomentsIncompSP27(real* CUMcbb,
 											real* CUMbcb,
 											real* CUMbbc,
 											real* CUMcca,
@@ -1251,7 +1251,7 @@ extern "C" void CalcHigherMomentsIncompSP27(real* CUMcbb,
 	getLastCudaError("LBCalcHigherMomentsIncompSP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void CalcHigherMomentsCompSP27(  real* CUMcbb,
+void CalcHigherMomentsCompSP27(  real* CUMcbb,
 											real* CUMbcb,
 											real* CUMbbc,
 											real* CUMcca,
@@ -1292,7 +1292,7 @@ extern "C" void CalcHigherMomentsCompSP27(  real* CUMcbb,
 	getLastCudaError("LBCalcHigherMomentsCompSP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void LBCalcMeasurePoints27(real* vxMP,
+void LBCalcMeasurePoints27(real* vxMP,
                                       real* vyMP,
                                       real* vzMP,
                                       real* rhoMP,
@@ -1329,7 +1329,7 @@ extern "C" void LBCalcMeasurePoints27(real* vxMP,
 	getLastCudaError("LBCalcMeasurePoints execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void BcPress27( int nx,
+void BcPress27( int nx,
                            int ny,
                            int tz,
                            unsigned int grid_nx,
@@ -1358,7 +1358,7 @@ extern "C" void BcPress27( int nx,
 	getLastCudaError("LB_BC_Press_East27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void BcVel27(int nx,
+void BcVel27(int nx,
                         int ny,
                         int nz,
                         int itz,
@@ -1395,9 +1395,7 @@ extern "C" void BcVel27(int nx,
 	getLastCudaError("LB_BC_Vel_West_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QADPressDev7( unsigned int numberOfThreads,
-                              int nx,
-                              int ny,
+void QADPressDev7( unsigned int numberOfThreads,
                               real* DD,
                               real* DD7,
                               real* temp,
@@ -1432,7 +1430,7 @@ extern "C" void QADPressDev7( unsigned int numberOfThreads,
 	getLastCudaError("QADPress7 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QADPressDev27(unsigned int numberOfThreads,
+void QADPressDev27(unsigned int numberOfThreads,
                               real* DD,
                               real* DD27,
                               real* temp,
@@ -1467,7 +1465,7 @@ extern "C" void QADPressDev27(unsigned int numberOfThreads,
 	getLastCudaError("QADPress27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QADPressNEQNeighborDev27(
+void QADPressNEQNeighborDev27(
 											unsigned int numberOfThreads,
 											real* DD,
 											real* DD27,
@@ -1499,7 +1497,7 @@ extern "C" void QADPressNEQNeighborDev27(
    	getLastCudaError("QADPressNEQNeighbor27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QADVelDev7(unsigned int numberOfThreads,
+void QADVelDev7(unsigned int numberOfThreads,
                            real* DD,
                            real* DD7,
                            real* temp,
@@ -1535,7 +1533,7 @@ extern "C" void QADVelDev7(unsigned int numberOfThreads,
 	getLastCudaError("QADVel7 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QADVelDev27(  unsigned int numberOfThreads,
+void QADVelDev27(  unsigned int numberOfThreads,
                               real* DD,
                               real* DD27,
                               real* temp,
@@ -1570,7 +1568,7 @@ extern "C" void QADVelDev27(  unsigned int numberOfThreads,
       getLastCudaError("QADVel27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QADDev7(unsigned int numberOfThreads,
+void QADDev7(unsigned int numberOfThreads,
                         real* DD,
                         real* DD7,
                         real* temp,
@@ -1606,7 +1604,7 @@ extern "C" void QADDev7(unsigned int numberOfThreads,
 
 //////////////////////////////////////////////////////////////////////////
 // Other advection diffusion kernels are in kernel factory :(
-extern "C" void FactorizedCentralMomentsAdvectionDiffusionDeviceKernel(
+void FactorizedCentralMomentsAdvectionDiffusionDeviceKernel(
    uint numberOfThreads,
    real omegaDiffusivity,
    uint* typeOfGridNode,
@@ -1638,7 +1636,7 @@ extern "C" void FactorizedCentralMomentsAdvectionDiffusionDeviceKernel(
 }
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ADSlipVelDevComp(
+void ADSlipVelDevComp(
 	uint numberOfThreads,
 	real * normalX,
 	real * normalY,
@@ -1676,7 +1674,7 @@ extern "C" void ADSlipVelDevComp(
 }
 //////////////////////////////////////////////////////////////////////////
 
-extern "C" void QADDirichletDev27( unsigned int numberOfThreads,
+void QADDirichletDev27( unsigned int numberOfThreads,
 								   real* DD,
 								   real* DD27,
 								   real* temp,
@@ -1710,7 +1708,7 @@ extern "C" void QADDirichletDev27( unsigned int numberOfThreads,
       getLastCudaError("QADDirichletDev27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QADBBDev27(unsigned int numberOfThreads,
+void QADBBDev27(unsigned int numberOfThreads,
                            real* DD,
                            real* DD27,
                            real* temp,
@@ -1743,7 +1741,7 @@ extern "C" void QADBBDev27(unsigned int numberOfThreads,
       getLastCudaError("QADBB27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QNoSlipADincompDev7(unsigned int numberOfThreads,
+void QNoSlipADincompDev7(unsigned int numberOfThreads,
 									real* DD,
 									real* DD7,
 									real* temp,
@@ -1777,7 +1775,7 @@ extern "C" void QNoSlipADincompDev7(unsigned int numberOfThreads,
       getLastCudaError("QNoSlipADincomp7 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QNoSlipADincompDev27(  unsigned int numberOfThreads,
+void QNoSlipADincompDev27(  unsigned int numberOfThreads,
 									   real* DD,
 									   real* DD27,
 									   real* temp,
@@ -1811,7 +1809,7 @@ extern "C" void QNoSlipADincompDev27(  unsigned int numberOfThreads,
       getLastCudaError("QNoSlipADincomp27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QADVeloIncompDev7( unsigned int numberOfThreads,
+void QADVeloIncompDev7( unsigned int numberOfThreads,
 								   real* DD,
 								   real* DD7,
 								   real* temp,
@@ -1846,7 +1844,7 @@ extern "C" void QADVeloIncompDev7( unsigned int numberOfThreads,
       getLastCudaError("QADVeloIncomp7 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QADVeloIncompDev27(   unsigned int numberOfThreads,
+void QADVeloIncompDev27(   unsigned int numberOfThreads,
 									  real* DD,
 									  real* DD27,
 									  real* temp,
@@ -1882,7 +1880,7 @@ extern "C" void QADVeloIncompDev27(   unsigned int numberOfThreads,
       getLastCudaError("QADVeloIncomp27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QADPressIncompDev7( unsigned int numberOfThreads,
+void QADPressIncompDev7( unsigned int numberOfThreads,
 									  real* DD,
 									  real* DD7,
 									  real* temp,
@@ -1918,7 +1916,7 @@ extern "C" void QADPressIncompDev7( unsigned int numberOfThreads,
       getLastCudaError("QADPressIncomp7 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QADPressIncompDev27(  unsigned int numberOfThreads,
+void QADPressIncompDev27(  unsigned int numberOfThreads,
 									  real* DD,
 									  real* DD27,
 									  real* temp,
@@ -1953,7 +1951,7 @@ extern "C" void QADPressIncompDev27(  unsigned int numberOfThreads,
       getLastCudaError("QADPressIncomp27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+void QDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
@@ -1973,7 +1971,7 @@ extern "C" void QDev27(LBMSimulationParameter* parameterDevice, QforBoundaryCond
       getLastCudaError("QDevice27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+void QDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
@@ -1992,7 +1990,7 @@ extern "C" void QDevComp27(LBMSimulationParameter* parameterDevice, QforBoundary
       getLastCudaError("QDeviceComp27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QDevCompThinWalls27(unsigned int numberOfThreads,
+void QDevCompThinWalls27(unsigned int numberOfThreads,
 									real* DD,
 									int* k_Q,
 									real* QQ,
@@ -2035,7 +2033,7 @@ extern "C" void QDevCompThinWalls27(unsigned int numberOfThreads,
 
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QDev3rdMomentsComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+void QDev3rdMomentsComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
    dim3 threads(parameterDevice->numberofthreads, 1, 1);
@@ -2054,7 +2052,7 @@ extern "C" void QDev3rdMomentsComp27(LBMSimulationParameter* parameterDevice, Qf
    getLastCudaError("QDevice3rdMomentsComp27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QDevIncompHighNu27( unsigned int numberOfThreads,
+void QDevIncompHighNu27( unsigned int numberOfThreads,
 									real* DD,
 									int* k_Q,
 									real* QQ,
@@ -2082,7 +2080,7 @@ extern "C" void QDevIncompHighNu27( unsigned int numberOfThreads,
       getLastCudaError("QDeviceIncompHighNu27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QDevCompHighNu27(   unsigned int numberOfThreads,
+void QDevCompHighNu27(   unsigned int numberOfThreads,
 									real* DD,
 									int* k_Q,
 									real* QQ,
@@ -2110,7 +2108,7 @@ extern "C" void QDevCompHighNu27(   unsigned int numberOfThreads,
       getLastCudaError("QDevice27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QVelDevicePlainBB27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+void QVelDevicePlainBB27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
@@ -2131,7 +2129,7 @@ extern "C" void QVelDevicePlainBB27(LBMSimulationParameter* parameterDevice, Qfo
    getLastCudaError("QVelDevicePlainBB27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QVelDeviceCouette27(unsigned int numberOfThreads,
+void QVelDeviceCouette27(unsigned int numberOfThreads,
 									real* vx,
 									real* vy,
 									real* vz,
@@ -2164,7 +2162,7 @@ extern "C" void QVelDeviceCouette27(unsigned int numberOfThreads,
       getLastCudaError("QVelDevicePlainBB27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QVelDevice1h27(   unsigned int numberOfThreads,
+void QVelDevice1h27(   unsigned int numberOfThreads,
 								  int nx,
 								  int ny,
 								  real* vx,
@@ -2211,7 +2209,7 @@ extern "C" void QVelDevice1h27(   unsigned int numberOfThreads,
       getLastCudaError("QVelDevice27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QVelDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+void QVelDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
@@ -2235,7 +2233,7 @@ extern "C" void QVelDev27(LBMSimulationParameter* parameterDevice, QforBoundaryC
       getLastCudaError("QVelDevice27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QVelDevCompPlusSlip27(unsigned int numberOfThreads,
+void QVelDevCompPlusSlip27(unsigned int numberOfThreads,
 									  real* vx,
 									  real* vy,
 									  real* vz,
@@ -2269,7 +2267,7 @@ extern "C" void QVelDevCompPlusSlip27(unsigned int numberOfThreads,
       getLastCudaError("QVelDeviceCompPlusSlip27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QVelDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+void QVelDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
    dim3 grid = vf::cuda::getCudaGrid(parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
@@ -2291,7 +2289,7 @@ extern "C" void QVelDevComp27(LBMSimulationParameter* parameterDevice, QforBound
    getLastCudaError("QVelDeviceComp27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QVelDevCompThinWalls27(unsigned int numberOfThreads,
+void QVelDevCompThinWalls27(unsigned int numberOfThreads,
 							           real* vx,
 							           real* vy,
 							           real* vz,
@@ -2340,7 +2338,7 @@ extern "C" void QVelDevCompThinWalls27(unsigned int numberOfThreads,
    getLastCudaError("QThinWallsPartTwo27 execution failed");
 }
 
-extern "C" void QVelDevCompZeroPress27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+void QVelDevCompZeroPress27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
@@ -2362,7 +2360,7 @@ extern "C" void QVelDevCompZeroPress27(LBMSimulationParameter* parameterDevice,
    getLastCudaError("QVelDeviceCompZeroPress27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QVelDevIncompHighNu27(unsigned int numberOfThreads,
+void QVelDevIncompHighNu27(unsigned int numberOfThreads,
 									  real* vx,
 									  real* vy,
 									  real* vz,
@@ -2396,7 +2394,7 @@ extern "C" void QVelDevIncompHighNu27(unsigned int numberOfThreads,
       getLastCudaError("QVelDeviceIncompHighNu27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QVelDevCompHighNu27(  unsigned int numberOfThreads,
+void QVelDevCompHighNu27(  unsigned int numberOfThreads,
 									  real* vx,
 									  real* vy,
 									  real* vz,
@@ -2430,7 +2428,7 @@ extern "C" void QVelDevCompHighNu27(  unsigned int numberOfThreads,
       getLastCudaError("QVelDeviceComp27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QVeloDevEQ27(unsigned int numberOfThreads,
+void QVeloDevEQ27(unsigned int numberOfThreads,
 							 real* VeloX,
 							 real* VeloY,
 							 real* VeloZ,
@@ -2461,7 +2459,7 @@ extern "C" void QVeloDevEQ27(unsigned int numberOfThreads,
       getLastCudaError("QVeloDeviceEQ27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QVeloStreetDevEQ27(
+void QVeloStreetDevEQ27(
 	uint  numberOfThreads,
 	real* veloXfraction,
 	real* veloYfraction,
@@ -2494,7 +2492,7 @@ extern "C" void QVeloStreetDevEQ27(
 	getLastCudaError("QVeloStreetDeviceEQ27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QSlipDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+void QSlipDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes);
    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
@@ -2513,11 +2511,11 @@ extern "C" void QSlipDev27(LBMSimulationParameter* parameterDevice, QforBoundary
    getLastCudaError("QSlipDevice27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QSlipDevCompTurbulentViscosity27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+void QSlipDevCompTurbulentViscosity27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes);
    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
-
+   
    QSlipDeviceComp27TurbViscosity<<< grid, threads >>> (
          parameterDevice->distributions.f[0],
          boundaryCondition->k,
@@ -2532,12 +2530,32 @@ extern "C" void QSlipDevCompTurbulentViscosity27(LBMSimulationParameter* paramet
          parameterDevice->isEvenTimestep);
    getLastCudaError("QSlipDeviceComp27TurbViscosity execution failed");
 }
-
-extern "C" void QSlipDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+//////////////////////////////////////////////////////////////////////////
+void QSlipPressureDevCompTurbulentViscosity27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes);
    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
 
+   QSlipPressureDeviceComp27TurbViscosity<<< grid, threads >>> ( // host-side launch; block size is parameterDevice->numberofthreads
+         parameterDevice->distributions.f[0],
+         boundaryCondition->k,
+         boundaryCondition->q27[0],
+         boundaryCondition->numberOfBCnodes,
+         parameterDevice->omega,
+         parameterDevice->neighborX,
+         parameterDevice->neighborY,
+         parameterDevice->neighborZ,
+         parameterDevice->turbViscosity,
+         parameterDevice->numberOfNodes,
+         parameterDevice->isEvenTimestep);
+   getLastCudaError("QSlipPressureDeviceComp27TurbViscosity execution failed"); // message names the kernel launched above, not its non-pressure sibling
+}
+//////////////////////////////////////////////////////////////////////////
+void QSlipDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+{
+   dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes);
+   dim3 threads(parameterDevice->numberofthreads, 1, 1 );
+   
    QSlipDeviceComp27<<< grid, threads >>> (
          parameterDevice->distributions.f[0],
          boundaryCondition->k,
@@ -2552,7 +2570,26 @@ extern "C" void QSlipDevComp27(LBMSimulationParameter* parameterDevice, QforBoun
    getLastCudaError("QSlipDeviceComp27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QSlipGeomDevComp27(unsigned int numberOfThreads,
+void BBSlipDevComp27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+{
+   dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes);
+   dim3 threads(parameterDevice->numberofthreads, 1, 1 );
+   // Host-side launcher: one kernel launch with parameterDevice->numberofthreads threads per block, then a getLastCudaError check.
+   QSlipDeviceComp27<<< grid, threads >>> ( // NOTE(review): launches QSlipDeviceComp27 although the error text below names BBSlipDeviceComp27 - confirm which kernel is intended
+         parameterDevice->distributions.f[0],
+         boundaryCondition->k,
+         boundaryCondition->q27[0],
+         boundaryCondition->numberOfBCnodes,
+         parameterDevice->omega,
+         parameterDevice->neighborX,
+         parameterDevice->neighborY,
+         parameterDevice->neighborZ,
+         parameterDevice->numberOfNodes,
+         parameterDevice->isEvenTimestep);
+   getLastCudaError("BBSlipDeviceComp27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void QSlipGeomDevComp27(unsigned int numberOfThreads,
 								   real* DD,
 								   int* k_Q,
 								   real* QQ,
@@ -2585,7 +2622,7 @@ extern "C" void QSlipGeomDevComp27(unsigned int numberOfThreads,
    getLastCudaError("QSlipGeomDeviceComp27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QSlipNormDevComp27(unsigned int numberOfThreads,
+void QSlipNormDevComp27(unsigned int numberOfThreads,
 								   real* DD,
 								   int* k_Q,
 								   real* QQ,
@@ -2618,7 +2655,7 @@ extern "C" void QSlipNormDevComp27(unsigned int numberOfThreads,
       getLastCudaError("QSlipGeomDeviceComp27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QStressDevComp27(Parameter *para,  QforBoundaryConditions* boundaryCondition, const int level)
+void QStressDevComp27(Parameter *para,  QforBoundaryConditions* boundaryCondition, const int level)
 {
    dim3 grid = vf::cuda::getCudaGrid(  para->getParD(level)->numberofthreads, boundaryCondition->numberOfBCnodes);
    dim3 threads(para->getParD(level)->numberofthreads, 1, 1 );
@@ -2659,7 +2696,7 @@ extern "C" void QStressDevComp27(Parameter *para,  QforBoundaryConditions* bound
 }
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" void BBStressDev27(Parameter *para,  QforBoundaryConditions* boundaryCondition, const int level)
+void BBStressDev27(Parameter *para,  QforBoundaryConditions* boundaryCondition, const int level)
 {
    dim3 grid = vf::cuda::getCudaGrid( para->getParD(level)->numberofthreads, boundaryCondition->numberOfBCnodes);
    dim3 threads(para->getParD(level)->numberofthreads, 1, 1 );
@@ -2696,8 +2733,48 @@ extern "C" void BBStressDev27(Parameter *para,  QforBoundaryConditions* boundary
       para->getParD(level)->isEvenTimestep);
       getLastCudaError("BBStressDevice27 execution failed");
 }
+
+//////////////////////////////////////////////////////////////////////////
+void BBStressPressureDev27(Parameter *para,  QforBoundaryConditions* boundaryCondition, const int level)
+{
+   dim3 grid = vf::cuda::getCudaGrid( para->getParD(level)->numberofthreads, boundaryCondition->numberOfBCnodes);
+   dim3 threads(para->getParD(level)->numberofthreads, 1, 1 );
+   // Host-side launcher: passes the device data of grid level `level` plus the boundary-condition arrays to BBStressPressureDevice27.
+   BBStressPressureDevice27<<< grid, threads >>> (
+      para->getParD(level)->distributions.f[0],
+      boundaryCondition->k,
+      boundaryCondition->kN,
+      boundaryCondition->q27[0],
+      boundaryCondition->numberOfBCnodes,
+      para->getParD(level)->velocityX,
+      para->getParD(level)->velocityY,
+      para->getParD(level)->velocityZ, // must be velocityZ: passing velocityY twice left the z velocity field unused
+      boundaryCondition->normalX,
+      boundaryCondition->normalY,
+      boundaryCondition->normalZ,
+      boundaryCondition->Vx,
+      boundaryCondition->Vy,
+      boundaryCondition->Vz,
+      boundaryCondition->Vx1,
+      boundaryCondition->Vy1,
+      boundaryCondition->Vz1,
+      para->getParD(level)->wallModel.samplingOffset,
+      para->getParD(level)->wallModel.z0,
+      para->getHasWallModelMonitor(),
+      para->getParD(level)->wallModel.u_star,
+      para->getParD(level)->wallModel.Fx,
+      para->getParD(level)->wallModel.Fy,
+      para->getParD(level)->wallModel.Fz,
+      para->getParD(level)->neighborX,
+      para->getParD(level)->neighborY,
+      para->getParD(level)->neighborZ,
+      para->getParD(level)->numberOfNodes,
+      para->getParD(level)->isEvenTimestep);
+      getLastCudaError("BBStressPressureDevice27 execution failed"); // message names the kernel launched above, not BBStressDevice27
+}
+
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QPressDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+void QPressDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
@@ -2717,7 +2794,7 @@ extern "C" void QPressDev27(LBMSimulationParameter* parameterDevice, QforBoundar
    getLastCudaError("QPressDevice27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QPressDevAntiBB27(  unsigned int numberOfThreads,
+void QPressDevAntiBB27(  unsigned int numberOfThreads,
                                     real* rhoBC,
 									real* vx,
 									real* vy,
@@ -2752,7 +2829,7 @@ extern "C" void QPressDevAntiBB27(  unsigned int numberOfThreads,
    getLastCudaError("QPressDeviceAntiBB27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QPressDevFixBackflow27( unsigned int numberOfThreads,
+void QPressDevFixBackflow27( unsigned int numberOfThreads,
                                         real* rhoBC,
                                         real* DD,
                                         int* k_Q,
@@ -2779,7 +2856,7 @@ extern "C" void QPressDevFixBackflow27( unsigned int numberOfThreads,
    getLastCudaError("QPressDeviceFixBackflow27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QPressDevDirDepBot27(  unsigned int numberOfThreads,
+void QPressDevDirDepBot27(  unsigned int numberOfThreads,
                                        real* rhoBC,
                                        real* DD,
                                        int* k_Q,
@@ -2806,7 +2883,7 @@ extern "C" void QPressDevDirDepBot27(  unsigned int numberOfThreads,
    getLastCudaError("QPressDeviceDirDepBot27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QPressNoRhoDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+void QPressNoRhoDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
@@ -2822,11 +2899,34 @@ extern "C" void QPressNoRhoDev27(LBMSimulationParameter* parameterDevice, QforBo
          parameterDevice->neighborY,
          parameterDevice->neighborZ,
          parameterDevice->numberOfNodes,
-         parameterDevice->isEvenTimestep);
+         parameterDevice->isEvenTimestep,
+         vf::lbm::dir::DIR_P00);
    getLastCudaError("QPressNoRhoDevice27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QInflowScaleByPressDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+void QPressZeroRhoOutflowDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+{
+   // Host-side launcher: starts QPressZeroRhoOutflowDevice27 once, then reports any launch error through getLastCudaError.
+   const dim3 launchBlock(parameterDevice->numberofthreads, 1, 1);
+   const dim3 launchGrid = vf::cuda::getCudaGrid(parameterDevice->numberofthreads, boundaryCondition->numberOfBCnodes);
+   QPressZeroRhoOutflowDevice27<<< launchGrid, launchBlock >>> (
+         boundaryCondition->RhoBC,
+         parameterDevice->distributions.f[0],
+         boundaryCondition->k,
+         boundaryCondition->kN,
+         boundaryCondition->numberOfBCnodes,
+         parameterDevice->omega,
+         parameterDevice->neighborX,
+         parameterDevice->neighborY,
+         parameterDevice->neighborZ,
+         parameterDevice->numberOfNodes,
+         parameterDevice->isEvenTimestep,
+         vf::lbm::dir::DIR_P00, // NOTE(review): direction is fixed to DIR_P00 here - confirm every caller expects that
+         parameterDevice->outflowPressureCorrectionFactor);
+   getLastCudaError("QPressZeroRhoOutflowDev27 execution failed");
+}
+//////////////////////////////////////////////////////////////////////////
+void QInflowScaleByPressDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
@@ -2846,7 +2946,7 @@ extern "C" void QInflowScaleByPressDev27(LBMSimulationParameter* parameterDevice
    getLastCudaError("QInflowScaleByPressDevice27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QPressDevOld27(  unsigned int numberOfThreads,
+void QPressDevOld27(  unsigned int numberOfThreads,
                                      real* rhoBC,
                                      real* DD,
                                      int* k_Q,
@@ -2875,7 +2975,7 @@ extern "C" void QPressDevOld27(  unsigned int numberOfThreads,
    getLastCudaError("QPressDeviceOld27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QPressDevIncompNEQ27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+void QPressDevIncompNEQ27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
@@ -2895,7 +2995,7 @@ extern "C" void QPressDevIncompNEQ27(LBMSimulationParameter* parameterDevice, Qf
    getLastCudaError("QPressDeviceIncompNEQ27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QPressDevNEQ27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+void QPressDevNEQ27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
@@ -2915,7 +3015,7 @@ extern "C" void QPressDevNEQ27(LBMSimulationParameter* parameterDevice, QforBoun
    getLastCudaError("QPressDevNEQ27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QPressDevEQZ27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+void QPressDevEQZ27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
@@ -2936,7 +3036,7 @@ extern "C" void QPressDevEQZ27(LBMSimulationParameter* parameterDevice, QforBoun
       getLastCudaError("QPressDeviceEQZ27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QPressDevZero27(unsigned int numberOfThreads,
+void QPressDevZero27(unsigned int numberOfThreads,
                                 real* DD,
                                 int* k_Q,
                                 unsigned int numberOfBCnodes,
@@ -2959,7 +3059,7 @@ extern "C" void QPressDevZero27(unsigned int numberOfThreads,
    getLastCudaError("QPressDeviceOld27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QPressDevFake27(     unsigned int numberOfThreads,
+void QPressDevFake27(     unsigned int numberOfThreads,
                                      real* rhoBC,
                                      real* DD,
                                      int* k_Q,
@@ -2989,7 +3089,7 @@ extern "C" void QPressDevFake27(     unsigned int numberOfThreads,
       getLastCudaError("QPressDeviceFake27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void BBDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
+void BBDev27(LBMSimulationParameter* parameterDevice, QforBoundaryConditions* boundaryCondition)
 {
    dim3 grid = vf::cuda::getCudaGrid( parameterDevice->numberofthreads,  boundaryCondition->numberOfBCnodes);
    dim3 threads(parameterDevice->numberofthreads, 1, 1 );
@@ -3007,7 +3107,7 @@ extern "C" void BBDev27(LBMSimulationParameter* parameterDevice, QforBoundaryCon
    getLastCudaError("BBDevice27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void QPressDev27_IntBB(  unsigned int numberOfThreads,
+void QPressDev27_IntBB(  unsigned int numberOfThreads,
 									real* rho,
 									real* DD,
 									int* k_Q,
@@ -3037,7 +3137,7 @@ extern "C" void QPressDev27_IntBB(  unsigned int numberOfThreads,
 }
 // TODO: https://git.rz.tu-bs.de/irmb/VirtualFluids_dev/-/issues/29
 //////////////////////////////////////////////////////////////////////////
-extern "C" void PressSchlaffer27(unsigned int numberOfThreads,
+void PressSchlaffer27(unsigned int numberOfThreads,
                                  real* rhoBC,
                                  real* DD,
                                  real* vx0,
@@ -3075,7 +3175,7 @@ extern "C" void PressSchlaffer27(unsigned int numberOfThreads,
 }
 // TODO: https://git.rz.tu-bs.de/irmb/VirtualFluids_dev/-/issues/29
 //////////////////////////////////////////////////////////////////////////
-extern "C" void VelSchlaffer27(  unsigned int numberOfThreads,
+void VelSchlaffer27(  unsigned int numberOfThreads,
                                  int t,
                                  real* DD,
                                  real* vz0,
@@ -3169,7 +3269,7 @@ extern "C" void PropVelo(   unsigned int numberOfThreads,
       getLastCudaError("PropellerBC execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleCF27( real* DC,
+void ScaleCF27( real* DC,
                         real* DF,
                         unsigned int* neighborCX,
                         unsigned int* neighborCY,
@@ -3218,7 +3318,7 @@ extern "C" void ScaleCF27( real* DC,
       getLastCudaError("scaleCF27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleCFEff27(real* DC,
+void ScaleCFEff27(real* DC,
                              real* DF,
                              unsigned int* neighborCX,
                              unsigned int* neighborCY,
@@ -3269,7 +3369,7 @@ extern "C" void ScaleCFEff27(real* DC,
       getLastCudaError("scaleCFEff27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleCFLast27(real* DC,
+void ScaleCFLast27(real* DC,
                               real* DF,
                               unsigned int* neighborCX,
                               unsigned int* neighborCY,
@@ -3320,7 +3420,7 @@ extern "C" void ScaleCFLast27(real* DC,
       getLastCudaError("scaleCFLast27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleCFpress27(  real* DC,
+void ScaleCFpress27(  real* DC,
                                  real* DF,
                                  unsigned int* neighborCX,
                                  unsigned int* neighborCY,
@@ -3371,7 +3471,7 @@ extern "C" void ScaleCFpress27(  real* DC,
       getLastCudaError("scaleCFpress27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleCF_Fix_27(  real* DC,
+void ScaleCF_Fix_27(  real* DC,
                                  real* DF,
                                  unsigned int* neighborCX,
                                  unsigned int* neighborCY,
@@ -3422,7 +3522,7 @@ extern "C" void ScaleCF_Fix_27(  real* DC,
       getLastCudaError("scaleCF_Fix_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleCF_Fix_comp_27( real* DC,
+void ScaleCF_Fix_comp_27( real* DC,
 									 real* DF,
 									 unsigned int* neighborCX,
 									 unsigned int* neighborCY,
@@ -3473,7 +3573,7 @@ extern "C" void ScaleCF_Fix_comp_27( real* DC,
       getLastCudaError("scaleCF_Fix_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleCF_0817_comp_27(real* DC,
+void ScaleCF_0817_comp_27(real* DC,
 									 real* DF,
 									 unsigned int* neighborCX,
 									 unsigned int* neighborCY,
@@ -3525,7 +3625,7 @@ extern "C" void ScaleCF_0817_comp_27(real* DC,
       getLastCudaError("scaleCF_0817_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleCF_comp_D3Q27F3_2018(real* DC,
+void ScaleCF_comp_D3Q27F3_2018(real* DC,
 										  real* DF,
 										  real* G6,
 										  unsigned int* neighborCX,
@@ -3578,7 +3678,7 @@ extern "C" void ScaleCF_comp_D3Q27F3_2018(real* DC,
       getLastCudaError("scaleCF_comp_D3Q27F3_2018 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleCF_comp_D3Q27F3(real* DC,
+void ScaleCF_comp_D3Q27F3(real* DC,
 									 real* DF,
 									 real* G6,
 									 unsigned int* neighborCX,
@@ -3632,7 +3732,7 @@ extern "C" void ScaleCF_comp_D3Q27F3(real* DC,
       getLastCudaError("scaleCF_comp_D3Q27F3 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleCF_staggered_time_comp_27(  real* DC,
+void ScaleCF_staggered_time_comp_27(  real* DC,
 												 real* DF,
 												 unsigned int* neighborCX,
 												 unsigned int* neighborCY,
@@ -3683,7 +3783,7 @@ extern "C" void ScaleCF_staggered_time_comp_27(  real* DC,
       getLastCudaError("scaleCF_Fix_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleCF_RhoSq_comp_27(   real* DC,
+void ScaleCF_RhoSq_comp_27(   real* DC,
 										 real* DF,
 										 unsigned int* neighborCX,
 										 unsigned int* neighborCY,
@@ -3735,7 +3835,7 @@ extern "C" void ScaleCF_RhoSq_comp_27(   real* DC,
       getLastCudaError("scaleCF_RhoSq_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleCF_RhoSq_3rdMom_comp_27(real* DC,
+void ScaleCF_RhoSq_3rdMom_comp_27(real* DC,
 											 real* DF,
 											 unsigned int* neighborCX,
 											 unsigned int* neighborCY,
@@ -3787,7 +3887,7 @@ extern "C" void ScaleCF_RhoSq_3rdMom_comp_27(real* DC,
       getLastCudaError("scaleCF_RhoSq_3rdMom_comp_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleCF_AA2016_comp_27(real* DC,
+void ScaleCF_AA2016_comp_27(real* DC,
 									   real* DF,
 									   unsigned int* neighborCX,
 									   unsigned int* neighborCY,
@@ -3839,7 +3939,7 @@ extern "C" void ScaleCF_AA2016_comp_27(real* DC,
       getLastCudaError("scaleCF_AA2016_comp_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleCF_NSPress_27(  real* DC,
+void ScaleCF_NSPress_27(  real* DC,
 									 real* DF,
 									 unsigned int* neighborCX,
 									 unsigned int* neighborCY,
@@ -3890,7 +3990,7 @@ extern "C" void ScaleCF_NSPress_27(  real* DC,
       getLastCudaError("scaleCF_Fix_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleCFThSMG7(   real* DC,
+void ScaleCFThSMG7(   real* DC,
                                  real* DF,
                                  real* DD7C,
                                  real* DD7F,
@@ -3935,7 +4035,7 @@ extern "C" void ScaleCFThSMG7(   real* DC,
       getLastCudaError("scaleCFThSMG7 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleCFThS7(  real* DC,
+void ScaleCFThS7(  real* DC,
                               real* DF,
                               real* DD7C,
                               real* DD7F,
@@ -3978,7 +4078,7 @@ extern "C" void ScaleCFThS7(  real* DC,
       getLastCudaError("scaleCFThS7 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleCFThS27( real* DC,
+void ScaleCFThS27( real* DC,
                               real* DF,
                               real* DD27C,
                               real* DD27F,
@@ -4023,7 +4123,7 @@ extern "C" void ScaleCFThS27( real* DC,
       getLastCudaError("scaleCFThS27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleFC27( real* DC,
+void ScaleFC27( real* DC,
                            real* DF,
                            unsigned int* neighborCX,
                            unsigned int* neighborCY,
@@ -4073,7 +4173,7 @@ extern "C" void ScaleFC27( real* DC,
       getLastCudaError("scaleFC27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleFCEff27(real* DC,
+void ScaleFCEff27(real* DC,
                              real* DF,
                              unsigned int* neighborCX,
                              unsigned int* neighborCY,
@@ -4124,7 +4224,7 @@ extern "C" void ScaleFCEff27(real* DC,
       getLastCudaError("scaleFCEff27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleFCLast27(real* DC,
+void ScaleFCLast27(real* DC,
                               real* DF,
                               unsigned int* neighborCX,
                               unsigned int* neighborCY,
@@ -4175,7 +4275,7 @@ extern "C" void ScaleFCLast27(real* DC,
       getLastCudaError("Kernel execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleFCpress27(real* DC,
+void ScaleFCpress27(real* DC,
                               real* DF,
                               unsigned int* neighborCX,
                               unsigned int* neighborCY,
@@ -4226,7 +4326,7 @@ extern "C" void ScaleFCpress27(real* DC,
       getLastCudaError("scaleFCpress27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleFC_Fix_27(real* DC,
+void ScaleFC_Fix_27(real* DC,
                               real* DF,
                               unsigned int* neighborCX,
                               unsigned int* neighborCY,
@@ -4277,7 +4377,7 @@ extern "C" void ScaleFC_Fix_27(real* DC,
       getLastCudaError("scaleFC_Fix_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleFC_Fix_comp_27(  real* DC,
+void ScaleFC_Fix_comp_27(  real* DC,
 									  real* DF,
 									  unsigned int* neighborCX,
 									  unsigned int* neighborCY,
@@ -4328,7 +4428,7 @@ extern "C" void ScaleFC_Fix_comp_27(  real* DC,
       getLastCudaError("scaleFC_Fix_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleFC_0817_comp_27( real* DC,
+void ScaleFC_0817_comp_27( real* DC,
 									  real* DF,
 									  unsigned int* neighborCX,
 									  unsigned int* neighborCY,
@@ -4380,7 +4480,7 @@ extern "C" void ScaleFC_0817_comp_27( real* DC,
       getLastCudaError("scaleFC_0817_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleFC_comp_D3Q27F3_2018( real* DC,
+void ScaleFC_comp_D3Q27F3_2018( real* DC,
 										   real* DF,
 										   real* G6,
 										   unsigned int* neighborCX,
@@ -4433,7 +4533,7 @@ extern "C" void ScaleFC_comp_D3Q27F3_2018( real* DC,
       getLastCudaError("scaleFC_comp_D3Q27F3_2018 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleFC_comp_D3Q27F3( real* DC,
+void ScaleFC_comp_D3Q27F3( real* DC,
 									  real* DF,
 									  real* G6,
 									  unsigned int* neighborCX,
@@ -4487,7 +4587,7 @@ extern "C" void ScaleFC_comp_D3Q27F3( real* DC,
       getLastCudaError("scaleFC_0817_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleFC_staggered_time_comp_27(   real* DC,
+void ScaleFC_staggered_time_comp_27(   real* DC,
 												  real* DF,
 												  unsigned int* neighborCX,
 												  unsigned int* neighborCY,
@@ -4538,7 +4638,7 @@ extern "C" void ScaleFC_staggered_time_comp_27(   real* DC,
       getLastCudaError("scaleFC_Fix_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleFC_RhoSq_comp_27(real* DC,
+void ScaleFC_RhoSq_comp_27(real* DC,
 									  real* DF,
 									  unsigned int* neighborCX,
 									  unsigned int* neighborCY,
@@ -4592,7 +4692,7 @@ extern "C" void ScaleFC_RhoSq_comp_27(real* DC,
 }
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleFC_RhoSq_3rdMom_comp_27( real* DC,
+void ScaleFC_RhoSq_3rdMom_comp_27( real* DC,
 											  real* DF,
 											  unsigned int* neighborCX,
 											  unsigned int* neighborCY,
@@ -4644,7 +4744,7 @@ extern "C" void ScaleFC_RhoSq_3rdMom_comp_27( real* DC,
       getLastCudaError("scaleFC_RhoSq_3rdMom_comp_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleFC_AA2016_comp_27( real* DC,
+void ScaleFC_AA2016_comp_27( real* DC,
 										real* DF,
 										unsigned int* neighborCX,
 										unsigned int* neighborCY,
@@ -4696,7 +4796,7 @@ extern "C" void ScaleFC_AA2016_comp_27( real* DC,
       getLastCudaError("scaleFC_AA2016_comp_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleFC_NSPress_27(real* DC,
+void ScaleFC_NSPress_27(real* DC,
 								  real* DF,
 								  unsigned int* neighborCX,
 								  unsigned int* neighborCY,
@@ -4747,7 +4847,7 @@ extern "C" void ScaleFC_NSPress_27(real* DC,
       getLastCudaError("scaleFC_Fix_27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleFCThSMG7(real* DC,
+void ScaleFCThSMG7(real* DC,
                               real* DF,
                               real* DD7C,
                               real* DD7F,
@@ -4792,7 +4892,7 @@ extern "C" void ScaleFCThSMG7(real* DC,
       getLastCudaError("scaleFCThSMG7 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleFCThS7(  real* DC,
+void ScaleFCThS7(  real* DC,
                               real* DF,
                               real* DD7C,
                               real* DD7F,
@@ -4835,7 +4935,7 @@ extern "C" void ScaleFCThS7(  real* DC,
       getLastCudaError("scaleFCThS7 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void ScaleFCThS27( real* DC,
+void ScaleFCThS27( real* DC,
                               real* DF,
                               real* DD27C,
                               real* DD27F,
@@ -4880,7 +4980,7 @@ extern "C" void ScaleFCThS27( real* DC,
       getLastCudaError("scaleFCThS27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void DragLiftPostD27(real* DD,
+void DragLiftPostD27(real* DD,
 								int* k_Q,
 								real* QQ,
 								int numberOfBCnodes,
@@ -4911,7 +5011,7 @@ extern "C" void DragLiftPostD27(real* DD,
 	getLastCudaError("DragLift27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void DragLiftPreD27( real* DD,
+void DragLiftPreD27( real* DD,
 								int* k_Q,
 								real* QQ,
 								int numberOfBCnodes,
@@ -4942,7 +5042,7 @@ extern "C" void DragLiftPreD27( real* DD,
 	getLastCudaError("DragLift27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void CalcCPtop27(real* DD,
+void CalcCPtop27(real* DD,
 							int* cpIndex,
 							int nonCp,
 							double *cpPress,
@@ -4967,7 +5067,7 @@ extern "C" void CalcCPtop27(real* DD,
 	getLastCudaError("CalcCP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void CalcCPbottom27( real* DD,
+void CalcCPbottom27( real* DD,
 								int* cpIndex,
 								int nonCp,
 								double *cpPress,
@@ -4992,7 +5092,7 @@ extern "C" void CalcCPbottom27( real* DD,
 	getLastCudaError("CalcCP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void GetSendFsPreDev27(real* DD,
+void GetSendFsPreDev27(real* DD,
 								  real* bufferFs,
 								  int* sendIndex,
 								  int buffmax,
@@ -5018,7 +5118,7 @@ extern "C" void GetSendFsPreDev27(real* DD,
 	getLastCudaError("getSendFsPre27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void GetSendFsPostDev27(real* DD,
+void GetSendFsPostDev27(real* DD,
 								   real* bufferFs,
 								   int* sendIndex,
 								   int buffmax,
@@ -5044,7 +5144,7 @@ extern "C" void GetSendFsPostDev27(real* DD,
 	getLastCudaError("getSendFsPost27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void SetRecvFsPreDev27(real* DD,
+void SetRecvFsPreDev27(real* DD,
 								  real* bufferFs,
 								  int* recvIndex,
 								  int buffmax,
@@ -5070,7 +5170,7 @@ extern "C" void SetRecvFsPreDev27(real* DD,
 	getLastCudaError("setRecvFsPre27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void SetRecvFsPostDev27(real* DD,
+void SetRecvFsPostDev27(real* DD,
 								   real* bufferFs,
 								   int* recvIndex,
 								   int buffmax,
@@ -5096,7 +5196,7 @@ extern "C" void SetRecvFsPostDev27(real* DD,
 	getLastCudaError("setRecvFsPost27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void getSendGsDevF3(
+void getSendGsDevF3(
 	real* G6,
 	real* bufferGs,
 	int* sendIndex,
@@ -5123,7 +5223,7 @@ extern "C" void getSendGsDevF3(
 	getLastCudaError("getSendGsF3 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void setRecvGsDevF3(
+void setRecvGsDevF3(
 	real* G6,
 	real* bufferGs,
 	int* recvIndex,
@@ -5150,7 +5250,7 @@ extern "C" void setRecvGsDevF3(
 	getLastCudaError("setRecvGsF3 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void WallFuncDev27(unsigned int numberOfThreads,
+void WallFuncDev27(unsigned int numberOfThreads,
 							  real* vx,
 							  real* vy,
 							  real* vz,
@@ -5184,7 +5284,7 @@ extern "C" void WallFuncDev27(unsigned int numberOfThreads,
       getLastCudaError("WallFunction27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void SetOutputWallVelocitySP27(unsigned int numberOfThreads,
+void SetOutputWallVelocitySP27(unsigned int numberOfThreads,
 										  real* vxD,
 										  real* vyD,
 										  real* vzD,
@@ -5225,7 +5325,7 @@ extern "C" void SetOutputWallVelocitySP27(unsigned int numberOfThreads,
       getLastCudaError("LBSetOutputWallVelocitySP27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void GetVelotoForce27(unsigned int numberOfThreads,
+void GetVelotoForce27(unsigned int numberOfThreads,
 								 real* DD,
 								 int* bcIndex,
 								 int nonAtBC,
@@ -5254,7 +5354,7 @@ extern "C" void GetVelotoForce27(unsigned int numberOfThreads,
       getLastCudaError("GetVeloforForcing27 execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void InitParticlesDevice(real* coordX,
+void InitParticlesDevice(real* coordX,
 									real* coordY,
 									real* coordZ,
 									real* coordParticleXlocal,
@@ -5307,7 +5407,7 @@ extern "C" void InitParticlesDevice(real* coordX,
       getLastCudaError("InitParticles execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void MoveParticlesDevice(real* coordX,
+void MoveParticlesDevice(real* coordX,
 									real* coordY,
 									real* coordZ,
 									real* coordParticleXlocal,
@@ -5368,7 +5468,7 @@ extern "C" void MoveParticlesDevice(real* coordX,
       getLastCudaError("MoveParticles execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void initRandomDevice(curandState* state,
+void initRandomDevice(curandState* state,
 								 unsigned int size_Mat,
 								 unsigned int numberOfThreads)
 {
@@ -5377,7 +5477,7 @@ extern "C" void initRandomDevice(curandState* state,
    getLastCudaError("initRandom execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void generateRandomValuesDevice( curandState* state,
+void generateRandomValuesDevice( curandState* state,
 											unsigned int size_Mat,
 											real* randArray,
 											unsigned int numberOfThreads)
@@ -5387,7 +5487,7 @@ extern "C" void generateRandomValuesDevice( curandState* state,
    getLastCudaError("generateRandomValues execution failed");
 }
 //////////////////////////////////////////////////////////////////////////
-extern "C" void CalcTurbulenceIntensityDevice(
+void CalcTurbulenceIntensityDevice(
    real* vxx,
    real* vyy,
    real* vzz,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/NoSlipBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/NoSlipBCs27.cu
index b04b7fdd999b96f20c3a32f2b3e8aef1e3222608..314687c4b29a32962b386d7c083f72b754388e5b 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/NoSlipBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/NoSlipBCs27.cu
@@ -15,7 +15,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QDevice3rdMomentsComp27(
+__global__ void QDevice3rdMomentsComp27(
 													 real* distributions, 
 													 int* subgridDistanceIndices, 
 													 real* subgridDistances,
@@ -30,63 +30,63 @@ extern "C" __global__ void QDevice3rdMomentsComp27(
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[E   ] = &distributions[E   *numberOfLBnodes];
-      D.f[W   ] = &distributions[W   *numberOfLBnodes];
-      D.f[N   ] = &distributions[N   *numberOfLBnodes];
-      D.f[S   ] = &distributions[S   *numberOfLBnodes];
-      D.f[T   ] = &distributions[T   *numberOfLBnodes];
-      D.f[B   ] = &distributions[B   *numberOfLBnodes];
-      D.f[NE  ] = &distributions[NE  *numberOfLBnodes];
-      D.f[SW  ] = &distributions[SW  *numberOfLBnodes];
-      D.f[SE  ] = &distributions[SE  *numberOfLBnodes];
-      D.f[NW  ] = &distributions[NW  *numberOfLBnodes];
-      D.f[TE  ] = &distributions[TE  *numberOfLBnodes];
-      D.f[BW  ] = &distributions[BW  *numberOfLBnodes];
-      D.f[BE  ] = &distributions[BE  *numberOfLBnodes];
-      D.f[TW  ] = &distributions[TW  *numberOfLBnodes];
-      D.f[TN  ] = &distributions[TN  *numberOfLBnodes];
-      D.f[BS  ] = &distributions[BS  *numberOfLBnodes];
-      D.f[BN  ] = &distributions[BN  *numberOfLBnodes];
-      D.f[TS  ] = &distributions[TS  *numberOfLBnodes];
-      D.f[REST] = &distributions[REST*numberOfLBnodes];
-      D.f[TNE ] = &distributions[TNE *numberOfLBnodes];
-      D.f[TSW ] = &distributions[TSW *numberOfLBnodes];
-      D.f[TSE ] = &distributions[TSE *numberOfLBnodes];
-      D.f[TNW ] = &distributions[TNW *numberOfLBnodes];
-      D.f[BNE ] = &distributions[BNE *numberOfLBnodes];
-      D.f[BSW ] = &distributions[BSW *numberOfLBnodes];
-      D.f[BSE ] = &distributions[BSE *numberOfLBnodes];
-      D.f[BNW ] = &distributions[BNW *numberOfLBnodes];
+      D.f[DIR_P00   ] = &distributions[DIR_P00   *numberOfLBnodes];
+      D.f[DIR_M00   ] = &distributions[DIR_M00   *numberOfLBnodes];
+      D.f[DIR_0P0   ] = &distributions[DIR_0P0   *numberOfLBnodes];
+      D.f[DIR_0M0   ] = &distributions[DIR_0M0   *numberOfLBnodes];
+      D.f[DIR_00P   ] = &distributions[DIR_00P   *numberOfLBnodes];
+      D.f[DIR_00M   ] = &distributions[DIR_00M   *numberOfLBnodes];
+      D.f[DIR_PP0  ] = &distributions[DIR_PP0  *numberOfLBnodes];
+      D.f[DIR_MM0  ] = &distributions[DIR_MM0  *numberOfLBnodes];
+      D.f[DIR_PM0  ] = &distributions[DIR_PM0  *numberOfLBnodes];
+      D.f[DIR_MP0  ] = &distributions[DIR_MP0  *numberOfLBnodes];
+      D.f[DIR_P0P  ] = &distributions[DIR_P0P  *numberOfLBnodes];
+      D.f[DIR_M0M  ] = &distributions[DIR_M0M  *numberOfLBnodes];
+      D.f[DIR_P0M  ] = &distributions[DIR_P0M  *numberOfLBnodes];
+      D.f[DIR_M0P  ] = &distributions[DIR_M0P  *numberOfLBnodes];
+      D.f[DIR_0PP  ] = &distributions[DIR_0PP  *numberOfLBnodes];
+      D.f[DIR_0MM  ] = &distributions[DIR_0MM  *numberOfLBnodes];
+      D.f[DIR_0PM  ] = &distributions[DIR_0PM  *numberOfLBnodes];
+      D.f[DIR_0MP  ] = &distributions[DIR_0MP  *numberOfLBnodes];
+      D.f[DIR_000] = &distributions[DIR_000*numberOfLBnodes];
+      D.f[DIR_PPP ] = &distributions[DIR_PPP *numberOfLBnodes];
+      D.f[DIR_MMP ] = &distributions[DIR_MMP *numberOfLBnodes];
+      D.f[DIR_PMP ] = &distributions[DIR_PMP *numberOfLBnodes];
+      D.f[DIR_MPP ] = &distributions[DIR_MPP *numberOfLBnodes];
+      D.f[DIR_PPM ] = &distributions[DIR_PPM *numberOfLBnodes];
+      D.f[DIR_MMM ] = &distributions[DIR_MMM *numberOfLBnodes];
+      D.f[DIR_PMM ] = &distributions[DIR_PMM *numberOfLBnodes];
+      D.f[DIR_MPM ] = &distributions[DIR_MPM *numberOfLBnodes];
    } 
    else
    {
-      D.f[W   ] = &distributions[E   *numberOfLBnodes];
-      D.f[E   ] = &distributions[W   *numberOfLBnodes];
-      D.f[S   ] = &distributions[N   *numberOfLBnodes];
-      D.f[N   ] = &distributions[S   *numberOfLBnodes];
-      D.f[B   ] = &distributions[T   *numberOfLBnodes];
-      D.f[T   ] = &distributions[B   *numberOfLBnodes];
-      D.f[SW  ] = &distributions[NE  *numberOfLBnodes];
-      D.f[NE  ] = &distributions[SW  *numberOfLBnodes];
-      D.f[NW  ] = &distributions[SE  *numberOfLBnodes];
-      D.f[SE  ] = &distributions[NW  *numberOfLBnodes];
-      D.f[BW  ] = &distributions[TE  *numberOfLBnodes];
-      D.f[TE  ] = &distributions[BW  *numberOfLBnodes];
-      D.f[TW  ] = &distributions[BE  *numberOfLBnodes];
-      D.f[BE  ] = &distributions[TW  *numberOfLBnodes];
-      D.f[BS  ] = &distributions[TN  *numberOfLBnodes];
-      D.f[TN  ] = &distributions[BS  *numberOfLBnodes];
-      D.f[TS  ] = &distributions[BN  *numberOfLBnodes];
-      D.f[BN  ] = &distributions[TS  *numberOfLBnodes];
-      D.f[REST] = &distributions[REST*numberOfLBnodes];
-      D.f[TNE ] = &distributions[BSW *numberOfLBnodes];
-      D.f[TSW ] = &distributions[BNE *numberOfLBnodes];
-      D.f[TSE ] = &distributions[BNW *numberOfLBnodes];
-      D.f[TNW ] = &distributions[BSE *numberOfLBnodes];
-      D.f[BNE ] = &distributions[TSW *numberOfLBnodes];
-      D.f[BSW ] = &distributions[TNE *numberOfLBnodes];
-      D.f[BSE ] = &distributions[TNW *numberOfLBnodes];
-      D.f[BNW ] = &distributions[TSE *numberOfLBnodes];
+      D.f[DIR_M00   ] = &distributions[DIR_P00   *numberOfLBnodes];
+      D.f[DIR_P00   ] = &distributions[DIR_M00   *numberOfLBnodes];
+      D.f[DIR_0M0   ] = &distributions[DIR_0P0   *numberOfLBnodes];
+      D.f[DIR_0P0   ] = &distributions[DIR_0M0   *numberOfLBnodes];
+      D.f[DIR_00M   ] = &distributions[DIR_00P   *numberOfLBnodes];
+      D.f[DIR_00P   ] = &distributions[DIR_00M   *numberOfLBnodes];
+      D.f[DIR_MM0  ] = &distributions[DIR_PP0  *numberOfLBnodes];
+      D.f[DIR_PP0  ] = &distributions[DIR_MM0  *numberOfLBnodes];
+      D.f[DIR_MP0  ] = &distributions[DIR_PM0  *numberOfLBnodes];
+      D.f[DIR_PM0  ] = &distributions[DIR_MP0  *numberOfLBnodes];
+      D.f[DIR_M0M  ] = &distributions[DIR_P0P  *numberOfLBnodes];
+      D.f[DIR_P0P  ] = &distributions[DIR_M0M  *numberOfLBnodes];
+      D.f[DIR_M0P  ] = &distributions[DIR_P0M  *numberOfLBnodes];
+      D.f[DIR_P0M  ] = &distributions[DIR_M0P  *numberOfLBnodes];
+      D.f[DIR_0MM  ] = &distributions[DIR_0PP  *numberOfLBnodes];
+      D.f[DIR_0PP  ] = &distributions[DIR_0MM  *numberOfLBnodes];
+      D.f[DIR_0MP  ] = &distributions[DIR_0PM  *numberOfLBnodes];
+      D.f[DIR_0PM  ] = &distributions[DIR_0MP  *numberOfLBnodes];
+      D.f[DIR_000] = &distributions[DIR_000*numberOfLBnodes];
+      D.f[DIR_PPP ] = &distributions[DIR_MMM *numberOfLBnodes];
+      D.f[DIR_MMP ] = &distributions[DIR_PPM *numberOfLBnodes];
+      D.f[DIR_PMP ] = &distributions[DIR_MPM *numberOfLBnodes];
+      D.f[DIR_MPP ] = &distributions[DIR_PMM *numberOfLBnodes];
+      D.f[DIR_PPM ] = &distributions[DIR_MMP *numberOfLBnodes];
+      D.f[DIR_MMM ] = &distributions[DIR_PPP *numberOfLBnodes];
+      D.f[DIR_PMM ] = &distributions[DIR_MPP *numberOfLBnodes];
+      D.f[DIR_MPM ] = &distributions[DIR_PMP *numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -107,32 +107,32 @@ extern "C" __global__ void QDevice3rdMomentsComp27(
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &subgridDistances[E   * numberOfBCnodes];
-      q_dirW   = &subgridDistances[W   * numberOfBCnodes];
-      q_dirN   = &subgridDistances[N   * numberOfBCnodes];
-      q_dirS   = &subgridDistances[S   * numberOfBCnodes];
-      q_dirT   = &subgridDistances[T   * numberOfBCnodes];
-      q_dirB   = &subgridDistances[B   * numberOfBCnodes];
-      q_dirNE  = &subgridDistances[NE  * numberOfBCnodes];
-      q_dirSW  = &subgridDistances[SW  * numberOfBCnodes];
-      q_dirSE  = &subgridDistances[SE  * numberOfBCnodes];
-      q_dirNW  = &subgridDistances[NW  * numberOfBCnodes];
-      q_dirTE  = &subgridDistances[TE  * numberOfBCnodes];
-      q_dirBW  = &subgridDistances[BW  * numberOfBCnodes];
-      q_dirBE  = &subgridDistances[BE  * numberOfBCnodes];
-      q_dirTW  = &subgridDistances[TW  * numberOfBCnodes];
-      q_dirTN  = &subgridDistances[TN  * numberOfBCnodes];
-      q_dirBS  = &subgridDistances[BS  * numberOfBCnodes];
-      q_dirBN  = &subgridDistances[BN  * numberOfBCnodes];
-      q_dirTS  = &subgridDistances[TS  * numberOfBCnodes];
-      q_dirTNE = &subgridDistances[TNE * numberOfBCnodes];
-      q_dirTSW = &subgridDistances[TSW * numberOfBCnodes];
-      q_dirTSE = &subgridDistances[TSE * numberOfBCnodes];
-      q_dirTNW = &subgridDistances[TNW * numberOfBCnodes];
-      q_dirBNE = &subgridDistances[BNE * numberOfBCnodes];
-      q_dirBSW = &subgridDistances[BSW * numberOfBCnodes];
-      q_dirBSE = &subgridDistances[BSE * numberOfBCnodes];
-      q_dirBNW = &subgridDistances[BNW * numberOfBCnodes];
+      q_dirE   = &subgridDistances[DIR_P00   * numberOfBCnodes];
+      q_dirW   = &subgridDistances[DIR_M00   * numberOfBCnodes];
+      q_dirN   = &subgridDistances[DIR_0P0   * numberOfBCnodes];
+      q_dirS   = &subgridDistances[DIR_0M0   * numberOfBCnodes];
+      q_dirT   = &subgridDistances[DIR_00P   * numberOfBCnodes];
+      q_dirB   = &subgridDistances[DIR_00M   * numberOfBCnodes];
+      q_dirNE  = &subgridDistances[DIR_PP0  * numberOfBCnodes];
+      q_dirSW  = &subgridDistances[DIR_MM0  * numberOfBCnodes];
+      q_dirSE  = &subgridDistances[DIR_PM0  * numberOfBCnodes];
+      q_dirNW  = &subgridDistances[DIR_MP0  * numberOfBCnodes];
+      q_dirTE  = &subgridDistances[DIR_P0P  * numberOfBCnodes];
+      q_dirBW  = &subgridDistances[DIR_M0M  * numberOfBCnodes];
+      q_dirBE  = &subgridDistances[DIR_P0M  * numberOfBCnodes];
+      q_dirTW  = &subgridDistances[DIR_M0P  * numberOfBCnodes];
+      q_dirTN  = &subgridDistances[DIR_0PP  * numberOfBCnodes];
+      q_dirBS  = &subgridDistances[DIR_0MM  * numberOfBCnodes];
+      q_dirBN  = &subgridDistances[DIR_0PM  * numberOfBCnodes];
+      q_dirTS  = &subgridDistances[DIR_0MP  * numberOfBCnodes];
+      q_dirTNE = &subgridDistances[DIR_PPP * numberOfBCnodes];
+      q_dirTSW = &subgridDistances[DIR_MMP * numberOfBCnodes];
+      q_dirTSE = &subgridDistances[DIR_PMP * numberOfBCnodes];
+      q_dirTNW = &subgridDistances[DIR_MPP * numberOfBCnodes];
+      q_dirBNE = &subgridDistances[DIR_PPM * numberOfBCnodes];
+      q_dirBSW = &subgridDistances[DIR_MMM * numberOfBCnodes];
+      q_dirBSE = &subgridDistances[DIR_PMM * numberOfBCnodes];
+      q_dirBNW = &subgridDistances[DIR_MPM * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int numberOfNodesK  = subgridDistanceIndices[k];
@@ -167,37 +167,37 @@ extern "C" __global__ void QDevice3rdMomentsComp27(
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
             f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_W    = (D.f[E   ])[ke   ];
-      f_E    = (D.f[W   ])[kw   ];
-      f_S    = (D.f[N   ])[kn   ];
-      f_N    = (D.f[S   ])[ks   ];
-      f_B    = (D.f[T   ])[kt   ];
-      f_T    = (D.f[B   ])[kb   ];
-      f_SW   = (D.f[NE  ])[kne  ];
-      f_NE   = (D.f[SW  ])[ksw  ];
-      f_NW   = (D.f[SE  ])[kse  ];
-      f_SE   = (D.f[NW  ])[knw  ];
-      f_BW   = (D.f[TE  ])[kte  ];
-      f_TE   = (D.f[BW  ])[kbw  ];
-      f_TW   = (D.f[BE  ])[kbe  ];
-      f_BE   = (D.f[TW  ])[ktw  ];
-      f_BS   = (D.f[TN  ])[ktn  ];
-      f_TN   = (D.f[BS  ])[kbs  ];
-      f_TS   = (D.f[BN  ])[kbn  ];
-      f_BN   = (D.f[TS  ])[kts  ];
-      f_BSW  = (D.f[TNE ])[ktne ];
-      f_BNE  = (D.f[TSW ])[ktsw ];
-      f_BNW  = (D.f[TSE ])[ktse ];
-      f_BSE  = (D.f[TNW ])[ktnw ];
-      f_TSW  = (D.f[BNE ])[kbne ];
-      f_TNE  = (D.f[BSW ])[kbsw ];
-      f_TNW  = (D.f[BSE ])[kbse ];
-      f_TSE  = (D.f[BNW ])[kbnw ];
+      f_W    = (D.f[DIR_P00   ])[ke   ];
+      f_E    = (D.f[DIR_M00   ])[kw   ];
+      f_S    = (D.f[DIR_0P0   ])[kn   ];
+      f_N    = (D.f[DIR_0M0   ])[ks   ];
+      f_B    = (D.f[DIR_00P   ])[kt   ];
+      f_T    = (D.f[DIR_00M   ])[kb   ];
+      f_SW   = (D.f[DIR_PP0  ])[kne  ];
+      f_NE   = (D.f[DIR_MM0  ])[ksw  ];
+      f_NW   = (D.f[DIR_PM0  ])[kse  ];
+      f_SE   = (D.f[DIR_MP0  ])[knw  ];
+      f_BW   = (D.f[DIR_P0P  ])[kte  ];
+      f_TE   = (D.f[DIR_M0M  ])[kbw  ];
+      f_TW   = (D.f[DIR_P0M  ])[kbe  ];
+      f_BE   = (D.f[DIR_M0P  ])[ktw  ];
+      f_BS   = (D.f[DIR_0PP  ])[ktn  ];
+      f_TN   = (D.f[DIR_0MM  ])[kbs  ];
+      f_TS   = (D.f[DIR_0PM  ])[kbn  ];
+      f_BN   = (D.f[DIR_0MP  ])[kts  ];
+      f_BSW  = (D.f[DIR_PPP ])[ktne ];
+      f_BNE  = (D.f[DIR_MMP ])[ktsw ];
+      f_BNW  = (D.f[DIR_PMP ])[ktse ];
+      f_BSE  = (D.f[DIR_MPP ])[ktnw ];
+      f_TSW  = (D.f[DIR_PPM ])[kbne ];
+      f_TNE  = (D.f[DIR_MMM ])[kbsw ];
+      f_TNW  = (D.f[DIR_PMM ])[kbse ];
+      f_TSE  = (D.f[DIR_MPM ])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, drho, feq, q, m3;
       drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
 				f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + 
-				f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[REST])[kzero]); 
+				f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[DIR_000])[kzero]); 
 
       vx1    =  (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
                 ((f_BE - f_TW)   + (f_TE - f_BW))   + ((f_SE - f_NW)   + (f_NE - f_SW)) +
@@ -217,67 +217,67 @@ extern "C" __global__ void QDevice3rdMomentsComp27(
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D.f[E   ] = &distributions[E   *numberOfLBnodes];
-         D.f[W   ] = &distributions[W   *numberOfLBnodes];
-         D.f[N   ] = &distributions[N   *numberOfLBnodes];
-         D.f[S   ] = &distributions[S   *numberOfLBnodes];
-         D.f[T   ] = &distributions[T   *numberOfLBnodes];
-         D.f[B   ] = &distributions[B   *numberOfLBnodes];
-         D.f[NE  ] = &distributions[NE  *numberOfLBnodes];
-         D.f[SW  ] = &distributions[SW  *numberOfLBnodes];
-         D.f[SE  ] = &distributions[SE  *numberOfLBnodes];
-         D.f[NW  ] = &distributions[NW  *numberOfLBnodes];
-         D.f[TE  ] = &distributions[TE  *numberOfLBnodes];
-         D.f[BW  ] = &distributions[BW  *numberOfLBnodes];
-         D.f[BE  ] = &distributions[BE  *numberOfLBnodes];
-         D.f[TW  ] = &distributions[TW  *numberOfLBnodes];
-         D.f[TN  ] = &distributions[TN  *numberOfLBnodes];
-         D.f[BS  ] = &distributions[BS  *numberOfLBnodes];
-         D.f[BN  ] = &distributions[BN  *numberOfLBnodes];
-         D.f[TS  ] = &distributions[TS  *numberOfLBnodes];
-         D.f[REST] = &distributions[REST*numberOfLBnodes];
-         D.f[TNE ] = &distributions[TNE *numberOfLBnodes];
-         D.f[TSW ] = &distributions[TSW *numberOfLBnodes];
-         D.f[TSE ] = &distributions[TSE *numberOfLBnodes];
-         D.f[TNW ] = &distributions[TNW *numberOfLBnodes];
-         D.f[BNE ] = &distributions[BNE *numberOfLBnodes];
-         D.f[BSW ] = &distributions[BSW *numberOfLBnodes];
-         D.f[BSE ] = &distributions[BSE *numberOfLBnodes];
-         D.f[BNW ] = &distributions[BNW *numberOfLBnodes];
+         D.f[DIR_P00   ] = &distributions[DIR_P00   *numberOfLBnodes];
+         D.f[DIR_M00   ] = &distributions[DIR_M00   *numberOfLBnodes];
+         D.f[DIR_0P0   ] = &distributions[DIR_0P0   *numberOfLBnodes];
+         D.f[DIR_0M0   ] = &distributions[DIR_0M0   *numberOfLBnodes];
+         D.f[DIR_00P   ] = &distributions[DIR_00P   *numberOfLBnodes];
+         D.f[DIR_00M   ] = &distributions[DIR_00M   *numberOfLBnodes];
+         D.f[DIR_PP0  ] = &distributions[DIR_PP0  *numberOfLBnodes];
+         D.f[DIR_MM0  ] = &distributions[DIR_MM0  *numberOfLBnodes];
+         D.f[DIR_PM0  ] = &distributions[DIR_PM0  *numberOfLBnodes];
+         D.f[DIR_MP0  ] = &distributions[DIR_MP0  *numberOfLBnodes];
+         D.f[DIR_P0P  ] = &distributions[DIR_P0P  *numberOfLBnodes];
+         D.f[DIR_M0M  ] = &distributions[DIR_M0M  *numberOfLBnodes];
+         D.f[DIR_P0M  ] = &distributions[DIR_P0M  *numberOfLBnodes];
+         D.f[DIR_M0P  ] = &distributions[DIR_M0P  *numberOfLBnodes];
+         D.f[DIR_0PP  ] = &distributions[DIR_0PP  *numberOfLBnodes];
+         D.f[DIR_0MM  ] = &distributions[DIR_0MM  *numberOfLBnodes];
+         D.f[DIR_0PM  ] = &distributions[DIR_0PM  *numberOfLBnodes];
+         D.f[DIR_0MP  ] = &distributions[DIR_0MP  *numberOfLBnodes];
+         D.f[DIR_000] = &distributions[DIR_000*numberOfLBnodes];
+         D.f[DIR_PPP ] = &distributions[DIR_PPP *numberOfLBnodes];
+         D.f[DIR_MMP ] = &distributions[DIR_MMP *numberOfLBnodes];
+         D.f[DIR_PMP ] = &distributions[DIR_PMP *numberOfLBnodes];
+         D.f[DIR_MPP ] = &distributions[DIR_MPP *numberOfLBnodes];
+         D.f[DIR_PPM ] = &distributions[DIR_PPM *numberOfLBnodes];
+         D.f[DIR_MMM ] = &distributions[DIR_MMM *numberOfLBnodes];
+         D.f[DIR_PMM ] = &distributions[DIR_PMM *numberOfLBnodes];
+         D.f[DIR_MPM ] = &distributions[DIR_MPM *numberOfLBnodes];
       } 
       else
       {
-         D.f[W   ] = &distributions[E   *numberOfLBnodes];
-         D.f[E   ] = &distributions[W   *numberOfLBnodes];
-         D.f[S   ] = &distributions[N   *numberOfLBnodes];
-         D.f[N   ] = &distributions[S   *numberOfLBnodes];
-         D.f[B   ] = &distributions[T   *numberOfLBnodes];
-         D.f[T   ] = &distributions[B   *numberOfLBnodes];
-         D.f[SW  ] = &distributions[NE  *numberOfLBnodes];
-         D.f[NE  ] = &distributions[SW  *numberOfLBnodes];
-         D.f[NW  ] = &distributions[SE  *numberOfLBnodes];
-         D.f[SE  ] = &distributions[NW  *numberOfLBnodes];
-         D.f[BW  ] = &distributions[TE  *numberOfLBnodes];
-         D.f[TE  ] = &distributions[BW  *numberOfLBnodes];
-         D.f[TW  ] = &distributions[BE  *numberOfLBnodes];
-         D.f[BE  ] = &distributions[TW  *numberOfLBnodes];
-         D.f[BS  ] = &distributions[TN  *numberOfLBnodes];
-         D.f[TN  ] = &distributions[BS  *numberOfLBnodes];
-         D.f[TS  ] = &distributions[BN  *numberOfLBnodes];
-         D.f[BN  ] = &distributions[TS  *numberOfLBnodes];
-         D.f[REST] = &distributions[REST*numberOfLBnodes];
-         D.f[TNE ] = &distributions[BSW *numberOfLBnodes];
-         D.f[TSW ] = &distributions[BNE *numberOfLBnodes];
-         D.f[TSE ] = &distributions[BNW *numberOfLBnodes];
-         D.f[TNW ] = &distributions[BSE *numberOfLBnodes];
-         D.f[BNE ] = &distributions[TSW *numberOfLBnodes];
-         D.f[BSW ] = &distributions[TNE *numberOfLBnodes];
-         D.f[BSE ] = &distributions[TNW *numberOfLBnodes];
-         D.f[BNW ] = &distributions[TSE *numberOfLBnodes];
+         D.f[DIR_M00   ] = &distributions[DIR_P00   *numberOfLBnodes];
+         D.f[DIR_P00   ] = &distributions[DIR_M00   *numberOfLBnodes];
+         D.f[DIR_0M0   ] = &distributions[DIR_0P0   *numberOfLBnodes];
+         D.f[DIR_0P0   ] = &distributions[DIR_0M0   *numberOfLBnodes];
+         D.f[DIR_00M   ] = &distributions[DIR_00P   *numberOfLBnodes];
+         D.f[DIR_00P   ] = &distributions[DIR_00M   *numberOfLBnodes];
+         D.f[DIR_MM0  ] = &distributions[DIR_PP0  *numberOfLBnodes];
+         D.f[DIR_PP0  ] = &distributions[DIR_MM0  *numberOfLBnodes];
+         D.f[DIR_MP0  ] = &distributions[DIR_PM0  *numberOfLBnodes];
+         D.f[DIR_PM0  ] = &distributions[DIR_MP0  *numberOfLBnodes];
+         D.f[DIR_M0M  ] = &distributions[DIR_P0P  *numberOfLBnodes];
+         D.f[DIR_P0P  ] = &distributions[DIR_M0M  *numberOfLBnodes];
+         D.f[DIR_M0P  ] = &distributions[DIR_P0M  *numberOfLBnodes];
+         D.f[DIR_P0M  ] = &distributions[DIR_M0P  *numberOfLBnodes];
+         D.f[DIR_0MM  ] = &distributions[DIR_0PP  *numberOfLBnodes];
+         D.f[DIR_0PP  ] = &distributions[DIR_0MM  *numberOfLBnodes];
+         D.f[DIR_0MP  ] = &distributions[DIR_0PM  *numberOfLBnodes];
+         D.f[DIR_0PM  ] = &distributions[DIR_0MP  *numberOfLBnodes];
+         D.f[DIR_000] = &distributions[DIR_000*numberOfLBnodes];
+         D.f[DIR_PPP ] = &distributions[DIR_MMM *numberOfLBnodes];
+         D.f[DIR_MMP ] = &distributions[DIR_PPM *numberOfLBnodes];
+         D.f[DIR_PMP ] = &distributions[DIR_MPM *numberOfLBnodes];
+         D.f[DIR_MPP ] = &distributions[DIR_PMM *numberOfLBnodes];
+         D.f[DIR_PPM ] = &distributions[DIR_MMP *numberOfLBnodes];
+         D.f[DIR_MMM ] = &distributions[DIR_PPP *numberOfLBnodes];
+         D.f[DIR_PMM ] = &distributions[DIR_MPP *numberOfLBnodes];
+         D.f[DIR_MPM ] = &distributions[DIR_PMP *numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
-         //(D.f[REST])[k]=c1o10;
+         //(D.f[DIR_000])[k]=c1o10;
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  
 	  
@@ -288,8 +288,8 @@ extern "C" __global__ void QDevice3rdMomentsComp27(
       {
 		 m3 = f_E - f_W - c2o1 * drho * c2o27 * (c3o1*( vx1        ));
          feq=c2o27* (drho/*+three*( vx1        )*/+c9o2*( vx1        )*( vx1        ) * (c1o1 + drho)-cu_sq); 
-         (D.f[W])[kw]=(c1o1-q)/(c1o1+q)*(f_E-f_W-m3+(f_E+f_W-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_E+f_W))/(c1o1+q)+(m3*c1o2);
-         //(D.f[W])[kw]=zero;
+         (D.f[DIR_M00])[kw]=(c1o1-q)/(c1o1+q)*(f_E-f_W-m3+(f_E+f_W-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_E+f_W))/(c1o1+q)+(m3*c1o2);
+         //(D.f[DIR_M00])[kw]=zero;
       }
 
       q = q_dirW[k];
@@ -297,8 +297,8 @@ extern "C" __global__ void QDevice3rdMomentsComp27(
       {
 		 m3 = f_W - f_E - c2o1 * drho * c2o27 * (c3o1*(-vx1        ));
          feq=c2o27* (drho/*+three*(-vx1        )*/+c9o2*(-vx1        )*(-vx1        ) * (c1o1 + drho)-cu_sq); 
-         (D.f[E])[ke]=(c1o1-q)/(c1o1+q)*(f_W-f_E-m3+(f_W+f_E-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_W+f_E))/(c1o1+q)+(m3*c1o2);
-         //(D.f[E])[ke]=zero;
+         (D.f[DIR_P00])[ke]=(c1o1-q)/(c1o1+q)*(f_W-f_E-m3+(f_W+f_E-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_W+f_E))/(c1o1+q)+(m3*c1o2);
+         //(D.f[DIR_P00])[ke]=zero;
       }
 
       q = q_dirN[k];
@@ -306,8 +306,8 @@ extern "C" __global__ void QDevice3rdMomentsComp27(
       {
 		 m3 = f_N - f_S - c2o1 * drho * c2o27 * (c3o1*( vx2        ));
          feq=c2o27* (drho/*+three*(    vx2     )*/+c9o2*(     vx2    )*(     vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[S])[ks]=(c1o1-q)/(c1o1+q)*(f_N-f_S-m3+(f_N+f_S-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_N+f_S))/(c1o1+q)+(m3*c1o2);
-         //(D.f[S])[ks]=zero;
+         (D.f[DIR_0M0])[ks]=(c1o1-q)/(c1o1+q)*(f_N-f_S-m3+(f_N+f_S-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_N+f_S))/(c1o1+q)+(m3*c1o2);
+         //(D.f[DIR_0M0])[ks]=zero;
       }
 
       q = q_dirS[k];
@@ -315,8 +315,8 @@ extern "C" __global__ void QDevice3rdMomentsComp27(
       {
 		 m3 = f_S - f_N - c2o1 * drho * c2o27 * (c3o1*(   -vx2     ));
          feq=c2o27* (drho/*+three*(   -vx2     )*/+c9o2*(    -vx2    )*(    -vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[N])[kn]=(c1o1-q)/(c1o1+q)*(f_S-f_N-m3+(f_S+f_N-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_S+f_N))/(c1o1+q)+(m3*c1o2);
-         //(D.f[N])[kn]=zero;
+         (D.f[DIR_0P0])[kn]=(c1o1-q)/(c1o1+q)*(f_S-f_N-m3+(f_S+f_N-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_S+f_N))/(c1o1+q)+(m3*c1o2);
+         //(D.f[DIR_0P0])[kn]=zero;
       }
 
       q = q_dirT[k];
@@ -324,8 +324,8 @@ extern "C" __global__ void QDevice3rdMomentsComp27(
       {
 		 m3 = f_T - f_B - c2o1 * drho * c2o27 * (c3o1*(         vx3));
          feq=c2o27* (drho/*+three*(         vx3)*/+c9o2*(         vx3)*(         vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[B])[kb]=(c1o1-q)/(c1o1+q)*(f_T-f_B-m3+(f_T+f_B-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_T+f_B))/(c1o1+q)+(m3*c1o2);
-         //(D.f[B])[kb]=one;
+         (D.f[DIR_00M])[kb]=(c1o1-q)/(c1o1+q)*(f_T-f_B-m3+(f_T+f_B-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_T+f_B))/(c1o1+q)+(m3*c1o2);
+         //(D.f[DIR_00M])[kb]=one;
       }
 
       q = q_dirB[k];
@@ -333,8 +333,8 @@ extern "C" __global__ void QDevice3rdMomentsComp27(
       {
 		 m3 = f_B - f_T - c2o1 * drho * c2o27 * (c3o1*(        -vx3));
          feq=c2o27* (drho/*+three*(        -vx3)*/+c9o2*(        -vx3)*(        -vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[T])[kt]=(c1o1-q)/(c1o1+q)*(f_B-f_T-m3+(f_B+f_T-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_B+f_T))/(c1o1+q)+(m3*c1o2);
-         //(D.f[T])[kt]=zero;
+         (D.f[DIR_00P])[kt]=(c1o1-q)/(c1o1+q)*(f_B-f_T-m3+(f_B+f_T-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_B+f_T))/(c1o1+q)+(m3*c1o2);
+         //(D.f[DIR_00P])[kt]=zero;
       }
 
       q = q_dirNE[k];
@@ -342,8 +342,8 @@ extern "C" __global__ void QDevice3rdMomentsComp27(
       {
 		 m3 = f_NE - f_SW - c2o1 * drho * c1o54 * (c3o1*( vx1+vx2    ));
          feq=c1o54* (drho/*+three*( vx1+vx2    )*/+c9o2*( vx1+vx2    )*( vx1+vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[SW])[ksw]=(c1o1-q)/(c1o1+q)*(f_NE-f_SW-m3+(f_NE+f_SW-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_NE+f_SW))/(c1o1+q)+(m3*c1o2);
-         //(D.f[SW])[ksw]=zero;
+         (D.f[DIR_MM0])[ksw]=(c1o1-q)/(c1o1+q)*(f_NE-f_SW-m3+(f_NE+f_SW-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_NE+f_SW))/(c1o1+q)+(m3*c1o2);
+         //(D.f[DIR_MM0])[ksw]=zero;
       }
 
       q = q_dirSW[k];
@@ -351,8 +351,8 @@ extern "C" __global__ void QDevice3rdMomentsComp27(
       {
 		 m3 = f_SW - f_NE - c2o1 * drho * c1o54 * (c3o1*(-vx1-vx2    ));
          feq=c1o54* (drho/*+three*(-vx1-vx2    )*/+c9o2*(-vx1-vx2    )*(-vx1-vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[NE])[kne]=(c1o1-q)/(c1o1+q)*(f_SW-f_NE-m3+(f_SW+f_NE-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_SW+f_NE))/(c1o1+q)+(m3*c1o2);
-         //(D.f[NE])[kne]=zero;
+         (D.f[DIR_PP0])[kne]=(c1o1-q)/(c1o1+q)*(f_SW-f_NE-m3+(f_SW+f_NE-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_SW+f_NE))/(c1o1+q)+(m3*c1o2);
+         //(D.f[DIR_PP0])[kne]=zero;
       }
 
       q = q_dirSE[k];
@@ -360,8 +360,8 @@ extern "C" __global__ void QDevice3rdMomentsComp27(
       {
 		 m3 = f_SE - f_NW - c2o1 * drho * c1o54 * (c3o1*( vx1-vx2    ));
          feq=c1o54* (drho/*+three*( vx1-vx2    )*/+c9o2*( vx1-vx2    )*( vx1-vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[NW])[knw]=(c1o1-q)/(c1o1+q)*(f_SE-f_NW-m3+(f_SE+f_NW-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_SE+f_NW))/(c1o1+q)+(m3*c1o2);
-         //(D.f[NW])[knw]=zero;
+         (D.f[DIR_MP0])[knw]=(c1o1-q)/(c1o1+q)*(f_SE-f_NW-m3+(f_SE+f_NW-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_SE+f_NW))/(c1o1+q)+(m3*c1o2);
+         //(D.f[DIR_MP0])[knw]=zero;
       }
 
       q = q_dirNW[k];
@@ -369,8 +369,8 @@ extern "C" __global__ void QDevice3rdMomentsComp27(
       {
 		 m3 = f_NW - f_SE - c2o1 * drho * c1o54 * (c3o1*(-vx1+vx2    ));
          feq=c1o54* (drho/*+three*(-vx1+vx2    )*/+c9o2*(-vx1+vx2    )*(-vx1+vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[SE])[kse]=(c1o1-q)/(c1o1+q)*(f_NW-f_SE-m3+(f_NW+f_SE-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_NW+f_SE))/(c1o1+q)+(m3*c1o2);
-         //(D.f[SE])[kse]=zero;
+         (D.f[DIR_PM0])[kse]=(c1o1-q)/(c1o1+q)*(f_NW-f_SE-m3+(f_NW+f_SE-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_NW+f_SE))/(c1o1+q)+(m3*c1o2);
+         //(D.f[DIR_PM0])[kse]=zero;
       }
 
       q = q_dirTE[k];
@@ -378,8 +378,8 @@ extern "C" __global__ void QDevice3rdMomentsComp27(
       {
 		 m3 = f_TE - f_BW - c2o1 * drho * c1o54 * (c3o1*( vx1    +vx3));
          feq=c1o54* (drho/*+three*( vx1    +vx3)*/+c9o2*( vx1    +vx3)*( vx1    +vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BW])[kbw]=(c1o1-q)/(c1o1+q)*(f_TE-f_BW-m3+(f_TE+f_BW-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_TE+f_BW))/(c1o1+q)+(m3*c1o2);
-         //(D.f[BW])[kbw]=zero;
+         (D.f[DIR_M0M])[kbw]=(c1o1-q)/(c1o1+q)*(f_TE-f_BW-m3+(f_TE+f_BW-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_TE+f_BW))/(c1o1+q)+(m3*c1o2);
+         //(D.f[DIR_M0M])[kbw]=zero;
       }
 
       q = q_dirBW[k];
@@ -387,8 +387,8 @@ extern "C" __global__ void QDevice3rdMomentsComp27(
       {
 		 m3 = f_BW - f_TE - c2o1 * drho * c1o54 * (c3o1*(-vx1    -vx3));
          feq=c1o54* (drho/*+three*(-vx1    -vx3)*/+c9o2*(-vx1    -vx3)*(-vx1    -vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TE])[kte]=(c1o1-q)/(c1o1+q)*(f_BW-f_TE-m3+(f_BW+f_TE-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_BW+f_TE))/(c1o1+q)+(m3*c1o2);
-         //(D.f[TE])[kte]=zero;
+         (D.f[DIR_P0P])[kte]=(c1o1-q)/(c1o1+q)*(f_BW-f_TE-m3+(f_BW+f_TE-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_BW+f_TE))/(c1o1+q)+(m3*c1o2);
+         //(D.f[DIR_P0P])[kte]=zero;
       }
 
       q = q_dirBE[k];
@@ -396,8 +396,8 @@ extern "C" __global__ void QDevice3rdMomentsComp27(
       {
 		 m3 = f_BE - f_TW - c2o1 * drho * c1o54 * (c3o1*( vx1    -vx3));
          feq=c1o54* (drho/*+three*( vx1    -vx3)*/+c9o2*( vx1    -vx3)*( vx1    -vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TW])[ktw]=(c1o1-q)/(c1o1+q)*(f_BE-f_TW-m3+(f_BE+f_TW-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_BE+f_TW))/(c1o1+q)+(m3*c1o2);
-         //(D.f[TW])[ktw]=zero;
+         (D.f[DIR_M0P])[ktw]=(c1o1-q)/(c1o1+q)*(f_BE-f_TW-m3+(f_BE+f_TW-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_BE+f_TW))/(c1o1+q)+(m3*c1o2);
+         //(D.f[DIR_M0P])[ktw]=zero;
       }
 
       q = q_dirTW[k];
@@ -405,8 +405,8 @@ extern "C" __global__ void QDevice3rdMomentsComp27(
       {
 		 m3 = f_TW - f_BE - c2o1 * drho * c1o54 * (c3o1*(-vx1    +vx3));
          feq=c1o54* (drho/*+three*(-vx1    +vx3)*/+c9o2*(-vx1    +vx3)*(-vx1    +vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BE])[kbe]=(c1o1-q)/(c1o1+q)*(f_TW-f_BE-m3+(f_TW+f_BE-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_TW+f_BE))/(c1o1+q)+(m3*c1o2);
-         //(D.f[BE])[kbe]=zero;
+         (D.f[DIR_P0M])[kbe]=(c1o1-q)/(c1o1+q)*(f_TW-f_BE-m3+(f_TW+f_BE-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_TW+f_BE))/(c1o1+q)+(m3*c1o2);
+         //(D.f[DIR_P0M])[kbe]=zero;
       }
 
       q = q_dirTN[k];
@@ -414,8 +414,8 @@ extern "C" __global__ void QDevice3rdMomentsComp27(
       {
 		 m3 = f_TN - f_BS - c2o1 * drho * c1o54 * (c3o1*(     vx2+vx3));
          feq=c1o54* (drho/*+three*(     vx2+vx3)*/+c9o2*(     vx2+vx3)*(     vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BS])[kbs]=(c1o1-q)/(c1o1+q)*(f_TN-f_BS-m3+(f_TN+f_BS-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_TN+f_BS))/(c1o1+q)+(m3*c1o2);
-         //(D.f[BS])[kbs]=zero;
+         (D.f[DIR_0MM])[kbs]=(c1o1-q)/(c1o1+q)*(f_TN-f_BS-m3+(f_TN+f_BS-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_TN+f_BS))/(c1o1+q)+(m3*c1o2);
+         //(D.f[DIR_0MM])[kbs]=zero;
       }
 
       q = q_dirBS[k];
@@ -423,8 +423,8 @@ extern "C" __global__ void QDevice3rdMomentsComp27(
       {
 		 m3 = f_BS - f_TN - c2o1 * drho * c1o54 * (c3o1*(    -vx2-vx3));
          feq=c1o54* (drho/*+three*(    -vx2-vx3)*/+c9o2*(    -vx2-vx3)*(    -vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TN])[ktn]=(c1o1-q)/(c1o1+q)*(f_BS-f_TN-m3+(f_BS+f_TN-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_BS+f_TN))/(c1o1+q)+(m3*c1o2);
-         //(D.f[TN])[ktn]=zero;
+         (D.f[DIR_0PP])[ktn]=(c1o1-q)/(c1o1+q)*(f_BS-f_TN-m3+(f_BS+f_TN-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_BS+f_TN))/(c1o1+q)+(m3*c1o2);
+         //(D.f[DIR_0PP])[ktn]=zero;
       }
 
       q = q_dirBN[k];
@@ -432,8 +432,8 @@ extern "C" __global__ void QDevice3rdMomentsComp27(
       {
 		 m3 = f_BN - f_TS - c2o1 * drho * c1o54 * (c3o1*(     vx2-vx3));
          feq=c1o54* (drho/*+three*(     vx2-vx3)*/+c9o2*(     vx2-vx3)*(     vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TS])[kts]=(c1o1-q)/(c1o1+q)*(f_BN-f_TS-m3+(f_BN+f_TS-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_BN+f_TS))/(c1o1+q)+(m3*c1o2);
-         //(D.f[TS])[kts]=zero;
+         (D.f[DIR_0MP])[kts]=(c1o1-q)/(c1o1+q)*(f_BN-f_TS-m3+(f_BN+f_TS-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_BN+f_TS))/(c1o1+q)+(m3*c1o2);
+         //(D.f[DIR_0MP])[kts]=zero;
       }
 
       q = q_dirTS[k];
@@ -441,8 +441,8 @@ extern "C" __global__ void QDevice3rdMomentsComp27(
       {
 		 m3 = f_TS - f_BN - c2o1 * drho * c1o54 * (c3o1*(    -vx2+vx3));
          feq=c1o54* (drho/*+three*(    -vx2+vx3)*/+c9o2*(    -vx2+vx3)*(    -vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BN])[kbn]=(c1o1-q)/(c1o1+q)*(f_TS-f_BN-m3+(f_TS+f_BN-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_TS+f_BN))/(c1o1+q)+(m3*c1o2);
-         //(D.f[BN])[kbn]=zero;
+         (D.f[DIR_0PM])[kbn]=(c1o1-q)/(c1o1+q)*(f_TS-f_BN-m3+(f_TS+f_BN-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_TS+f_BN))/(c1o1+q)+(m3*c1o2);
+         //(D.f[DIR_0PM])[kbn]=zero;
       }
 
       q = q_dirTNE[k];
@@ -450,8 +450,8 @@ extern "C" __global__ void QDevice3rdMomentsComp27(
       {
 		 m3 = f_TNE - f_BSW - c2o1 * drho * c1o216 * (c3o1*( vx1+vx2+vx3));
          feq=c1o216*(drho/*+three*( vx1+vx2+vx3)*/+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BSW])[kbsw]=(c1o1-q)/(c1o1+q)*(f_TNE-f_BSW-m3+(f_TNE+f_BSW-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_TNE+f_BSW))/(c1o1+q)+(m3*c1o2);
-         //(D.f[BSW])[kbsw]=zero;
+         (D.f[DIR_MMM])[kbsw]=(c1o1-q)/(c1o1+q)*(f_TNE-f_BSW-m3+(f_TNE+f_BSW-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_TNE+f_BSW))/(c1o1+q)+(m3*c1o2);
+         //(D.f[DIR_MMM])[kbsw]=zero;
       }
 
       q = q_dirBSW[k];
@@ -459,8 +459,8 @@ extern "C" __global__ void QDevice3rdMomentsComp27(
       {
 		 m3 = f_BSW - f_TNE - c2o1 * drho * c1o216 * (c3o1*(-vx1-vx2-vx3));
          feq=c1o216*(drho/*+three*(-vx1-vx2-vx3)*/+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TNE])[ktne]=(c1o1-q)/(c1o1+q)*(f_BSW-f_TNE-m3+(f_BSW+f_TNE-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_BSW+f_TNE))/(c1o1+q)+(m3*c1o2);
-         //(D.f[TNE])[ktne]=zero;
+         (D.f[DIR_PPP])[ktne]=(c1o1-q)/(c1o1+q)*(f_BSW-f_TNE-m3+(f_BSW+f_TNE-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_BSW+f_TNE))/(c1o1+q)+(m3*c1o2);
+         //(D.f[DIR_PPP])[ktne]=zero;
       }
 
       q = q_dirBNE[k];
@@ -468,8 +468,8 @@ extern "C" __global__ void QDevice3rdMomentsComp27(
       {
 		 m3 = f_BNE - f_TSW - c2o1 * drho * c1o216 * (c3o1*( vx1+vx2-vx3));
          feq=c1o216*(drho/*+three*( vx1+vx2-vx3)*/+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TSW])[ktsw]=(c1o1-q)/(c1o1+q)*(f_BNE-f_TSW-m3+(f_BNE+f_TSW-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_BNE+f_TSW))/(c1o1+q)+(m3*c1o2);
-         //(D.f[TSW])[ktsw]=zero;
+         (D.f[DIR_MMP])[ktsw]=(c1o1-q)/(c1o1+q)*(f_BNE-f_TSW-m3+(f_BNE+f_TSW-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_BNE+f_TSW))/(c1o1+q)+(m3*c1o2);
+         //(D.f[DIR_MMP])[ktsw]=zero;
       }
 
       q = q_dirTSW[k];
@@ -477,8 +477,8 @@ extern "C" __global__ void QDevice3rdMomentsComp27(
       {
 		 m3 = f_TSW - f_BNE - c2o1 * drho * c1o216 * (c3o1*(-vx1-vx2+vx3));
          feq=c1o216*(drho/*+three*(-vx1-vx2+vx3)*/+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BNE])[kbne]=(c1o1-q)/(c1o1+q)*(f_TSW-f_BNE-m3+(f_TSW+f_BNE-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_TSW+f_BNE))/(c1o1+q)+(m3*c1o2);
-         //(D.f[BNE])[kbne]=zero;
+         (D.f[DIR_PPM])[kbne]=(c1o1-q)/(c1o1+q)*(f_TSW-f_BNE-m3+(f_TSW+f_BNE-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_TSW+f_BNE))/(c1o1+q)+(m3*c1o2);
+         //(D.f[DIR_PPM])[kbne]=zero;
       }
 
       q = q_dirTSE[k];
@@ -486,8 +486,8 @@ extern "C" __global__ void QDevice3rdMomentsComp27(
       {
 		 m3 = f_TSE - f_BNW - c2o1 * drho * c1o216 * (c3o1*( vx1-vx2+vx3));
          feq=c1o216*(drho/*+three*( vx1-vx2+vx3)*/+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BNW])[kbnw]=(c1o1-q)/(c1o1+q)*(f_TSE-f_BNW-m3+(f_TSE+f_BNW-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_TSE+f_BNW))/(c1o1+q)+(m3*c1o2);
-         //(D.f[BNW])[kbnw]=zero;
+         (D.f[DIR_MPM])[kbnw]=(c1o1-q)/(c1o1+q)*(f_TSE-f_BNW-m3+(f_TSE+f_BNW-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_TSE+f_BNW))/(c1o1+q)+(m3*c1o2);
+         //(D.f[DIR_MPM])[kbnw]=zero;
       }
 
       q = q_dirBNW[k];
@@ -495,8 +495,8 @@ extern "C" __global__ void QDevice3rdMomentsComp27(
       {
 		 m3 = f_BNW - f_TSE - c2o1 * drho * c1o216 * (c3o1*(-vx1+vx2-vx3));
          feq=c1o216*(drho/*+three*(-vx1+vx2-vx3)*/+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TSE])[ktse]=(c1o1-q)/(c1o1+q)*(f_BNW-f_TSE-m3+(f_BNW+f_TSE-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_BNW+f_TSE))/(c1o1+q)+(m3*c1o2);
-         //(D.f[TSE])[ktse]=zero;
+         (D.f[DIR_PMP])[ktse]=(c1o1-q)/(c1o1+q)*(f_BNW-f_TSE-m3+(f_BNW+f_TSE-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_BNW+f_TSE))/(c1o1+q)+(m3*c1o2);
+         //(D.f[DIR_PMP])[ktse]=zero;
       }
 
       q = q_dirBSE[k];
@@ -504,8 +504,8 @@ extern "C" __global__ void QDevice3rdMomentsComp27(
       {
 		 m3 = f_BSE - f_TNW - c2o1 * drho * c1o216 * (c3o1*( vx1-vx2-vx3));
          feq=c1o216*(drho/*+three*( vx1-vx2-vx3)*/+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TNW])[ktnw]=(c1o1-q)/(c1o1+q)*(f_BSE-f_TNW-m3+(f_BSE+f_TNW-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_BSE+f_TNW))/(c1o1+q)+(m3*c1o2);
-         //(D.f[TNW])[ktnw]=zero;
+         (D.f[DIR_MPP])[ktnw]=(c1o1-q)/(c1o1+q)*(f_BSE-f_TNW-m3+(f_BSE+f_TNW-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_BSE+f_TNW))/(c1o1+q)+(m3*c1o2);
+         //(D.f[DIR_MPP])[ktnw]=zero;
       }
 
       q = q_dirTNW[k];
@@ -513,8 +513,8 @@ extern "C" __global__ void QDevice3rdMomentsComp27(
       {
 		 m3 = f_TNW - f_BSE - c2o1 * drho * c1o216 * (c3o1*(-vx1+vx2+vx3));
          feq=c1o216*(drho/*+three*(-vx1+vx2+vx3)*/+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BSE])[kbse]=(c1o1-q)/(c1o1+q)*(f_TNW-f_BSE-m3+(f_TNW+f_BSE-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_TNW+f_BSE))/(c1o1+q)+(m3*c1o2);
-         //(D.f[BSE])[kbse]=zero;
+         (D.f[DIR_PMM])[kbse]=(c1o1-q)/(c1o1+q)*(f_TNW-f_BSE-m3+(f_TNW+f_BSE-c2o1*feq*omega)/(c1o1-omega))*c1o2+(q*(f_TNW+f_BSE))/(c1o1+q)+(m3*c1o2);
+         //(D.f[DIR_PMM])[kbse]=zero;
       }
    }
 }
@@ -559,80 +559,77 @@ extern "C" __global__ void QDevice3rdMomentsComp27(
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QDeviceIncompHighNu27(int inx,
-												 int iny,
-												 real* DD, 
+__global__ void QDeviceIncompHighNu27(real* DD, 
 												 int* k_Q, 
 												 real* QQ,
-												 unsigned int  numberOfBCnodes,
-												 int numberOfNodes, 
+												 unsigned int numberOfBCnodes,
 												 real om1, 
 												 unsigned int* neighborX,
 												 unsigned int* neighborY,
 												 unsigned int* neighborZ,
-												 unsigned int size_Mat, 
+												 unsigned int numberOfLBnodes, 
 												 bool isEvenTimestep)
 {
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[E   ] = &DD[E   *size_Mat];
-      D.f[W   ] = &DD[W   *size_Mat];
-      D.f[N   ] = &DD[N   *size_Mat];
-      D.f[S   ] = &DD[S   *size_Mat];
-      D.f[T   ] = &DD[T   *size_Mat];
-      D.f[B   ] = &DD[B   *size_Mat];
-      D.f[NE  ] = &DD[NE  *size_Mat];
-      D.f[SW  ] = &DD[SW  *size_Mat];
-      D.f[SE  ] = &DD[SE  *size_Mat];
-      D.f[NW  ] = &DD[NW  *size_Mat];
-      D.f[TE  ] = &DD[TE  *size_Mat];
-      D.f[BW  ] = &DD[BW  *size_Mat];
-      D.f[BE  ] = &DD[BE  *size_Mat];
-      D.f[TW  ] = &DD[TW  *size_Mat];
-      D.f[TN  ] = &DD[TN  *size_Mat];
-      D.f[BS  ] = &DD[BS  *size_Mat];
-      D.f[BN  ] = &DD[BN  *size_Mat];
-      D.f[TS  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[TNE *size_Mat];
-      D.f[TSW ] = &DD[TSW *size_Mat];
-      D.f[TSE ] = &DD[TSE *size_Mat];
-      D.f[TNW ] = &DD[TNW *size_Mat];
-      D.f[BNE ] = &DD[BNE *size_Mat];
-      D.f[BSW ] = &DD[BSW *size_Mat];
-      D.f[BSE ] = &DD[BSE *size_Mat];
-      D.f[BNW ] = &DD[BNW *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_P00   *numberOfLBnodes];
+      D.f[DIR_M00   ] = &DD[DIR_M00   *numberOfLBnodes];
+      D.f[DIR_0P0   ] = &DD[DIR_0P0   *numberOfLBnodes];
+      D.f[DIR_0M0   ] = &DD[DIR_0M0   *numberOfLBnodes];
+      D.f[DIR_00P   ] = &DD[DIR_00P   *numberOfLBnodes];
+      D.f[DIR_00M   ] = &DD[DIR_00M   *numberOfLBnodes];
+      D.f[DIR_PP0  ] = &DD[DIR_PP0  *numberOfLBnodes];
+      D.f[DIR_MM0  ] = &DD[DIR_MM0  *numberOfLBnodes];
+      D.f[DIR_PM0  ] = &DD[DIR_PM0  *numberOfLBnodes];
+      D.f[DIR_MP0  ] = &DD[DIR_MP0  *numberOfLBnodes];
+      D.f[DIR_P0P  ] = &DD[DIR_P0P  *numberOfLBnodes];
+      D.f[DIR_M0M  ] = &DD[DIR_M0M  *numberOfLBnodes];
+      D.f[DIR_P0M  ] = &DD[DIR_P0M  *numberOfLBnodes];
+      D.f[DIR_M0P  ] = &DD[DIR_M0P  *numberOfLBnodes];
+      D.f[DIR_0PP  ] = &DD[DIR_0PP  *numberOfLBnodes];
+      D.f[DIR_0MM  ] = &DD[DIR_0MM  *numberOfLBnodes];
+      D.f[DIR_0PM  ] = &DD[DIR_0PM  *numberOfLBnodes];
+      D.f[DIR_0MP  ] = &DD[DIR_0MP  *numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000*numberOfLBnodes];
+      D.f[DIR_PPP ] = &DD[DIR_PPP *numberOfLBnodes];
+      D.f[DIR_MMP ] = &DD[DIR_MMP *numberOfLBnodes];
+      D.f[DIR_PMP ] = &DD[DIR_PMP *numberOfLBnodes];
+      D.f[DIR_MPP ] = &DD[DIR_MPP *numberOfLBnodes];
+      D.f[DIR_PPM ] = &DD[DIR_PPM *numberOfLBnodes];
+      D.f[DIR_MMM ] = &DD[DIR_MMM *numberOfLBnodes];
+      D.f[DIR_PMM ] = &DD[DIR_PMM *numberOfLBnodes];
+      D.f[DIR_MPM ] = &DD[DIR_MPM *numberOfLBnodes];
    } 
    else
    {
-      D.f[W   ] = &DD[E   *size_Mat];
-      D.f[E   ] = &DD[W   *size_Mat];
-      D.f[S   ] = &DD[N   *size_Mat];
-      D.f[N   ] = &DD[S   *size_Mat];
-      D.f[B   ] = &DD[T   *size_Mat];
-      D.f[T   ] = &DD[B   *size_Mat];
-      D.f[SW  ] = &DD[NE  *size_Mat];
-      D.f[NE  ] = &DD[SW  *size_Mat];
-      D.f[NW  ] = &DD[SE  *size_Mat];
-      D.f[SE  ] = &DD[NW  *size_Mat];
-      D.f[BW  ] = &DD[TE  *size_Mat];
-      D.f[TE  ] = &DD[BW  *size_Mat];
-      D.f[TW  ] = &DD[BE  *size_Mat];
-      D.f[BE  ] = &DD[TW  *size_Mat];
-      D.f[BS  ] = &DD[TN  *size_Mat];
-      D.f[TN  ] = &DD[BS  *size_Mat];
-      D.f[TS  ] = &DD[BN  *size_Mat];
-      D.f[BN  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[BSW *size_Mat];
-      D.f[TSW ] = &DD[BNE *size_Mat];
-      D.f[TSE ] = &DD[BNW *size_Mat];
-      D.f[TNW ] = &DD[BSE *size_Mat];
-      D.f[BNE ] = &DD[TSW *size_Mat];
-      D.f[BSW ] = &DD[TNE *size_Mat];
-      D.f[BSE ] = &DD[TNW *size_Mat];
-      D.f[BNW ] = &DD[TSE *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_P00   *numberOfLBnodes];
+      D.f[DIR_P00   ] = &DD[DIR_M00   *numberOfLBnodes];
+      D.f[DIR_0M0   ] = &DD[DIR_0P0   *numberOfLBnodes];
+      D.f[DIR_0P0   ] = &DD[DIR_0M0   *numberOfLBnodes];
+      D.f[DIR_00M   ] = &DD[DIR_00P   *numberOfLBnodes];
+      D.f[DIR_00P   ] = &DD[DIR_00M   *numberOfLBnodes];
+      D.f[DIR_MM0  ] = &DD[DIR_PP0  *numberOfLBnodes];
+      D.f[DIR_PP0  ] = &DD[DIR_MM0  *numberOfLBnodes];
+      D.f[DIR_MP0  ] = &DD[DIR_PM0  *numberOfLBnodes];
+      D.f[DIR_PM0  ] = &DD[DIR_MP0  *numberOfLBnodes];
+      D.f[DIR_M0M  ] = &DD[DIR_P0P  *numberOfLBnodes];
+      D.f[DIR_P0P  ] = &DD[DIR_M0M  *numberOfLBnodes];
+      D.f[DIR_M0P  ] = &DD[DIR_P0M  *numberOfLBnodes];
+      D.f[DIR_P0M  ] = &DD[DIR_M0P  *numberOfLBnodes];
+      D.f[DIR_0MM  ] = &DD[DIR_0PP  *numberOfLBnodes];
+      D.f[DIR_0PP  ] = &DD[DIR_0MM  *numberOfLBnodes];
+      D.f[DIR_0MP  ] = &DD[DIR_0PM  *numberOfLBnodes];
+      D.f[DIR_0PM  ] = &DD[DIR_0MP  *numberOfLBnodes];
+      D.f[DIR_000] = &DD[DIR_000*numberOfLBnodes];
+      D.f[DIR_PPP ] = &DD[DIR_MMM *numberOfLBnodes];
+      D.f[DIR_MMP ] = &DD[DIR_PPM *numberOfLBnodes];
+      D.f[DIR_PMP ] = &DD[DIR_MPM *numberOfLBnodes];
+      D.f[DIR_MPP ] = &DD[DIR_PMM *numberOfLBnodes];
+      D.f[DIR_PPM ] = &DD[DIR_MMP *numberOfLBnodes];
+      D.f[DIR_MMM ] = &DD[DIR_PPP *numberOfLBnodes];
+      D.f[DIR_PMM ] = &DD[DIR_MPP *numberOfLBnodes];
+      D.f[DIR_MPM ] = &DD[DIR_PMP *numberOfLBnodes];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -645,7 +642,7 @@ extern "C" __global__ void QDeviceIncompHighNu27(int inx,
    const unsigned k = nx*(ny*z + y) + x;
    //////////////////////////////////////////////////////////////////////////
 
-   if(k<numberOfNodes)
+   if(k<numberOfBCnodes)
    {
       ////////////////////////////////////////////////////////////////////////////////
       real *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB, 
@@ -653,32 +650,32 @@ extern "C" __global__ void QDeviceIncompHighNu27(int inx,
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[E   * numberOfBCnodes];
-      q_dirW   = &QQ[W   * numberOfBCnodes];
-      q_dirN   = &QQ[N   * numberOfBCnodes];
-      q_dirS   = &QQ[S   * numberOfBCnodes];
-      q_dirT   = &QQ[T   * numberOfBCnodes];
-      q_dirB   = &QQ[B   * numberOfBCnodes];
-      q_dirNE  = &QQ[NE  * numberOfBCnodes];
-      q_dirSW  = &QQ[SW  * numberOfBCnodes];
-      q_dirSE  = &QQ[SE  * numberOfBCnodes];
-      q_dirNW  = &QQ[NW  * numberOfBCnodes];
-      q_dirTE  = &QQ[TE  * numberOfBCnodes];
-      q_dirBW  = &QQ[BW  * numberOfBCnodes];
-      q_dirBE  = &QQ[BE  * numberOfBCnodes];
-      q_dirTW  = &QQ[TW  * numberOfBCnodes];
-      q_dirTN  = &QQ[TN  * numberOfBCnodes];
-      q_dirBS  = &QQ[BS  * numberOfBCnodes];
-      q_dirBN  = &QQ[BN  * numberOfBCnodes];
-      q_dirTS  = &QQ[TS  * numberOfBCnodes];
-      q_dirTNE = &QQ[TNE * numberOfBCnodes];
-      q_dirTSW = &QQ[TSW * numberOfBCnodes];
-      q_dirTSE = &QQ[TSE * numberOfBCnodes];
-      q_dirTNW = &QQ[TNW * numberOfBCnodes];
-      q_dirBNE = &QQ[BNE * numberOfBCnodes];
-      q_dirBSW = &QQ[BSW * numberOfBCnodes];
-      q_dirBSE = &QQ[BSE * numberOfBCnodes];
-      q_dirBNW = &QQ[BNW * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
+      q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
+      q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
+      q_dirTNW = &QQ[DIR_MPP * numberOfBCnodes];
+      q_dirBNE = &QQ[DIR_PPM * numberOfBCnodes];
+      q_dirBSW = &QQ[DIR_MMM * numberOfBCnodes];
+      q_dirBSE = &QQ[DIR_PMM * numberOfBCnodes];
+      q_dirBNW = &QQ[DIR_MPM * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int numberOfNodesK  = k_Q[k];
@@ -713,37 +710,37 @@ extern "C" __global__ void QDeviceIncompHighNu27(int inx,
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
             f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_E   = (D.f[E   ])[ke   ];
-      f_W   = (D.f[W   ])[kw   ];
-      f_N   = (D.f[N   ])[kn   ];
-      f_S   = (D.f[S   ])[ks   ];
-      f_T   = (D.f[T   ])[kt   ];
-      f_B   = (D.f[B   ])[kb   ];
-      f_NE  = (D.f[NE  ])[kne  ];
-      f_SW  = (D.f[SW  ])[ksw  ];
-      f_SE  = (D.f[SE  ])[kse  ];
-      f_NW  = (D.f[NW  ])[knw  ];
-      f_TE  = (D.f[TE  ])[kte  ];
-      f_BW  = (D.f[BW  ])[kbw  ];
-      f_BE  = (D.f[BE  ])[kbe  ];
-      f_TW  = (D.f[TW  ])[ktw  ];
-      f_TN  = (D.f[TN  ])[ktn  ];
-      f_BS  = (D.f[BS  ])[kbs  ];
-      f_BN  = (D.f[BN  ])[kbn  ];
-      f_TS  = (D.f[TS  ])[kts  ];
-      f_TNE = (D.f[TNE ])[ktne ];
-      f_TSW = (D.f[TSW ])[ktsw ];
-      f_TSE = (D.f[TSE ])[ktse ];
-      f_TNW = (D.f[TNW ])[ktnw ];
-      f_BNE = (D.f[BNE ])[kbne ];
-      f_BSW = (D.f[BSW ])[kbsw ];
-      f_BSE = (D.f[BSE ])[kbse ];
-      f_BNW = (D.f[BNW ])[kbnw ];
+      f_E   = (D.f[DIR_P00   ])[ke   ];
+      f_W   = (D.f[DIR_M00   ])[kw   ];
+      f_N   = (D.f[DIR_0P0   ])[kn   ];
+      f_S   = (D.f[DIR_0M0   ])[ks   ];
+      f_T   = (D.f[DIR_00P   ])[kt   ];
+      f_B   = (D.f[DIR_00M   ])[kb   ];
+      f_NE  = (D.f[DIR_PP0  ])[kne  ];
+      f_SW  = (D.f[DIR_MM0  ])[ksw  ];
+      f_SE  = (D.f[DIR_PM0  ])[kse  ];
+      f_NW  = (D.f[DIR_MP0  ])[knw  ];
+      f_TE  = (D.f[DIR_P0P  ])[kte  ];
+      f_BW  = (D.f[DIR_M0M  ])[kbw  ];
+      f_BE  = (D.f[DIR_P0M  ])[kbe  ];
+      f_TW  = (D.f[DIR_M0P  ])[ktw  ];
+      f_TN  = (D.f[DIR_0PP  ])[ktn  ];
+      f_BS  = (D.f[DIR_0MM  ])[kbs  ];
+      f_BN  = (D.f[DIR_0PM  ])[kbn  ];
+      f_TS  = (D.f[DIR_0MP  ])[kts  ];
+      f_TNE = (D.f[DIR_PPP ])[ktne ];
+      f_TSW = (D.f[DIR_MMP ])[ktsw ];
+      f_TSE = (D.f[DIR_PMP ])[ktse ];
+      f_TNW = (D.f[DIR_MPP ])[ktnw ];
+      f_BNE = (D.f[DIR_PPM ])[kbne ];
+      f_BSW = (D.f[DIR_MMM ])[kbsw ];
+      f_BSE = (D.f[DIR_PMM ])[kbse ];
+      f_BNW = (D.f[DIR_MPM ])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, drho, feq, q;
       drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
 				f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + 
-				f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[REST])[kzero]); 
+				f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[DIR_000])[kzero]); 
 
       vx1    =  (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
                 ((f_BE - f_TW)   + (f_TE - f_BW))   + ((f_SE - f_NW)   + (f_NE - f_SW)) +
@@ -763,67 +760,67 @@ extern "C" __global__ void QDeviceIncompHighNu27(int inx,
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D.f[E   ] = &DD[E   *size_Mat];
-         D.f[W   ] = &DD[W   *size_Mat];
-         D.f[N   ] = &DD[N   *size_Mat];
-         D.f[S   ] = &DD[S   *size_Mat];
-         D.f[T   ] = &DD[T   *size_Mat];
-         D.f[B   ] = &DD[B   *size_Mat];
-         D.f[NE  ] = &DD[NE  *size_Mat];
-         D.f[SW  ] = &DD[SW  *size_Mat];
-         D.f[SE  ] = &DD[SE  *size_Mat];
-         D.f[NW  ] = &DD[NW  *size_Mat];
-         D.f[TE  ] = &DD[TE  *size_Mat];
-         D.f[BW  ] = &DD[BW  *size_Mat];
-         D.f[BE  ] = &DD[BE  *size_Mat];
-         D.f[TW  ] = &DD[TW  *size_Mat];
-         D.f[TN  ] = &DD[TN  *size_Mat];
-         D.f[BS  ] = &DD[BS  *size_Mat];
-         D.f[BN  ] = &DD[BN  *size_Mat];
-         D.f[TS  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[TNE *size_Mat];
-         D.f[TSW ] = &DD[TSW *size_Mat];
-         D.f[TSE ] = &DD[TSE *size_Mat];
-         D.f[TNW ] = &DD[TNW *size_Mat];
-         D.f[BNE ] = &DD[BNE *size_Mat];
-         D.f[BSW ] = &DD[BSW *size_Mat];
-         D.f[BSE ] = &DD[BSE *size_Mat];
-         D.f[BNW ] = &DD[BNW *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_P00   *numberOfLBnodes];
+         D.f[DIR_M00   ] = &DD[DIR_M00   *numberOfLBnodes];
+         D.f[DIR_0P0   ] = &DD[DIR_0P0   *numberOfLBnodes];
+         D.f[DIR_0M0   ] = &DD[DIR_0M0   *numberOfLBnodes];
+         D.f[DIR_00P   ] = &DD[DIR_00P   *numberOfLBnodes];
+         D.f[DIR_00M   ] = &DD[DIR_00M   *numberOfLBnodes];
+         D.f[DIR_PP0  ] = &DD[DIR_PP0  *numberOfLBnodes];
+         D.f[DIR_MM0  ] = &DD[DIR_MM0  *numberOfLBnodes];
+         D.f[DIR_PM0  ] = &DD[DIR_PM0  *numberOfLBnodes];
+         D.f[DIR_MP0  ] = &DD[DIR_MP0  *numberOfLBnodes];
+         D.f[DIR_P0P  ] = &DD[DIR_P0P  *numberOfLBnodes];
+         D.f[DIR_M0M  ] = &DD[DIR_M0M  *numberOfLBnodes];
+         D.f[DIR_P0M  ] = &DD[DIR_P0M  *numberOfLBnodes];
+         D.f[DIR_M0P  ] = &DD[DIR_M0P  *numberOfLBnodes];
+         D.f[DIR_0PP  ] = &DD[DIR_0PP  *numberOfLBnodes];
+         D.f[DIR_0MM  ] = &DD[DIR_0MM  *numberOfLBnodes];
+         D.f[DIR_0PM  ] = &DD[DIR_0PM  *numberOfLBnodes];
+         D.f[DIR_0MP  ] = &DD[DIR_0MP  *numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000*numberOfLBnodes];
+         D.f[DIR_PPP ] = &DD[DIR_PPP *numberOfLBnodes];
+         D.f[DIR_MMP ] = &DD[DIR_MMP *numberOfLBnodes];
+         D.f[DIR_PMP ] = &DD[DIR_PMP *numberOfLBnodes];
+         D.f[DIR_MPP ] = &DD[DIR_MPP *numberOfLBnodes];
+         D.f[DIR_PPM ] = &DD[DIR_PPM *numberOfLBnodes];
+         D.f[DIR_MMM ] = &DD[DIR_MMM *numberOfLBnodes];
+         D.f[DIR_PMM ] = &DD[DIR_PMM *numberOfLBnodes];
+         D.f[DIR_MPM ] = &DD[DIR_MPM *numberOfLBnodes];
       } 
       else
       {
-         D.f[W   ] = &DD[E   *size_Mat];
-         D.f[E   ] = &DD[W   *size_Mat];
-         D.f[S   ] = &DD[N   *size_Mat];
-         D.f[N   ] = &DD[S   *size_Mat];
-         D.f[B   ] = &DD[T   *size_Mat];
-         D.f[T   ] = &DD[B   *size_Mat];
-         D.f[SW  ] = &DD[NE  *size_Mat];
-         D.f[NE  ] = &DD[SW  *size_Mat];
-         D.f[NW  ] = &DD[SE  *size_Mat];
-         D.f[SE  ] = &DD[NW  *size_Mat];
-         D.f[BW  ] = &DD[TE  *size_Mat];
-         D.f[TE  ] = &DD[BW  *size_Mat];
-         D.f[TW  ] = &DD[BE  *size_Mat];
-         D.f[BE  ] = &DD[TW  *size_Mat];
-         D.f[BS  ] = &DD[TN  *size_Mat];
-         D.f[TN  ] = &DD[BS  *size_Mat];
-         D.f[TS  ] = &DD[BN  *size_Mat];
-         D.f[BN  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[BSW *size_Mat];
-         D.f[TSW ] = &DD[BNE *size_Mat];
-         D.f[TSE ] = &DD[BNW *size_Mat];
-         D.f[TNW ] = &DD[BSE *size_Mat];
-         D.f[BNE ] = &DD[TSW *size_Mat];
-         D.f[BSW ] = &DD[TNE *size_Mat];
-         D.f[BSE ] = &DD[TNW *size_Mat];
-         D.f[BNW ] = &DD[TSE *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_P00   *numberOfLBnodes];
+         D.f[DIR_P00   ] = &DD[DIR_M00   *numberOfLBnodes];
+         D.f[DIR_0M0   ] = &DD[DIR_0P0   *numberOfLBnodes];
+         D.f[DIR_0P0   ] = &DD[DIR_0M0   *numberOfLBnodes];
+         D.f[DIR_00M   ] = &DD[DIR_00P   *numberOfLBnodes];
+         D.f[DIR_00P   ] = &DD[DIR_00M   *numberOfLBnodes];
+         D.f[DIR_MM0  ] = &DD[DIR_PP0  *numberOfLBnodes];
+         D.f[DIR_PP0  ] = &DD[DIR_MM0  *numberOfLBnodes];
+         D.f[DIR_MP0  ] = &DD[DIR_PM0  *numberOfLBnodes];
+         D.f[DIR_PM0  ] = &DD[DIR_MP0  *numberOfLBnodes];
+         D.f[DIR_M0M  ] = &DD[DIR_P0P  *numberOfLBnodes];
+         D.f[DIR_P0P  ] = &DD[DIR_M0M  *numberOfLBnodes];
+         D.f[DIR_M0P  ] = &DD[DIR_P0M  *numberOfLBnodes];
+         D.f[DIR_P0M  ] = &DD[DIR_M0P  *numberOfLBnodes];
+         D.f[DIR_0MM  ] = &DD[DIR_0PP  *numberOfLBnodes];
+         D.f[DIR_0PP  ] = &DD[DIR_0MM  *numberOfLBnodes];
+         D.f[DIR_0MP  ] = &DD[DIR_0PM  *numberOfLBnodes];
+         D.f[DIR_0PM  ] = &DD[DIR_0MP  *numberOfLBnodes];
+         D.f[DIR_000] = &DD[DIR_000*numberOfLBnodes];
+         D.f[DIR_PPP ] = &DD[DIR_MMM *numberOfLBnodes];
+         D.f[DIR_MMP ] = &DD[DIR_PPM *numberOfLBnodes];
+         D.f[DIR_PMP ] = &DD[DIR_MPM *numberOfLBnodes];
+         D.f[DIR_MPP ] = &DD[DIR_PMM *numberOfLBnodes];
+         D.f[DIR_PPM ] = &DD[DIR_MMP *numberOfLBnodes];
+         D.f[DIR_MMM ] = &DD[DIR_PPP *numberOfLBnodes];
+         D.f[DIR_PMM ] = &DD[DIR_MPP *numberOfLBnodes];
+         D.f[DIR_MPM ] = &DD[DIR_PMP *numberOfLBnodes];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
-         //(D.f[REST])[k]=c1o10;
+         //(D.f[DIR_000])[k]=c1o10;
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       real VeloX = c0o1;
       real VeloY = c0o1;
@@ -837,182 +834,182 @@ extern "C" __global__ void QDeviceIncompHighNu27(int inx,
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c2o27* (drho/*+three*( vx1        )*/+c9o2*( vx1        )*( vx1        ) /** (one + drho)*/-cu_sq); 
-         (D.f[W])[kw]=((c1o1 - q) * f_E + q * ((f_E + f_W) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*( VeloX     )) / (q + c1o1) ;
+         (D.f[DIR_M00])[kw]=((c1o1 - q) * f_E + q * ((f_E + f_W) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*( VeloX     )) / (q + c1o1) ;
       }
 
       q = q_dirW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c2o27* (drho/*+three*(-vx1        )*/+c9o2*(-vx1        )*(-vx1        ) /** (one + drho)*/-cu_sq); 
-         (D.f[E])[ke]=((c1o1 - q) * f_W + q * ((f_W + f_E) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*(-VeloX     )) / (q + c1o1) ;
+         (D.f[DIR_P00])[ke]=((c1o1 - q) * f_W + q * ((f_W + f_E) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*(-VeloX     )) / (q + c1o1) ;
       }
 
       q = q_dirN[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c2o27* (drho/*+three*(    vx2     )*/+c9o2*(     vx2    )*(     vx2    ) /** (one + drho)*/-cu_sq); 
-         (D.f[S])[ks]=((c1o1 - q) * f_N + q * ((f_N + f_S) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*( VeloY     )) / (q + c1o1) ;
+         (D.f[DIR_0M0])[ks]=((c1o1 - q) * f_N + q * ((f_N + f_S) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*( VeloY     )) / (q + c1o1) ;
       }
 
       q = q_dirS[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c2o27* (drho/*+three*(   -vx2     )*/+c9o2*(    -vx2    )*(    -vx2    ) /** (one + drho)*/-cu_sq); 
-         (D.f[N])[kn]=((c1o1 - q) * f_S + q * ((f_S + f_N) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*(-VeloY     )) / (q + c1o1) ;
+         (D.f[DIR_0P0])[kn]=((c1o1 - q) * f_S + q * ((f_S + f_N) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*(-VeloY     )) / (q + c1o1) ;
       }
 
       q = q_dirT[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c2o27* (drho/*+three*(         vx3)*/+c9o2*(         vx3)*(         vx3) /** (one + drho)*/-cu_sq); 
-         (D.f[B])[kb]=((c1o1 - q) * f_T + q * ((f_T + f_B) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*( VeloZ     )) / (q + c1o1) ;
+         (D.f[DIR_00M])[kb]=((c1o1 - q) * f_T + q * ((f_T + f_B) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*( VeloZ     )) / (q + c1o1) ;
       }
 
       q = q_dirB[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c2o27* (drho/*+three*(        -vx3)*/+c9o2*(        -vx3)*(        -vx3) /** (one + drho)*/-cu_sq); 
-         (D.f[T])[kt]=((c1o1 - q) * f_B + q * ((f_B + f_T) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*(-VeloZ     )) / (q + c1o1) ;
+         (D.f[DIR_00P])[kt]=((c1o1 - q) * f_B + q * ((f_B + f_T) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*(-VeloZ     )) / (q + c1o1) ;
       }
 
       q = q_dirNE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*( vx1+vx2    )*/+c9o2*( vx1+vx2    )*( vx1+vx2    ) /** (one + drho)*/-cu_sq); 
-         (D.f[SW])[ksw]=((c1o1 - q) * f_NE + q * ((f_NE + f_SW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloX+VeloY)) / (q + c1o1) ;
+         (D.f[DIR_MM0])[ksw]=((c1o1 - q) * f_NE + q * ((f_NE + f_SW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloX+VeloY)) / (q + c1o1) ;
       }
 
       q = q_dirSW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(-vx1-vx2    )*/+c9o2*(-vx1-vx2    )*(-vx1-vx2    ) /** (one + drho)*/-cu_sq); 
-         (D.f[NE])[kne]=((c1o1 - q) * f_SW + q * ((f_SW + f_NE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloX-VeloY)) / (q + c1o1) ;
+         (D.f[DIR_PP0])[kne]=((c1o1 - q) * f_SW + q * ((f_SW + f_NE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloX-VeloY)) / (q + c1o1) ;
       }
 
       q = q_dirSE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*( vx1-vx2    )*/+c9o2*( vx1-vx2    )*( vx1-vx2    ) /** (one + drho)*/-cu_sq); 
-         (D.f[NW])[knw]=((c1o1 - q) * f_SE + q * ((f_SE + f_NW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloX-VeloY)) / (q + c1o1) ;
+         (D.f[DIR_MP0])[knw]=((c1o1 - q) * f_SE + q * ((f_SE + f_NW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloX-VeloY)) / (q + c1o1) ;
       }
 
       q = q_dirNW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(-vx1+vx2    )*/+c9o2*(-vx1+vx2    )*(-vx1+vx2    ) /** (one + drho)*/-cu_sq); 
-         (D.f[SE])[kse]=((c1o1 - q) * f_NW + q * ((f_NW + f_SE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloX+VeloY)) / (q + c1o1) ;
+         (D.f[DIR_PM0])[kse]=((c1o1 - q) * f_NW + q * ((f_NW + f_SE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloX+VeloY)) / (q + c1o1) ;
       }
 
       q = q_dirTE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*( vx1    +vx3)*/+c9o2*( vx1    +vx3)*( vx1    +vx3) /** (one + drho)*/-cu_sq); 
-         (D.f[BW])[kbw]=((c1o1 - q) * f_TE + q * ((f_TE + f_BW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloX+VeloZ)) / (q + c1o1) ;
+         (D.f[DIR_M0M])[kbw]=((c1o1 - q) * f_TE + q * ((f_TE + f_BW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloX+VeloZ)) / (q + c1o1) ;
       }
 
       q = q_dirBW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(-vx1    -vx3)*/+c9o2*(-vx1    -vx3)*(-vx1    -vx3) /** (one + drho)*/-cu_sq); 
-         (D.f[TE])[kte]=((c1o1 - q) * f_BW + q * ((f_BW + f_TE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloX-VeloZ)) / (q + c1o1) ;
+         (D.f[DIR_P0P])[kte]=((c1o1 - q) * f_BW + q * ((f_BW + f_TE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloX-VeloZ)) / (q + c1o1) ;
       }
 
       q = q_dirBE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*( vx1    -vx3)*/+c9o2*( vx1    -vx3)*( vx1    -vx3) /** (one + drho)*/-cu_sq); 
-         (D.f[TW])[ktw]=((c1o1 - q) * f_BE + q * ((f_BE + f_TW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloX-VeloZ)) / (q + c1o1) ;
+         (D.f[DIR_M0P])[ktw]=((c1o1 - q) * f_BE + q * ((f_BE + f_TW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloX-VeloZ)) / (q + c1o1) ;
       }
 
       q = q_dirTW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(-vx1    +vx3)*/+c9o2*(-vx1    +vx3)*(-vx1    +vx3) /** (one + drho)*/-cu_sq); 
-         (D.f[BE])[kbe]=((c1o1 - q) * f_TW + q * ((f_TW + f_BE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloX+VeloZ)) / (q + c1o1) ;
+         (D.f[DIR_P0M])[kbe]=((c1o1 - q) * f_TW + q * ((f_TW + f_BE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloX+VeloZ)) / (q + c1o1) ;
       }
 
       q = q_dirTN[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(     vx2+vx3)*/+c9o2*(     vx2+vx3)*(     vx2+vx3) /** (one + drho)*/-cu_sq); 
-         (D.f[BS])[kbs]=((c1o1 - q) * f_TN + q * ((f_TN + f_BS) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloY+VeloZ)) / (q + c1o1) ;
+         (D.f[DIR_0MM])[kbs]=((c1o1 - q) * f_TN + q * ((f_TN + f_BS) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloY+VeloZ)) / (q + c1o1) ;
       }
 
       q = q_dirBS[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(    -vx2-vx3)*/+c9o2*(    -vx2-vx3)*(    -vx2-vx3) /** (one + drho)*/-cu_sq); 
-         (D.f[TN])[ktn]=((c1o1 - q) * f_BS + q * ((f_BS + f_TN) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloY-VeloZ)) / (q + c1o1) ;
+         (D.f[DIR_0PP])[ktn]=((c1o1 - q) * f_BS + q * ((f_BS + f_TN) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloY-VeloZ)) / (q + c1o1) ;
       }
 
       q = q_dirBN[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(     vx2-vx3)*/+c9o2*(     vx2-vx3)*(     vx2-vx3) /** (one + drho)*/-cu_sq); 
-         (D.f[TS])[kts]=((c1o1 - q) * f_BN + q * ((f_BN + f_TS) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloY-VeloZ)) / (q + c1o1) ;
+         (D.f[DIR_0MP])[kts]=((c1o1 - q) * f_BN + q * ((f_BN + f_TS) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloY-VeloZ)) / (q + c1o1) ;
       }
 
       q = q_dirTS[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(    -vx2+vx3)*/+c9o2*(    -vx2+vx3)*(    -vx2+vx3) /** (one + drho)*/-cu_sq); 
-         (D.f[BN])[kbn]=((c1o1 - q) * f_TS + q * ((f_TS + f_BN) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloY+VeloZ)) / (q + c1o1) ;
+         (D.f[DIR_0PM])[kbn]=((c1o1 - q) * f_TS + q * ((f_TS + f_BN) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloY+VeloZ)) / (q + c1o1) ;
       }
 
       q = q_dirTNE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*( vx1+vx2+vx3)*/+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3) /** (one + drho)*/-cu_sq); 
-         (D.f[BSW])[kbsw]=((c1o1 - q) * f_TNE + q * ((f_TNE + f_BSW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*( VeloX+VeloY+VeloZ)) / (q + c1o1) ;
+         (D.f[DIR_MMM])[kbsw]=((c1o1 - q) * f_TNE + q * ((f_TNE + f_BSW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*( VeloX+VeloY+VeloZ)) / (q + c1o1) ;
       }
 
       q = q_dirBSW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*(-vx1-vx2-vx3)*/+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3) /** (one + drho)*/-cu_sq); 
-         (D.f[TNE])[ktne]=((c1o1 - q) * f_BSW + q * ((f_BSW + f_TNE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*(-VeloX-VeloY-VeloZ)) / (q + c1o1) ;
+         (D.f[DIR_PPP])[ktne]=((c1o1 - q) * f_BSW + q * ((f_BSW + f_TNE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*(-VeloX-VeloY-VeloZ)) / (q + c1o1) ;
       }
 
       q = q_dirBNE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*( vx1+vx2-vx3)*/+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3) /** (one + drho)*/-cu_sq); 
-         (D.f[TSW])[ktsw]=((c1o1 - q) * f_BNE + q * ((f_BNE + f_TSW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*( VeloX+VeloY-VeloZ)) / (q + c1o1) ;
+         (D.f[DIR_MMP])[ktsw]=((c1o1 - q) * f_BNE + q * ((f_BNE + f_TSW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*( VeloX+VeloY-VeloZ)) / (q + c1o1) ;
       }
 
       q = q_dirTSW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*(-vx1-vx2+vx3)*/+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3) /** (one + drho)*/-cu_sq); 
-         (D.f[BNE])[kbne]=((c1o1 - q) * f_TSW + q * ((f_TSW + f_BNE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*(-VeloX-VeloY+VeloZ)) / (q + c1o1) ;
+         (D.f[DIR_PPM])[kbne]=((c1o1 - q) * f_TSW + q * ((f_TSW + f_BNE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*(-VeloX-VeloY+VeloZ)) / (q + c1o1) ;
       }
 
       q = q_dirTSE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*( vx1-vx2+vx3)*/+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3) /** (one + drho)*/-cu_sq); 
-         (D.f[BNW])[kbnw]=((c1o1 - q) * f_TSE + q * ((f_TSE + f_BNW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*( VeloX-VeloY+VeloZ)) / (q + c1o1) ;
+         (D.f[DIR_MPM])[kbnw]=((c1o1 - q) * f_TSE + q * ((f_TSE + f_BNW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*( VeloX-VeloY+VeloZ)) / (q + c1o1) ;
       }
 
       q = q_dirBNW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*(-vx1+vx2-vx3)*/+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3) /** (one + drho)*/-cu_sq); 
-         (D.f[TSE])[ktse]=((c1o1 - q) * f_BNW + q * ((f_BNW + f_TSE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*(-VeloX+VeloY-VeloZ)) / (q + c1o1) ;
+         (D.f[DIR_PMP])[ktse]=((c1o1 - q) * f_BNW + q * ((f_BNW + f_TSE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*(-VeloX+VeloY-VeloZ)) / (q + c1o1) ;
       }
 
       q = q_dirBSE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*( vx1-vx2-vx3)*/+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3) /** (one + drho)*/-cu_sq); 
-         (D.f[TNW])[ktnw]=((c1o1 - q) * f_BSE + q * ((f_BSE + f_TNW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*( VeloX-VeloY-VeloZ)) / (q + c1o1) ;
+         (D.f[DIR_MPP])[ktnw]=((c1o1 - q) * f_BSE + q * ((f_BSE + f_TNW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*( VeloX-VeloY-VeloZ)) / (q + c1o1) ;
       }
 
       q = q_dirTNW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*(-vx1+vx2+vx3)*/+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3) /** (one + drho)*/-cu_sq); 
-         (D.f[BSE])[kbse]=((c1o1 - q) * f_TNW + q * ((f_TNW + f_BSE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*(-VeloX+VeloY+VeloZ)) / (q + c1o1) ;
+         (D.f[DIR_PMM])[kbse]=((c1o1 - q) * f_TNW + q * ((f_TNW + f_BSE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*(-VeloX+VeloY+VeloZ)) / (q + c1o1) ;
       }
    }
 }
@@ -1057,7 +1054,7 @@ extern "C" __global__ void QDeviceIncompHighNu27(int inx,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QDeviceCompHighNu27(
+__global__ void QDeviceCompHighNu27(
 												 real* DD, 
 												 int* k_Q, 
 												 real* QQ,
@@ -1072,63 +1069,63 @@ extern "C" __global__ void QDeviceCompHighNu27(
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[E   ] = &DD[E   *size_Mat];
-      D.f[W   ] = &DD[W   *size_Mat];
-      D.f[N   ] = &DD[N   *size_Mat];
-      D.f[S   ] = &DD[S   *size_Mat];
-      D.f[T   ] = &DD[T   *size_Mat];
-      D.f[B   ] = &DD[B   *size_Mat];
-      D.f[NE  ] = &DD[NE  *size_Mat];
-      D.f[SW  ] = &DD[SW  *size_Mat];
-      D.f[SE  ] = &DD[SE  *size_Mat];
-      D.f[NW  ] = &DD[NW  *size_Mat];
-      D.f[TE  ] = &DD[TE  *size_Mat];
-      D.f[BW  ] = &DD[BW  *size_Mat];
-      D.f[BE  ] = &DD[BE  *size_Mat];
-      D.f[TW  ] = &DD[TW  *size_Mat];
-      D.f[TN  ] = &DD[TN  *size_Mat];
-      D.f[BS  ] = &DD[BS  *size_Mat];
-      D.f[BN  ] = &DD[BN  *size_Mat];
-      D.f[TS  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[TNE *size_Mat];
-      D.f[TSW ] = &DD[TSW *size_Mat];
-      D.f[TSE ] = &DD[TSE *size_Mat];
-      D.f[TNW ] = &DD[TNW *size_Mat];
-      D.f[BNE ] = &DD[BNE *size_Mat];
-      D.f[BSW ] = &DD[BSW *size_Mat];
-      D.f[BSE ] = &DD[BSE *size_Mat];
-      D.f[BNW ] = &DD[BNW *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
    } 
    else
    {
-      D.f[W   ] = &DD[E   *size_Mat];
-      D.f[E   ] = &DD[W   *size_Mat];
-      D.f[S   ] = &DD[N   *size_Mat];
-      D.f[N   ] = &DD[S   *size_Mat];
-      D.f[B   ] = &DD[T   *size_Mat];
-      D.f[T   ] = &DD[B   *size_Mat];
-      D.f[SW  ] = &DD[NE  *size_Mat];
-      D.f[NE  ] = &DD[SW  *size_Mat];
-      D.f[NW  ] = &DD[SE  *size_Mat];
-      D.f[SE  ] = &DD[NW  *size_Mat];
-      D.f[BW  ] = &DD[TE  *size_Mat];
-      D.f[TE  ] = &DD[BW  *size_Mat];
-      D.f[TW  ] = &DD[BE  *size_Mat];
-      D.f[BE  ] = &DD[TW  *size_Mat];
-      D.f[BS  ] = &DD[TN  *size_Mat];
-      D.f[TN  ] = &DD[BS  *size_Mat];
-      D.f[TS  ] = &DD[BN  *size_Mat];
-      D.f[BN  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[BSW *size_Mat];
-      D.f[TSW ] = &DD[BNE *size_Mat];
-      D.f[TSE ] = &DD[BNW *size_Mat];
-      D.f[TNW ] = &DD[BSE *size_Mat];
-      D.f[BNE ] = &DD[TSW *size_Mat];
-      D.f[BSW ] = &DD[TNE *size_Mat];
-      D.f[BSE ] = &DD[TNW *size_Mat];
-      D.f[BNW ] = &DD[TSE *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -1149,32 +1146,32 @@ extern "C" __global__ void QDeviceCompHighNu27(
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[E   * numberOfBCnodes];
-      q_dirW   = &QQ[W   * numberOfBCnodes];
-      q_dirN   = &QQ[N   * numberOfBCnodes];
-      q_dirS   = &QQ[S   * numberOfBCnodes];
-      q_dirT   = &QQ[T   * numberOfBCnodes];
-      q_dirB   = &QQ[B   * numberOfBCnodes];
-      q_dirNE  = &QQ[NE  * numberOfBCnodes];
-      q_dirSW  = &QQ[SW  * numberOfBCnodes];
-      q_dirSE  = &QQ[SE  * numberOfBCnodes];
-      q_dirNW  = &QQ[NW  * numberOfBCnodes];
-      q_dirTE  = &QQ[TE  * numberOfBCnodes];
-      q_dirBW  = &QQ[BW  * numberOfBCnodes];
-      q_dirBE  = &QQ[BE  * numberOfBCnodes];
-      q_dirTW  = &QQ[TW  * numberOfBCnodes];
-      q_dirTN  = &QQ[TN  * numberOfBCnodes];
-      q_dirBS  = &QQ[BS  * numberOfBCnodes];
-      q_dirBN  = &QQ[BN  * numberOfBCnodes];
-      q_dirTS  = &QQ[TS  * numberOfBCnodes];
-      q_dirTNE = &QQ[TNE * numberOfBCnodes];
-      q_dirTSW = &QQ[TSW * numberOfBCnodes];
-      q_dirTSE = &QQ[TSE * numberOfBCnodes];
-      q_dirTNW = &QQ[TNW * numberOfBCnodes];
-      q_dirBNE = &QQ[BNE * numberOfBCnodes];
-      q_dirBSW = &QQ[BSW * numberOfBCnodes];
-      q_dirBSE = &QQ[BSE * numberOfBCnodes];
-      q_dirBNW = &QQ[BNW * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
+      q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
+      q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
+      q_dirTNW = &QQ[DIR_MPP * numberOfBCnodes];
+      q_dirBNE = &QQ[DIR_PPM * numberOfBCnodes];
+      q_dirBSW = &QQ[DIR_MMM * numberOfBCnodes];
+      q_dirBSE = &QQ[DIR_PMM * numberOfBCnodes];
+      q_dirBNW = &QQ[DIR_MPM * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int numberOfNodesK  = k_Q[k];
@@ -1209,63 +1206,63 @@ extern "C" __global__ void QDeviceCompHighNu27(
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
             f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_E   = (D.f[E   ])[ke   ];
-      f_W   = (D.f[W   ])[kw   ];
-      f_N   = (D.f[N   ])[kn   ];
-      f_S   = (D.f[S   ])[ks   ];
-      f_T   = (D.f[T   ])[kt   ];
-      f_B   = (D.f[B   ])[kb   ];
-      f_NE  = (D.f[NE  ])[kne  ];
-      f_SW  = (D.f[SW  ])[ksw  ];
-      f_SE  = (D.f[SE  ])[kse  ];
-      f_NW  = (D.f[NW  ])[knw  ];
-      f_TE  = (D.f[TE  ])[kte  ];
-      f_BW  = (D.f[BW  ])[kbw  ];
-      f_BE  = (D.f[BE  ])[kbe  ];
-      f_TW  = (D.f[TW  ])[ktw  ];
-      f_TN  = (D.f[TN  ])[ktn  ];
-      f_BS  = (D.f[BS  ])[kbs  ];
-      f_BN  = (D.f[BN  ])[kbn  ];
-      f_TS  = (D.f[TS  ])[kts  ];
-      f_TNE = (D.f[TNE ])[ktne ];
-      f_TSW = (D.f[TSW ])[ktsw ];
-      f_TSE = (D.f[TSE ])[ktse ];
-      f_TNW = (D.f[TNW ])[ktnw ];
-      f_BNE = (D.f[BNE ])[kbne ];
-      f_BSW = (D.f[BSW ])[kbsw ];
-      f_BSE = (D.f[BSE ])[kbse ];
-      f_BNW = (D.f[BNW ])[kbnw ];
-      //f_W    = (D.f[E   ])[ke   ];
-      //f_E    = (D.f[W   ])[kw   ];
-      //f_S    = (D.f[N   ])[kn   ];
-      //f_N    = (D.f[S   ])[ks   ];
-      //f_B    = (D.f[T   ])[kt   ];
-      //f_T    = (D.f[B   ])[kb   ];
-      //f_SW   = (D.f[NE  ])[kne  ];
-      //f_NE   = (D.f[SW  ])[ksw  ];
-      //f_NW   = (D.f[SE  ])[kse  ];
-      //f_SE   = (D.f[NW  ])[knw  ];
-      //f_BW   = (D.f[TE  ])[kte  ];
-      //f_TE   = (D.f[BW  ])[kbw  ];
-      //f_TW   = (D.f[BE  ])[kbe  ];
-      //f_BE   = (D.f[TW  ])[ktw  ];
-      //f_BS   = (D.f[TN  ])[ktn  ];
-      //f_TN   = (D.f[BS  ])[kbs  ];
-      //f_TS   = (D.f[BN  ])[kbn  ];
-      //f_BN   = (D.f[TS  ])[kts  ];
-      //f_BSW  = (D.f[TNE ])[ktne ];
-      //f_BNE  = (D.f[TSW ])[ktsw ];
-      //f_BNW  = (D.f[TSE ])[ktse ];
-      //f_BSE  = (D.f[TNW ])[ktnw ];
-      //f_TSW  = (D.f[BNE ])[kbne ];
-      //f_TNE  = (D.f[BSW ])[kbsw ];
-      //f_TNW  = (D.f[BSE ])[kbse ];
-      //f_TSE  = (D.f[BNW ])[kbnw ];
+      f_E   = (D.f[DIR_P00   ])[ke   ];
+      f_W   = (D.f[DIR_M00   ])[kw   ];
+      f_N   = (D.f[DIR_0P0   ])[kn   ];
+      f_S   = (D.f[DIR_0M0   ])[ks   ];
+      f_T   = (D.f[DIR_00P   ])[kt   ];
+      f_B   = (D.f[DIR_00M   ])[kb   ];
+      f_NE  = (D.f[DIR_PP0  ])[kne  ];
+      f_SW  = (D.f[DIR_MM0  ])[ksw  ];
+      f_SE  = (D.f[DIR_PM0  ])[kse  ];
+      f_NW  = (D.f[DIR_MP0  ])[knw  ];
+      f_TE  = (D.f[DIR_P0P  ])[kte  ];
+      f_BW  = (D.f[DIR_M0M  ])[kbw  ];
+      f_BE  = (D.f[DIR_P0M  ])[kbe  ];
+      f_TW  = (D.f[DIR_M0P  ])[ktw  ];
+      f_TN  = (D.f[DIR_0PP  ])[ktn  ];
+      f_BS  = (D.f[DIR_0MM  ])[kbs  ];
+      f_BN  = (D.f[DIR_0PM  ])[kbn  ];
+      f_TS  = (D.f[DIR_0MP  ])[kts  ];
+      f_TNE = (D.f[DIR_PPP ])[ktne ];
+      f_TSW = (D.f[DIR_MMP ])[ktsw ];
+      f_TSE = (D.f[DIR_PMP ])[ktse ];
+      f_TNW = (D.f[DIR_MPP ])[ktnw ];
+      f_BNE = (D.f[DIR_PPM ])[kbne ];
+      f_BSW = (D.f[DIR_MMM ])[kbsw ];
+      f_BSE = (D.f[DIR_PMM ])[kbse ];
+      f_BNW = (D.f[DIR_MPM ])[kbnw ];
+      //f_W    = (D.f[DIR_P00   ])[ke   ];
+      //f_E    = (D.f[DIR_M00   ])[kw   ];
+      //f_S    = (D.f[DIR_0P0   ])[kn   ];
+      //f_N    = (D.f[DIR_0M0   ])[ks   ];
+      //f_B    = (D.f[DIR_00P   ])[kt   ];
+      //f_T    = (D.f[DIR_00M   ])[kb   ];
+      //f_SW   = (D.f[DIR_PP0  ])[kne  ];
+      //f_NE   = (D.f[DIR_MM0  ])[ksw  ];
+      //f_NW   = (D.f[DIR_PM0  ])[kse  ];
+      //f_SE   = (D.f[DIR_MP0  ])[knw  ];
+      //f_BW   = (D.f[DIR_P0P  ])[kte  ];
+      //f_TE   = (D.f[DIR_M0M  ])[kbw  ];
+      //f_TW   = (D.f[DIR_P0M  ])[kbe  ];
+      //f_BE   = (D.f[DIR_M0P  ])[ktw  ];
+      //f_BS   = (D.f[DIR_0PP  ])[ktn  ];
+      //f_TN   = (D.f[DIR_0MM  ])[kbs  ];
+      //f_TS   = (D.f[DIR_0PM  ])[kbn  ];
+      //f_BN   = (D.f[DIR_0MP  ])[kts  ];
+      //f_BSW  = (D.f[DIR_PPP ])[ktne ];
+      //f_BNE  = (D.f[DIR_MMP ])[ktsw ];
+      //f_BNW  = (D.f[DIR_PMP ])[ktse ];
+      //f_BSE  = (D.f[DIR_MPP ])[ktnw ];
+      //f_TSW  = (D.f[DIR_PPM ])[kbne ];
+      //f_TNE  = (D.f[DIR_MMM ])[kbsw ];
+      //f_TNW  = (D.f[DIR_PMM ])[kbse ];
+      //f_TSE  = (D.f[DIR_MPM ])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, drho, feq, q;
       drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
 				f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + 
-				f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[REST])[kzero]); 
+				f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[DIR_000])[kzero]); 
 
       vx1    =  (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
                 ((f_BE - f_TW)   + (f_TE - f_BW))   + ((f_SE - f_NW)   + (f_NE - f_SW)) +
@@ -1285,67 +1282,67 @@ extern "C" __global__ void QDeviceCompHighNu27(
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D.f[E   ] = &DD[E   *size_Mat];
-         D.f[W   ] = &DD[W   *size_Mat];
-         D.f[N   ] = &DD[N   *size_Mat];
-         D.f[S   ] = &DD[S   *size_Mat];
-         D.f[T   ] = &DD[T   *size_Mat];
-         D.f[B   ] = &DD[B   *size_Mat];
-         D.f[NE  ] = &DD[NE  *size_Mat];
-         D.f[SW  ] = &DD[SW  *size_Mat];
-         D.f[SE  ] = &DD[SE  *size_Mat];
-         D.f[NW  ] = &DD[NW  *size_Mat];
-         D.f[TE  ] = &DD[TE  *size_Mat];
-         D.f[BW  ] = &DD[BW  *size_Mat];
-         D.f[BE  ] = &DD[BE  *size_Mat];
-         D.f[TW  ] = &DD[TW  *size_Mat];
-         D.f[TN  ] = &DD[TN  *size_Mat];
-         D.f[BS  ] = &DD[BS  *size_Mat];
-         D.f[BN  ] = &DD[BN  *size_Mat];
-         D.f[TS  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[TNE *size_Mat];
-         D.f[TSW ] = &DD[TSW *size_Mat];
-         D.f[TSE ] = &DD[TSE *size_Mat];
-         D.f[TNW ] = &DD[TNW *size_Mat];
-         D.f[BNE ] = &DD[BNE *size_Mat];
-         D.f[BSW ] = &DD[BSW *size_Mat];
-         D.f[BSE ] = &DD[BSE *size_Mat];
-         D.f[BNW ] = &DD[BNW *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
       } 
       else
       {
-         D.f[W   ] = &DD[E   *size_Mat];
-         D.f[E   ] = &DD[W   *size_Mat];
-         D.f[S   ] = &DD[N   *size_Mat];
-         D.f[N   ] = &DD[S   *size_Mat];
-         D.f[B   ] = &DD[T   *size_Mat];
-         D.f[T   ] = &DD[B   *size_Mat];
-         D.f[SW  ] = &DD[NE  *size_Mat];
-         D.f[NE  ] = &DD[SW  *size_Mat];
-         D.f[NW  ] = &DD[SE  *size_Mat];
-         D.f[SE  ] = &DD[NW  *size_Mat];
-         D.f[BW  ] = &DD[TE  *size_Mat];
-         D.f[TE  ] = &DD[BW  *size_Mat];
-         D.f[TW  ] = &DD[BE  *size_Mat];
-         D.f[BE  ] = &DD[TW  *size_Mat];
-         D.f[BS  ] = &DD[TN  *size_Mat];
-         D.f[TN  ] = &DD[BS  *size_Mat];
-         D.f[TS  ] = &DD[BN  *size_Mat];
-         D.f[BN  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[BSW *size_Mat];
-         D.f[TSW ] = &DD[BNE *size_Mat];
-         D.f[TSE ] = &DD[BNW *size_Mat];
-         D.f[TNW ] = &DD[BSE *size_Mat];
-         D.f[BNE ] = &DD[TSW *size_Mat];
-         D.f[BSW ] = &DD[TNE *size_Mat];
-         D.f[BSE ] = &DD[TNW *size_Mat];
-         D.f[BNW ] = &DD[TSE *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
-         //(D.f[REST])[k]=c1o10;
+         //(D.f[DIR_000])[k]=c1o10;
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       real VeloX = c0o1;
       real VeloY = c0o1;
@@ -1359,234 +1356,234 @@ extern "C" __global__ void QDeviceCompHighNu27(
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c2o27* (drho/*+three*( vx1        )*/+c9o2*( vx1        )*( vx1        ) * (c1o1 + drho)-cu_sq); 
-         (D.f[W])[kw]=((c1o1 - q) * f_E + q * ((f_E + f_W) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*( VeloX     )) / (q + c1o1) ;
-         //(D.f[W])[kw]=(one-q)/(one+q)*(f_E-f_W+(f_E+f_W-two*feq*om1)/(one-om1))*c1o2+(q*(f_E+f_W)-six*c2over27*( VeloX     ))/(one+q) - c2over27 * drho;
-         //(D.f[W])[kw]=zero;
+         (D.f[DIR_M00])[kw]=((c1o1 - q) * f_E + q * ((f_E + f_W) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*( VeloX     )) / (q + c1o1) ;
+         //(D.f[DIR_M00])[kw]=(one-q)/(one+q)*(f_E-f_W+(f_E+f_W-two*feq*om1)/(one-om1))*c1o2+(q*(f_E+f_W)-six*c2over27*( VeloX     ))/(one+q) - c2over27 * drho;
+         //(D.f[DIR_M00])[kw]=zero;
       }
 
       q = q_dirW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c2o27* (drho/*+three*(-vx1        )*/+c9o2*(-vx1        )*(-vx1        ) * (c1o1 + drho)-cu_sq); 
-         (D.f[E])[ke]=((c1o1 - q) * f_W + q * ((f_W + f_E) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*(-VeloX     )) / (q + c1o1) ;
-         //(D.f[E])[ke]=(one-q)/(one+q)*(f_W-f_E+(f_W+f_E-two*feq*om1)/(one-om1))*c1o2+(q*(f_W+f_E)-six*c2over27*(-VeloX     ))/(one+q) - c2over27 * drho;
-         //(D.f[E])[ke]=zero;
+         (D.f[DIR_P00])[ke]=((c1o1 - q) * f_W + q * ((f_W + f_E) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*(-VeloX     )) / (q + c1o1) ;
+         //(D.f[DIR_P00])[ke]=(one-q)/(one+q)*(f_W-f_E+(f_W+f_E-two*feq*om1)/(one-om1))*c1o2+(q*(f_W+f_E)-six*c2over27*(-VeloX     ))/(one+q) - c2over27 * drho;
+         //(D.f[DIR_P00])[ke]=zero;
       }
 
       q = q_dirN[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c2o27* (drho/*+three*(    vx2     )*/+c9o2*(     vx2    )*(     vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[S])[ks]=((c1o1 - q) * f_N + q * ((f_N + f_S) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*( VeloY     )) / (q + c1o1) ;
-         //(D.f[S])[ks]=(one-q)/(one+q)*(f_N-f_S+(f_N+f_S-two*feq*om1)/(one-om1))*c1o2+(q*(f_N+f_S)-six*c2over27*( VeloY     ))/(one+q) - c2over27 * drho;
-         //(D.f[S])[ks]=zero;
+         (D.f[DIR_0M0])[ks]=((c1o1 - q) * f_N + q * ((f_N + f_S) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*( VeloY     )) / (q + c1o1) ;
+         //(D.f[DIR_0M0])[ks]=(one-q)/(one+q)*(f_N-f_S+(f_N+f_S-two*feq*om1)/(one-om1))*c1o2+(q*(f_N+f_S)-six*c2over27*( VeloY     ))/(one+q) - c2over27 * drho;
+         //(D.f[DIR_0M0])[ks]=zero;
       }
 
       q = q_dirS[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c2o27* (drho/*+three*(   -vx2     )*/+c9o2*(    -vx2    )*(    -vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[N])[kn]=((c1o1 - q) * f_S + q * ((f_S + f_N) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*(-VeloY     )) / (q + c1o1) ;
-         //(D.f[N])[kn]=(one-q)/(one+q)*(f_S-f_N+(f_S+f_N-two*feq*om1)/(one-om1))*c1o2+(q*(f_S+f_N)-six*c2over27*(-VeloY     ))/(one+q) - c2over27 * drho;
-         //(D.f[N])[kn]=zero;
+         (D.f[DIR_0P0])[kn]=((c1o1 - q) * f_S + q * ((f_S + f_N) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*(-VeloY     )) / (q + c1o1) ;
+         //(D.f[DIR_0P0])[kn]=(one-q)/(one+q)*(f_S-f_N+(f_S+f_N-two*feq*om1)/(one-om1))*c1o2+(q*(f_S+f_N)-six*c2over27*(-VeloY     ))/(one+q) - c2over27 * drho;
+         //(D.f[DIR_0P0])[kn]=zero;
       }
 
       q = q_dirT[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c2o27* (drho/*+three*(         vx3)*/+c9o2*(         vx3)*(         vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[B])[kb]=((c1o1 - q) * f_T + q * ((f_T + f_B) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*( VeloZ     )) / (q + c1o1) ;
-         //(D.f[B])[kb]=(one-q)/(one+q)*(f_T-f_B+(f_T+f_B-two*feq*om1)/(one-om1))*c1o2+(q*(f_T+f_B)-six*c2over27*( VeloZ     ))/(one+q) - c2over27 * drho;
-         //(D.f[B])[kb]=one;
+         (D.f[DIR_00M])[kb]=((c1o1 - q) * f_T + q * ((f_T + f_B) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*( VeloZ     )) / (q + c1o1) ;
+         //(D.f[DIR_00M])[kb]=(one-q)/(one+q)*(f_T-f_B+(f_T+f_B-two*feq*om1)/(one-om1))*c1o2+(q*(f_T+f_B)-six*c2over27*( VeloZ     ))/(one+q) - c2over27 * drho;
+         //(D.f[DIR_00M])[kb]=one;
       }
 
       q = q_dirB[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c2o27* (drho/*+three*(        -vx3)*/+c9o2*(        -vx3)*(        -vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[T])[kt]=((c1o1 - q) * f_B + q * ((f_B + f_T) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*(-VeloZ     )) / (q + c1o1) ;
-         //(D.f[T])[kt]=(one-q)/(one+q)*(f_B-f_T+(f_B+f_T-two*feq*om1)/(one-om1))*c1o2+(q*(f_B+f_T)-six*c2over27*(-VeloZ     ))/(one+q) - c2over27 * drho;
-         //(D.f[T])[kt]=zero;
+         (D.f[DIR_00P])[kt]=((c1o1 - q) * f_B + q * ((f_B + f_T) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*(-VeloZ     )) / (q + c1o1) ;
+         //(D.f[DIR_00P])[kt]=(one-q)/(one+q)*(f_B-f_T+(f_B+f_T-two*feq*om1)/(one-om1))*c1o2+(q*(f_B+f_T)-six*c2over27*(-VeloZ     ))/(one+q) - c2over27 * drho;
+         //(D.f[DIR_00P])[kt]=zero;
       }
 
       q = q_dirNE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*( vx1+vx2    )*/+c9o2*( vx1+vx2    )*( vx1+vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[SW])[ksw]=((c1o1 - q) * f_NE + q * ((f_NE + f_SW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloX+VeloY)) / (q + c1o1) ;
-         //(D.f[SW])[ksw]=(one-q)/(one+q)*(f_NE-f_SW+(f_NE+f_SW-two*feq*om1)/(one-om1))*c1o2+(q*(f_NE+f_SW)-six*c1over54*(VeloX+VeloY))/(one+q) - c1over54 * drho;
-         //(D.f[SW])[ksw]=zero;
+         (D.f[DIR_MM0])[ksw]=((c1o1 - q) * f_NE + q * ((f_NE + f_SW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloX+VeloY)) / (q + c1o1) ;
+         //(D.f[DIR_MM0])[ksw]=(one-q)/(one+q)*(f_NE-f_SW+(f_NE+f_SW-two*feq*om1)/(one-om1))*c1o2+(q*(f_NE+f_SW)-six*c1over54*(VeloX+VeloY))/(one+q) - c1over54 * drho;
+         //(D.f[DIR_MM0])[ksw]=zero;
       }
 
       q = q_dirSW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(-vx1-vx2    )*/+c9o2*(-vx1-vx2    )*(-vx1-vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[NE])[kne]=((c1o1 - q) * f_SW + q * ((f_SW + f_NE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloX-VeloY)) / (q + c1o1) ;
-         //(D.f[NE])[kne]=(one-q)/(one+q)*(f_SW-f_NE+(f_SW+f_NE-two*feq*om1)/(one-om1))*c1o2+(q*(f_SW+f_NE)-six*c1over54*(-VeloX-VeloY))/(one+q) - c1over54 * drho;
-         //(D.f[NE])[kne]=zero;
+         (D.f[DIR_PP0])[kne]=((c1o1 - q) * f_SW + q * ((f_SW + f_NE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloX-VeloY)) / (q + c1o1) ;
+         //(D.f[DIR_PP0])[kne]=(one-q)/(one+q)*(f_SW-f_NE+(f_SW+f_NE-two*feq*om1)/(one-om1))*c1o2+(q*(f_SW+f_NE)-six*c1over54*(-VeloX-VeloY))/(one+q) - c1over54 * drho;
+         //(D.f[DIR_PP0])[kne]=zero;
       }
 
       q = q_dirSE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*( vx1-vx2    )*/+c9o2*( vx1-vx2    )*( vx1-vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[NW])[knw]=((c1o1 - q) * f_SE + q * ((f_SE + f_NW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloX-VeloY)) / (q + c1o1) ;
-         //(D.f[NW])[knw]=(one-q)/(one+q)*(f_SE-f_NW+(f_SE+f_NW-two*feq*om1)/(one-om1))*c1o2+(q*(f_SE+f_NW)-six*c1over54*( VeloX-VeloY))/(one+q) - c1over54 * drho;
-         //(D.f[NW])[knw]=zero;
+         (D.f[DIR_MP0])[knw]=((c1o1 - q) * f_SE + q * ((f_SE + f_NW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloX-VeloY)) / (q + c1o1) ;
+         //(D.f[DIR_MP0])[knw]=(one-q)/(one+q)*(f_SE-f_NW+(f_SE+f_NW-two*feq*om1)/(one-om1))*c1o2+(q*(f_SE+f_NW)-six*c1over54*( VeloX-VeloY))/(one+q) - c1over54 * drho;
+         //(D.f[DIR_MP0])[knw]=zero;
       }
 
       q = q_dirNW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(-vx1+vx2    )*/+c9o2*(-vx1+vx2    )*(-vx1+vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[SE])[kse]=((c1o1 - q) * f_NW + q * ((f_NW + f_SE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloX+VeloY)) / (q + c1o1) ;
-         //(D.f[SE])[kse]=(one-q)/(one+q)*(f_NW-f_SE+(f_NW+f_SE-two*feq*om1)/(one-om1))*c1o2+(q*(f_NW+f_SE)-six*c1over54*(-VeloX+VeloY))/(one+q) - c1over54 * drho;
-         //(D.f[SE])[kse]=zero;
+         (D.f[DIR_PM0])[kse]=((c1o1 - q) * f_NW + q * ((f_NW + f_SE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloX+VeloY)) / (q + c1o1) ;
+         //(D.f[DIR_PM0])[kse]=(one-q)/(one+q)*(f_NW-f_SE+(f_NW+f_SE-two*feq*om1)/(one-om1))*c1o2+(q*(f_NW+f_SE)-six*c1over54*(-VeloX+VeloY))/(one+q) - c1over54 * drho;
+         //(D.f[DIR_PM0])[kse]=zero;
       }
 
       q = q_dirTE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*( vx1    +vx3)*/+c9o2*( vx1    +vx3)*( vx1    +vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BW])[kbw]=((c1o1 - q) * f_TE + q * ((f_TE + f_BW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloX+VeloZ)) / (q + c1o1) ;
-         //(D.f[BW])[kbw]=(one-q)/(one+q)*(f_TE-f_BW+(f_TE+f_BW-two*feq*om1)/(one-om1))*c1o2+(q*(f_TE+f_BW)-six*c1over54*( VeloX+VeloZ))/(one+q) - c1over54 * drho;
-         //(D.f[BW])[kbw]=zero;
+         (D.f[DIR_M0M])[kbw]=((c1o1 - q) * f_TE + q * ((f_TE + f_BW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloX+VeloZ)) / (q + c1o1) ;
+         //(D.f[DIR_M0M])[kbw]=(one-q)/(one+q)*(f_TE-f_BW+(f_TE+f_BW-two*feq*om1)/(one-om1))*c1o2+(q*(f_TE+f_BW)-six*c1over54*( VeloX+VeloZ))/(one+q) - c1over54 * drho;
+         //(D.f[DIR_M0M])[kbw]=zero;
       }
 
       q = q_dirBW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(-vx1    -vx3)*/+c9o2*(-vx1    -vx3)*(-vx1    -vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TE])[kte]=((c1o1 - q) * f_BW + q * ((f_BW + f_TE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloX-VeloZ)) / (q + c1o1) ;
-         //(D.f[TE])[kte]=(one-q)/(one+q)*(f_BW-f_TE+(f_BW+f_TE-two*feq*om1)/(one-om1))*c1o2+(q*(f_BW+f_TE)-six*c1over54*(-VeloX-VeloZ))/(one+q) - c1over54 * drho;
-         //(D.f[TE])[kte]=zero;
+         (D.f[DIR_P0P])[kte]=((c1o1 - q) * f_BW + q * ((f_BW + f_TE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloX-VeloZ)) / (q + c1o1) ;
+         //(D.f[DIR_P0P])[kte]=(one-q)/(one+q)*(f_BW-f_TE+(f_BW+f_TE-two*feq*om1)/(one-om1))*c1o2+(q*(f_BW+f_TE)-six*c1over54*(-VeloX-VeloZ))/(one+q) - c1over54 * drho;
+         //(D.f[DIR_P0P])[kte]=zero;
       }
 
       q = q_dirBE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*( vx1    -vx3)*/+c9o2*( vx1    -vx3)*( vx1    -vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TW])[ktw]=((c1o1 - q) * f_BE + q * ((f_BE + f_TW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloX-VeloZ)) / (q + c1o1) ;
-         //(D.f[TW])[ktw]=(one-q)/(one+q)*(f_BE-f_TW+(f_BE+f_TW-two*feq*om1)/(one-om1))*c1o2+(q*(f_BE+f_TW)-six*c1over54*( VeloX-VeloZ))/(one+q) - c1over54 * drho;
-         //(D.f[TW])[ktw]=zero;
+         (D.f[DIR_M0P])[ktw]=((c1o1 - q) * f_BE + q * ((f_BE + f_TW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloX-VeloZ)) / (q + c1o1) ;
+         //(D.f[DIR_M0P])[ktw]=(one-q)/(one+q)*(f_BE-f_TW+(f_BE+f_TW-two*feq*om1)/(one-om1))*c1o2+(q*(f_BE+f_TW)-six*c1over54*( VeloX-VeloZ))/(one+q) - c1over54 * drho;
+         //(D.f[DIR_M0P])[ktw]=zero;
       }
 
       q = q_dirTW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(-vx1    +vx3)*/+c9o2*(-vx1    +vx3)*(-vx1    +vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BE])[kbe]=((c1o1 - q) * f_TW + q * ((f_TW + f_BE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloX+VeloZ)) / (q + c1o1) ;
-         //(D.f[BE])[kbe]=(one-q)/(one+q)*(f_TW-f_BE+(f_TW+f_BE-two*feq*om1)/(one-om1))*c1o2+(q*(f_TW+f_BE)-six*c1over54*(-VeloX+VeloZ))/(one+q) - c1over54 * drho;
-         //(D.f[BE])[kbe]=zero;
+         (D.f[DIR_P0M])[kbe]=((c1o1 - q) * f_TW + q * ((f_TW + f_BE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloX+VeloZ)) / (q + c1o1) ;
+         //(D.f[DIR_P0M])[kbe]=(one-q)/(one+q)*(f_TW-f_BE+(f_TW+f_BE-two*feq*om1)/(one-om1))*c1o2+(q*(f_TW+f_BE)-six*c1over54*(-VeloX+VeloZ))/(one+q) - c1over54 * drho;
+         //(D.f[DIR_P0M])[kbe]=zero;
       }
 
       q = q_dirTN[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(     vx2+vx3)*/+c9o2*(     vx2+vx3)*(     vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BS])[kbs]=((c1o1 - q) * f_TN + q * ((f_TN + f_BS) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloY+VeloZ)) / (q + c1o1) ;
-         //(D.f[BS])[kbs]=(one-q)/(one+q)*(f_TN-f_BS+(f_TN+f_BS-two*feq*om1)/(one-om1))*c1o2+(q*(f_TN+f_BS)-six*c1over54*( VeloY+VeloZ))/(one+q) - c1over54 * drho;
-         //(D.f[BS])[kbs]=zero;
+         (D.f[DIR_0MM])[kbs]=((c1o1 - q) * f_TN + q * ((f_TN + f_BS) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloY+VeloZ)) / (q + c1o1) ;
+         //(D.f[DIR_0MM])[kbs]=(one-q)/(one+q)*(f_TN-f_BS+(f_TN+f_BS-two*feq*om1)/(one-om1))*c1o2+(q*(f_TN+f_BS)-six*c1over54*( VeloY+VeloZ))/(one+q) - c1over54 * drho;
+         //(D.f[DIR_0MM])[kbs]=zero;
       }
 
       q = q_dirBS[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(    -vx2-vx3)*/+c9o2*(    -vx2-vx3)*(    -vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TN])[ktn]=((c1o1 - q) * f_BS + q * ((f_BS + f_TN) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloY-VeloZ)) / (q + c1o1) ;
-         //(D.f[TN])[ktn]=(one-q)/(one+q)*(f_BS-f_TN+(f_BS+f_TN-two*feq*om1)/(one-om1))*c1o2+(q*(f_BS+f_TN)-six*c1over54*( -VeloY-VeloZ))/(one+q) - c1over54 * drho;
-         //(D.f[TN])[ktn]=zero;
+         (D.f[DIR_0PP])[ktn]=((c1o1 - q) * f_BS + q * ((f_BS + f_TN) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloY-VeloZ)) / (q + c1o1) ;
+         //(D.f[DIR_0PP])[ktn]=(one-q)/(one+q)*(f_BS-f_TN+(f_BS+f_TN-two*feq*om1)/(one-om1))*c1o2+(q*(f_BS+f_TN)-six*c1over54*( -VeloY-VeloZ))/(one+q) - c1over54 * drho;
+         //(D.f[DIR_0PP])[ktn]=zero;
       }
 
       q = q_dirBN[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(     vx2-vx3)*/+c9o2*(     vx2-vx3)*(     vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TS])[kts]=((c1o1 - q) * f_BN + q * ((f_BN + f_TS) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloY-VeloZ)) / (q + c1o1) ;
-         //(D.f[TS])[kts]=(one-q)/(one+q)*(f_BN-f_TS+(f_BN+f_TS-two*feq*om1)/(one-om1))*c1o2+(q*(f_BN+f_TS)-six*c1over54*( VeloY-VeloZ))/(one+q) - c1over54 * drho;
-         //(D.f[TS])[kts]=zero;
+         (D.f[DIR_0MP])[kts]=((c1o1 - q) * f_BN + q * ((f_BN + f_TS) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloY-VeloZ)) / (q + c1o1) ;
+         //(D.f[DIR_0MP])[kts]=(one-q)/(one+q)*(f_BN-f_TS+(f_BN+f_TS-two*feq*om1)/(one-om1))*c1o2+(q*(f_BN+f_TS)-six*c1over54*( VeloY-VeloZ))/(one+q) - c1over54 * drho;
+         //(D.f[DIR_0MP])[kts]=zero;
       }
 
       q = q_dirTS[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(    -vx2+vx3)*/+c9o2*(    -vx2+vx3)*(    -vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BN])[kbn]=((c1o1 - q) * f_TS + q * ((f_TS + f_BN) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloY+VeloZ)) / (q + c1o1) ;
-         //(D.f[BN])[kbn]=(one-q)/(one+q)*(f_TS-f_BN+(f_TS+f_BN-two*feq*om1)/(one-om1))*c1o2+(q*(f_TS+f_BN)-six*c1over54*( -VeloY+VeloZ))/(one+q) - c1over54 * drho;
-         //(D.f[BN])[kbn]=zero;
+         (D.f[DIR_0PM])[kbn]=((c1o1 - q) * f_TS + q * ((f_TS + f_BN) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloY+VeloZ)) / (q + c1o1) ;
+         //(D.f[DIR_0PM])[kbn]=(one-q)/(one+q)*(f_TS-f_BN+(f_TS+f_BN-two*feq*om1)/(one-om1))*c1o2+(q*(f_TS+f_BN)-six*c1over54*( -VeloY+VeloZ))/(one+q) - c1over54 * drho;
+         //(D.f[DIR_0PM])[kbn]=zero;
       }
 
       q = q_dirTNE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*( vx1+vx2+vx3)*/+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BSW])[kbsw]=((c1o1 - q) * f_TNE + q * ((f_TNE + f_BSW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*( VeloX+VeloY+VeloZ)) / (q + c1o1) ;
-         //(D.f[BSW])[kbsw]=(one-q)/(one+q)*(f_TNE-f_BSW+(f_TNE+f_BSW-two*feq*om1)/(one-om1))*c1o2+(q*(f_TNE+f_BSW)-six*c1over216*( VeloX+VeloY+VeloZ))/(one+q) - c1over216 * drho;
-         //(D.f[BSW])[kbsw]=zero;
+         (D.f[DIR_MMM])[kbsw]=((c1o1 - q) * f_TNE + q * ((f_TNE + f_BSW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*( VeloX+VeloY+VeloZ)) / (q + c1o1) ;
+         //(D.f[DIR_MMM])[kbsw]=(one-q)/(one+q)*(f_TNE-f_BSW+(f_TNE+f_BSW-two*feq*om1)/(one-om1))*c1o2+(q*(f_TNE+f_BSW)-six*c1over216*( VeloX+VeloY+VeloZ))/(one+q) - c1over216 * drho;
+         //(D.f[DIR_MMM])[kbsw]=zero;
       }
 
       q = q_dirBSW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*(-vx1-vx2-vx3)*/+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TNE])[ktne]=((c1o1 - q) * f_BSW + q * ((f_BSW + f_TNE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*(-VeloX-VeloY-VeloZ)) / (q + c1o1) ;
-         //(D.f[TNE])[ktne]=(one-q)/(one+q)*(f_BSW-f_TNE+(f_BSW+f_TNE-two*feq*om1)/(one-om1))*c1o2+(q*(f_BSW+f_TNE)-six*c1over216*(-VeloX-VeloY-VeloZ))/(one+q) - c1over216 * drho;
-         //(D.f[TNE])[ktne]=zero;
+         (D.f[DIR_PPP])[ktne]=((c1o1 - q) * f_BSW + q * ((f_BSW + f_TNE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*(-VeloX-VeloY-VeloZ)) / (q + c1o1) ;
+         //(D.f[DIR_PPP])[ktne]=(one-q)/(one+q)*(f_BSW-f_TNE+(f_BSW+f_TNE-two*feq*om1)/(one-om1))*c1o2+(q*(f_BSW+f_TNE)-six*c1over216*(-VeloX-VeloY-VeloZ))/(one+q) - c1over216 * drho;
+         //(D.f[DIR_PPP])[ktne]=zero;
       }
 
       q = q_dirBNE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*( vx1+vx2-vx3)*/+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TSW])[ktsw]=((c1o1 - q) * f_BNE + q * ((f_BNE + f_TSW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*( VeloX+VeloY-VeloZ)) / (q + c1o1) ;
-         //(D.f[TSW])[ktsw]=(one-q)/(one+q)*(f_BNE-f_TSW+(f_BNE+f_TSW-two*feq*om1)/(one-om1))*c1o2+(q*(f_BNE+f_TSW)-six*c1over216*( VeloX+VeloY-VeloZ))/(one+q) - c1over216 * drho;
-         //(D.f[TSW])[ktsw]=zero;
+         (D.f[DIR_MMP])[ktsw]=((c1o1 - q) * f_BNE + q * ((f_BNE + f_TSW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*( VeloX+VeloY-VeloZ)) / (q + c1o1) ;
+         //(D.f[DIR_MMP])[ktsw]=(one-q)/(one+q)*(f_BNE-f_TSW+(f_BNE+f_TSW-two*feq*om1)/(one-om1))*c1o2+(q*(f_BNE+f_TSW)-six*c1over216*( VeloX+VeloY-VeloZ))/(one+q) - c1over216 * drho;
+         //(D.f[DIR_MMP])[ktsw]=zero;
       }
 
       q = q_dirTSW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*(-vx1-vx2+vx3)*/+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BNE])[kbne]=((c1o1 - q) * f_TSW + q * ((f_TSW + f_BNE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*(-VeloX-VeloY+VeloZ)) / (q + c1o1) ;
-         //(D.f[BNE])[kbne]=(one-q)/(one+q)*(f_TSW-f_BNE+(f_TSW+f_BNE-two*feq*om1)/(one-om1))*c1o2+(q*(f_TSW+f_BNE)-six*c1over216*(-VeloX-VeloY+VeloZ))/(one+q) - c1over216 * drho;
-         //(D.f[BNE])[kbne]=zero;
+         (D.f[DIR_PPM])[kbne]=((c1o1 - q) * f_TSW + q * ((f_TSW + f_BNE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*(-VeloX-VeloY+VeloZ)) / (q + c1o1) ;
+         //(D.f[DIR_PPM])[kbne]=(one-q)/(one+q)*(f_TSW-f_BNE+(f_TSW+f_BNE-two*feq*om1)/(one-om1))*c1o2+(q*(f_TSW+f_BNE)-six*c1over216*(-VeloX-VeloY+VeloZ))/(one+q) - c1over216 * drho;
+         //(D.f[DIR_PPM])[kbne]=zero;
       }
 
       q = q_dirTSE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*( vx1-vx2+vx3)*/+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BNW])[kbnw]=((c1o1 - q) * f_TSE + q * ((f_TSE + f_BNW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*( VeloX-VeloY+VeloZ)) / (q + c1o1) ;
-         //(D.f[BNW])[kbnw]=(one-q)/(one+q)*(f_TSE-f_BNW+(f_TSE+f_BNW-two*feq*om1)/(one-om1))*c1o2+(q*(f_TSE+f_BNW)-six*c1over216*( VeloX-VeloY+VeloZ))/(one+q) - c1over216 * drho;
-         //(D.f[BNW])[kbnw]=zero;
+         (D.f[DIR_MPM])[kbnw]=((c1o1 - q) * f_TSE + q * ((f_TSE + f_BNW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*( VeloX-VeloY+VeloZ)) / (q + c1o1) ;
+         //(D.f[DIR_MPM])[kbnw]=(one-q)/(one+q)*(f_TSE-f_BNW+(f_TSE+f_BNW-two*feq*om1)/(one-om1))*c1o2+(q*(f_TSE+f_BNW)-six*c1over216*( VeloX-VeloY+VeloZ))/(one+q) - c1over216 * drho;
+         //(D.f[DIR_MPM])[kbnw]=zero;
       }
 
       q = q_dirBNW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*(-vx1+vx2-vx3)*/+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TSE])[ktse]=((c1o1 - q) * f_BNW + q * ((f_BNW + f_TSE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*(-VeloX+VeloY-VeloZ)) / (q + c1o1) ;
-         //(D.f[TSE])[ktse]=(one-q)/(one+q)*(f_BNW-f_TSE+(f_BNW+f_TSE-two*feq*om1)/(one-om1))*c1o2+(q*(f_BNW+f_TSE)-six*c1over216*(-VeloX+VeloY-VeloZ))/(one+q) - c1over216 * drho;
-         //(D.f[TSE])[ktse]=zero;
+         (D.f[DIR_PMP])[ktse]=((c1o1 - q) * f_BNW + q * ((f_BNW + f_TSE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*(-VeloX+VeloY-VeloZ)) / (q + c1o1) ;
+         //(D.f[DIR_PMP])[ktse]=(one-q)/(one+q)*(f_BNW-f_TSE+(f_BNW+f_TSE-two*feq*om1)/(one-om1))*c1o2+(q*(f_BNW+f_TSE)-six*c1over216*(-VeloX+VeloY-VeloZ))/(one+q) - c1over216 * drho;
+         //(D.f[DIR_PMP])[ktse]=zero;
       }
 
       q = q_dirBSE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*( vx1-vx2-vx3)*/+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TNW])[ktnw]=((c1o1 - q) * f_BSE + q * ((f_BSE + f_TNW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*( VeloX-VeloY-VeloZ)) / (q + c1o1) ;
-         //(D.f[TNW])[ktnw]=(one-q)/(one+q)*(f_BSE-f_TNW+(f_BSE+f_TNW-two*feq*om1)/(one-om1))*c1o2+(q*(f_BSE+f_TNW)-six*c1over216*( VeloX-VeloY-VeloZ))/(one+q) - c1over216 * drho;
-         //(D.f[TNW])[ktnw]=zero;
+         (D.f[DIR_MPP])[ktnw]=((c1o1 - q) * f_BSE + q * ((f_BSE + f_TNW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*( VeloX-VeloY-VeloZ)) / (q + c1o1) ;
+         //(D.f[DIR_MPP])[ktnw]=(one-q)/(one+q)*(f_BSE-f_TNW+(f_BSE+f_TNW-two*feq*om1)/(one-om1))*c1o2+(q*(f_BSE+f_TNW)-six*c1over216*( VeloX-VeloY-VeloZ))/(one+q) - c1over216 * drho;
+         //(D.f[DIR_MPP])[ktnw]=zero;
       }
 
       q = q_dirTNW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*(-vx1+vx2+vx3)*/+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BSE])[kbse]=((c1o1 - q) * f_TNW + q * ((f_TNW + f_BSE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*(-VeloX+VeloY+VeloZ)) / (q + c1o1) ;
-         //(D.f[BSE])[kbse]=(one-q)/(one+q)*(f_TNW-f_BSE+(f_TNW+f_BSE-two*feq*om1)/(one-om1))*c1o2+(q*(f_TNW+f_BSE)-six*c1over216*(-VeloX+VeloY+VeloZ))/(one+q) - c1over216 * drho;
-         //(D.f[BSE])[kbse]=zero;
+         (D.f[DIR_PMM])[kbse]=((c1o1 - q) * f_TNW + q * ((f_TNW + f_BSE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*(-VeloX+VeloY+VeloZ)) / (q + c1o1) ;
+         //(D.f[DIR_PMM])[kbse]=(one-q)/(one+q)*(f_TNW-f_BSE+(f_TNW+f_BSE-two*feq*om1)/(one-om1))*c1o2+(q*(f_TNW+f_BSE)-six*c1over216*(-VeloX+VeloY+VeloZ))/(one+q) - c1over216 * drho;
+         //(D.f[DIR_PMM])[kbse]=zero;
       }
    }
 }
@@ -1631,7 +1628,7 @@ extern "C" __global__ void QDeviceCompHighNu27(
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QDeviceComp27(
+__global__ void QDeviceComp27(
 										 real* distributions, 
 										 int* subgridDistanceIndices, 
 										 real* subgridDistances,
@@ -1644,7 +1641,7 @@ extern "C" __global__ void QDeviceComp27(
 										 bool isEvenTimestep)
 {
    //////////////////////////////////////////////////////////////////////////
-   //! The velocity boundary condition is executed in the following steps
+   //! The no-slip boundary condition is executed in the following steps
    //!
    ////////////////////////////////////////////////////////////////////////////////
    //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
@@ -1708,39 +1705,39 @@ extern "C" __global__ void QDeviceComp27(
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set local distributions
       //!
-      real f_W    = (dist.f[E   ])[ke   ];
-      real f_E    = (dist.f[W   ])[kw   ];
-      real f_S    = (dist.f[N   ])[kn   ];
-      real f_N    = (dist.f[S   ])[ks   ];
-      real f_B    = (dist.f[T   ])[kt   ];
-      real f_T    = (dist.f[B   ])[kb   ];
-      real f_SW   = (dist.f[NE  ])[kne  ];
-      real f_NE   = (dist.f[SW  ])[ksw  ];
-      real f_NW   = (dist.f[SE  ])[kse  ];
-      real f_SE   = (dist.f[NW  ])[knw  ];
-      real f_BW   = (dist.f[TE  ])[kte  ];
-      real f_TE   = (dist.f[BW  ])[kbw  ];
-      real f_TW   = (dist.f[BE  ])[kbe  ];
-      real f_BE   = (dist.f[TW  ])[ktw  ];
-      real f_BS   = (dist.f[TN  ])[ktn  ];
-      real f_TN   = (dist.f[BS  ])[kbs  ];
-      real f_TS   = (dist.f[BN  ])[kbn  ];
-      real f_BN   = (dist.f[TS  ])[kts  ];
-      real f_BSW  = (dist.f[TNE ])[ktne ];
-      real f_BNE  = (dist.f[TSW ])[ktsw ];
-      real f_BNW  = (dist.f[TSE ])[ktse ];
-      real f_BSE  = (dist.f[TNW ])[ktnw ];
-      real f_TSW  = (dist.f[BNE ])[kbne ];
-      real f_TNE  = (dist.f[BSW ])[kbsw ];
-      real f_TNW  = (dist.f[BSE ])[kbse ];
-      real f_TSE  = (dist.f[BNW ])[kbnw ];
+      real f_W    = (dist.f[DIR_P00   ])[ke   ];
+      real f_E    = (dist.f[DIR_M00   ])[kw   ];
+      real f_S    = (dist.f[DIR_0P0   ])[kn   ];
+      real f_N    = (dist.f[DIR_0M0   ])[ks   ];
+      real f_B    = (dist.f[DIR_00P   ])[kt   ];
+      real f_T    = (dist.f[DIR_00M   ])[kb   ];
+      real f_SW   = (dist.f[DIR_PP0  ])[kne  ];
+      real f_NE   = (dist.f[DIR_MM0  ])[ksw  ];
+      real f_NW   = (dist.f[DIR_PM0  ])[kse  ];
+      real f_SE   = (dist.f[DIR_MP0  ])[knw  ];
+      real f_BW   = (dist.f[DIR_P0P  ])[kte  ];
+      real f_TE   = (dist.f[DIR_M0M  ])[kbw  ];
+      real f_TW   = (dist.f[DIR_P0M  ])[kbe  ];
+      real f_BE   = (dist.f[DIR_M0P  ])[ktw  ];
+      real f_BS   = (dist.f[DIR_0PP  ])[ktn  ];
+      real f_TN   = (dist.f[DIR_0MM  ])[kbs  ];
+      real f_TS   = (dist.f[DIR_0PM  ])[kbn  ];
+      real f_BN   = (dist.f[DIR_0MP  ])[kts  ];
+      real f_BSW  = (dist.f[DIR_PPP ])[ktne ];
+      real f_BNE  = (dist.f[DIR_MMP ])[ktsw ];
+      real f_BNW  = (dist.f[DIR_PMP ])[ktse ];
+      real f_BSE  = (dist.f[DIR_MPP ])[ktnw ];
+      real f_TSW  = (dist.f[DIR_PPM ])[kbne ];
+      real f_TNE  = (dist.f[DIR_MMM ])[kbsw ];
+      real f_TNW  = (dist.f[DIR_PMM ])[kbse ];
+      real f_TSE  = (dist.f[DIR_MPM ])[kbnw ];
 
       ////////////////////////////////////////////////////////////////////////////////
       //! - Calculate macroscopic quantities
       //!
       real drho = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
                   f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + 
-                  f_T + f_B + f_N + f_S + f_E + f_W + ((dist.f[REST])[kzero]); 
+                  f_T + f_B + f_N + f_S + f_E + f_W + ((dist.f[DIR_000])[kzero]); 
 
       real vx1  = (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
                    ((f_BE - f_TW)   + (f_TE - f_BW))   + ((f_SE - f_NW)   + (f_NE - f_SW)) +
@@ -1764,213 +1761,212 @@ extern "C" __global__ void QDeviceComp27(
        ////////////////////////////////////////////////////////////////////////////////
       //! - Update distributions with subgrid distance (q) between zero and one
       real feq, q, velocityLB;
-      q = (subgridD.q[E])[k];
+      q = (subgridD.q[DIR_P00])[k];
       if (q>=c0o1 && q<=c1o1) // only update distribution for q between zero and one
       {
          velocityLB = vx1;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
-         (dist.f[W])[kw] = getInterpolatedDistributionForNoSlipBC(q, f_E, f_W, feq, omega);
+         (dist.f[DIR_M00])[kw] = getInterpolatedDistributionForNoSlipBC(q, f_E, f_W, feq, omega);
       }
 
-      q = (subgridD.q[W])[k];
+      q = (subgridD.q[DIR_M00])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1;
-         
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
-         (dist.f[E])[ke] = getInterpolatedDistributionForNoSlipBC(q, f_W, f_E, feq, omega);
+         (dist.f[DIR_P00])[ke] = getInterpolatedDistributionForNoSlipBC(q, f_W, f_E, feq, omega);
       }
 
-      q = (subgridD.q[N])[k];
+      q = (subgridD.q[DIR_0P0])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx2;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
-         (dist.f[S])[ks] = getInterpolatedDistributionForNoSlipBC(q, f_N, f_S, feq, omega);
+         (dist.f[DIR_0M0])[ks] = getInterpolatedDistributionForNoSlipBC(q, f_N, f_S, feq, omega);
       }
 
-      q = (subgridD.q[S])[k];
+      q = (subgridD.q[DIR_0M0])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx2;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
-         (dist.f[N])[kn] = getInterpolatedDistributionForNoSlipBC(q, f_S, f_N, feq, omega);
+         (dist.f[DIR_0P0])[kn] = getInterpolatedDistributionForNoSlipBC(q, f_S, f_N, feq, omega);
       }
 
-      q = (subgridD.q[T])[k];
+      q = (subgridD.q[DIR_00P])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
-         (dist.f[B])[kb] = getInterpolatedDistributionForNoSlipBC(q, f_T, f_B, feq, omega);
+         (dist.f[DIR_00M])[kb] = getInterpolatedDistributionForNoSlipBC(q, f_T, f_B, feq, omega);
       }
 
-      q = (subgridD.q[B])[k];
+      q = (subgridD.q[DIR_00M])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
-         (dist.f[T])[kt] = getInterpolatedDistributionForNoSlipBC(q, f_B, f_T, feq, omega);
+         (dist.f[DIR_00P])[kt] = getInterpolatedDistributionForNoSlipBC(q, f_B, f_T, feq, omega);
       }
 
-      q = (subgridD.q[NE])[k];
+      q = (subgridD.q[DIR_PP0])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx2;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         (dist.f[SW])[ksw] = getInterpolatedDistributionForNoSlipBC(q, f_NE, f_SW, feq, omega);
+         (dist.f[DIR_MM0])[ksw] = getInterpolatedDistributionForNoSlipBC(q, f_NE, f_SW, feq, omega);
       }
 
-      q = (subgridD.q[SW])[k];
+      q = (subgridD.q[DIR_MM0])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx2;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         (dist.f[NE])[kne] = getInterpolatedDistributionForNoSlipBC(q, f_SW, f_NE, feq, omega);
+         (dist.f[DIR_PP0])[kne] = getInterpolatedDistributionForNoSlipBC(q, f_SW, f_NE, feq, omega);
       }
 
-      q = (subgridD.q[SE])[k];
+      q = (subgridD.q[DIR_PM0])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx2;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         (dist.f[NW])[knw] = getInterpolatedDistributionForNoSlipBC(q, f_SE, f_NW, feq, omega);
+         (dist.f[DIR_MP0])[knw] = getInterpolatedDistributionForNoSlipBC(q, f_SE, f_NW, feq, omega);
       }
 
-      q = (subgridD.q[NW])[k];
+      q = (subgridD.q[DIR_MP0])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx2;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         (dist.f[SE])[kse] = getInterpolatedDistributionForNoSlipBC(q, f_NW, f_SE, feq, omega);
+         (dist.f[DIR_PM0])[kse] = getInterpolatedDistributionForNoSlipBC(q, f_NW, f_SE, feq, omega);
       }
 
-      q = (subgridD.q[TE])[k];
+      q = (subgridD.q[DIR_P0P])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         (dist.f[BW])[kbw] = getInterpolatedDistributionForNoSlipBC(q, f_TE, f_BW, feq, omega);
+         (dist.f[DIR_M0M])[kbw] = getInterpolatedDistributionForNoSlipBC(q, f_TE, f_BW, feq, omega);
       }
 
-      q = (subgridD.q[BW])[k];
+      q = (subgridD.q[DIR_M0M])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         (dist.f[TE])[kte] = getInterpolatedDistributionForNoSlipBC(q, f_BW, f_TE, feq, omega);
+         (dist.f[DIR_P0P])[kte] = getInterpolatedDistributionForNoSlipBC(q, f_BW, f_TE, feq, omega);
       }
 
-      q = (subgridD.q[BE])[k];
+      q = (subgridD.q[DIR_P0M])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         (dist.f[TW])[ktw] = getInterpolatedDistributionForNoSlipBC(q, f_BE, f_TW, feq, omega);
+         (dist.f[DIR_M0P])[ktw] = getInterpolatedDistributionForNoSlipBC(q, f_BE, f_TW, feq, omega);
       }
 
-      q = (subgridD.q[TW])[k];
+      q = (subgridD.q[DIR_M0P])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         (dist.f[BE])[kbe] = getInterpolatedDistributionForNoSlipBC(q, f_TW, f_BE, feq, omega);
+         (dist.f[DIR_P0M])[kbe] = getInterpolatedDistributionForNoSlipBC(q, f_TW, f_BE, feq, omega);
       }
 
-      q = (subgridD.q[TN])[k];
+      q = (subgridD.q[DIR_0PP])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx2 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         (dist.f[BS])[kbs] = getInterpolatedDistributionForNoSlipBC(q, f_TN, f_BS, feq, omega);
+         (dist.f[DIR_0MM])[kbs] = getInterpolatedDistributionForNoSlipBC(q, f_TN, f_BS, feq, omega);
       }
 
-      q = (subgridD.q[BS])[k];
+      q = (subgridD.q[DIR_0MM])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx2 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         (dist.f[TN])[ktn] = getInterpolatedDistributionForNoSlipBC(q, f_BS, f_TN, feq, omega);
+         (dist.f[DIR_0PP])[ktn] = getInterpolatedDistributionForNoSlipBC(q, f_BS, f_TN, feq, omega);
       }
 
-      q = (subgridD.q[BN])[k];
+      q = (subgridD.q[DIR_0PM])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx2 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         (dist.f[TS])[kts] = getInterpolatedDistributionForNoSlipBC(q, f_BN, f_TS, feq, omega);
+         (dist.f[DIR_0MP])[kts] = getInterpolatedDistributionForNoSlipBC(q, f_BN, f_TS, feq, omega);
       }
 
-      q = (subgridD.q[TS])[k];
+      q = (subgridD.q[DIR_0MP])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx2 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         (dist.f[BN])[kbn] = getInterpolatedDistributionForNoSlipBC(q, f_TS, f_BN, feq, omega);
+         (dist.f[DIR_0PM])[kbn] = getInterpolatedDistributionForNoSlipBC(q, f_TS, f_BN, feq, omega);
       }
 
-      q = (subgridD.q[TNE])[k];
+      q = (subgridD.q[DIR_PPP])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx2 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
-         (dist.f[BSW])[kbsw] = getInterpolatedDistributionForNoSlipBC(q, f_TNE, f_BSW, feq, omega);
+         (dist.f[DIR_MMM])[kbsw] = getInterpolatedDistributionForNoSlipBC(q, f_TNE, f_BSW, feq, omega);
       }
 
-      q = (subgridD.q[BSW])[k];
+      q = (subgridD.q[DIR_MMM])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx2 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
-         (dist.f[TNE])[ktne] = getInterpolatedDistributionForNoSlipBC(q, f_BSW, f_TNE, feq, omega);
+         (dist.f[DIR_PPP])[ktne] = getInterpolatedDistributionForNoSlipBC(q, f_BSW, f_TNE, feq, omega);
       }
 
-      q = (subgridD.q[BNE])[k];
+      q = (subgridD.q[DIR_PPM])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx2 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
-         (dist.f[TSW])[ktsw] = getInterpolatedDistributionForNoSlipBC(q, f_BNE, f_TSW, feq, omega);
+         (dist.f[DIR_MMP])[ktsw] = getInterpolatedDistributionForNoSlipBC(q, f_BNE, f_TSW, feq, omega);
       }
 
-      q = (subgridD.q[TSW])[k];
+      q = (subgridD.q[DIR_MMP])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx2 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
-         (dist.f[BNE])[kbne] = getInterpolatedDistributionForNoSlipBC(q, f_TSW, f_BNE, feq, omega);
+         (dist.f[DIR_PPM])[kbne] = getInterpolatedDistributionForNoSlipBC(q, f_TSW, f_BNE, feq, omega);
       }
 
-      q = (subgridD.q[TSE])[k];
+      q = (subgridD.q[DIR_PMP])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx2 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
-         (dist.f[BNW])[kbnw] = getInterpolatedDistributionForNoSlipBC(q, f_TSE, f_BNW, feq, omega);
+         (dist.f[DIR_MPM])[kbnw] = getInterpolatedDistributionForNoSlipBC(q, f_TSE, f_BNW, feq, omega);
       }
 
-      q = (subgridD.q[BNW])[k];
+      q = (subgridD.q[DIR_MPM])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx2 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
-         (dist.f[TSE])[ktse] = getInterpolatedDistributionForNoSlipBC(q, f_BNW, f_TSE, feq, omega);
+         (dist.f[DIR_PMP])[ktse] = getInterpolatedDistributionForNoSlipBC(q, f_BNW, f_TSE, feq, omega);
       }
 
-      q = (subgridD.q[BSE])[k];
+      q = (subgridD.q[DIR_PMM])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx2 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
-         (dist.f[TNW])[ktnw] = getInterpolatedDistributionForNoSlipBC(q, f_BSE, f_TNW, feq, omega);
+         (dist.f[DIR_MPP])[ktnw] = getInterpolatedDistributionForNoSlipBC(q, f_BSE, f_TNW, feq, omega);
       }
 
-      q = (subgridD.q[TNW])[k];
+      q = (subgridD.q[DIR_MPP])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx2 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
-         (dist.f[BSE])[kbse] = getInterpolatedDistributionForNoSlipBC(q, f_TNW, f_BSE, feq, omega);
+         (dist.f[DIR_PMM])[kbse] = getInterpolatedDistributionForNoSlipBC(q, f_TNW, f_BSE, feq, omega);
       }
    }
 }
@@ -2015,7 +2011,7 @@ extern "C" __global__ void QDeviceComp27(
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QDevice27(real* distributions, 
+__global__ void QDevice27(real* distributions, 
                                      int* subgridDistanceIndices, 
                                      real* subgridDistances,
                                      unsigned int numberOfBCnodes, 
@@ -2095,39 +2091,39 @@ extern "C" __global__ void QDevice27(real* distributions,
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set local distributions
       //!
-      real f_W    = (dist.f[E   ])[ke   ];
-      real f_E    = (dist.f[W   ])[kw   ];
-      real f_S    = (dist.f[N   ])[kn   ];
-      real f_N    = (dist.f[S   ])[ks   ];
-      real f_B    = (dist.f[T   ])[kt   ];
-      real f_T    = (dist.f[B   ])[kb   ];
-      real f_SW   = (dist.f[NE  ])[kne  ];
-      real f_NE   = (dist.f[SW  ])[ksw  ];
-      real f_NW   = (dist.f[SE  ])[kse  ];
-      real f_SE   = (dist.f[NW  ])[knw  ];
-      real f_BW   = (dist.f[TE  ])[kte  ];
-      real f_TE   = (dist.f[BW  ])[kbw  ];
-      real f_TW   = (dist.f[BE  ])[kbe  ];
-      real f_BE   = (dist.f[TW  ])[ktw  ];
-      real f_BS   = (dist.f[TN  ])[ktn  ];
-      real f_TN   = (dist.f[BS  ])[kbs  ];
-      real f_TS   = (dist.f[BN  ])[kbn  ];
-      real f_BN   = (dist.f[TS  ])[kts  ];
-      real f_BSW  = (dist.f[TNE ])[ktne ];
-      real f_BNE  = (dist.f[TSW ])[ktsw ];
-      real f_BNW  = (dist.f[TSE ])[ktse ];
-      real f_BSE  = (dist.f[TNW ])[ktnw ];
-      real f_TSW  = (dist.f[BNE ])[kbne ];
-      real f_TNE  = (dist.f[BSW ])[kbsw ];
-      real f_TNW  = (dist.f[BSE ])[kbse ];
-      real f_TSE  = (dist.f[BNW ])[kbnw ];
+      real f_W    = (dist.f[DIR_P00   ])[ke   ];
+      real f_E    = (dist.f[DIR_M00   ])[kw   ];
+      real f_S    = (dist.f[DIR_0P0   ])[kn   ];
+      real f_N    = (dist.f[DIR_0M0   ])[ks   ];
+      real f_B    = (dist.f[DIR_00P   ])[kt   ];
+      real f_T    = (dist.f[DIR_00M   ])[kb   ];
+      real f_SW   = (dist.f[DIR_PP0  ])[kne  ];
+      real f_NE   = (dist.f[DIR_MM0  ])[ksw  ];
+      real f_NW   = (dist.f[DIR_PM0  ])[kse  ];
+      real f_SE   = (dist.f[DIR_MP0  ])[knw  ];
+      real f_BW   = (dist.f[DIR_P0P  ])[kte  ];
+      real f_TE   = (dist.f[DIR_M0M  ])[kbw  ];
+      real f_TW   = (dist.f[DIR_P0M  ])[kbe  ];
+      real f_BE   = (dist.f[DIR_M0P  ])[ktw  ];
+      real f_BS   = (dist.f[DIR_0PP  ])[ktn  ];
+      real f_TN   = (dist.f[DIR_0MM  ])[kbs  ];
+      real f_TS   = (dist.f[DIR_0PM  ])[kbn  ];
+      real f_BN   = (dist.f[DIR_0MP  ])[kts  ];
+      real f_BSW  = (dist.f[DIR_PPP ])[ktne ];
+      real f_BNE  = (dist.f[DIR_MMP ])[ktsw ];
+      real f_BNW  = (dist.f[DIR_PMP ])[ktse ];
+      real f_BSE  = (dist.f[DIR_MPP ])[ktnw ];
+      real f_TSW  = (dist.f[DIR_PPM ])[kbne ];
+      real f_TNE  = (dist.f[DIR_MMM ])[kbsw ];
+      real f_TNW  = (dist.f[DIR_PMM ])[kbse ];
+      real f_TSE  = (dist.f[DIR_MPM ])[kbnw ];
 
       ////////////////////////////////////////////////////////////////////////////////
       //! - Calculate macroscopic quantities
       //!
       real drho = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
                   f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + 
-                  f_T + f_B + f_N + f_S + f_E + f_W + ((dist.f[REST])[kzero]); 
+                  f_T + f_B + f_N + f_S + f_E + f_W + ((dist.f[DIR_000])[kzero]); 
 
       real vx1  = (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
                    ((f_BE - f_TW)   + (f_TE - f_BW))   + ((f_SE - f_NW)   + (f_NE - f_SW)) +
@@ -2152,212 +2148,212 @@ extern "C" __global__ void QDevice27(real* distributions,
       //! - Update distributions with subgrid distance (q) between zero and one
       //!
       real feq, q, velocityLB;
-      q = (subgridD.q[E])[k];
+      q = (subgridD.q[DIR_P00])[k];
       if (q>=c0o1 && q<=c1o1) // only update distribution for q between zero and one
       {
          velocityLB = vx1;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
-         (dist.f[W])[kw] = getInterpolatedDistributionForNoSlipBC(q, f_E, f_W, feq, omega);
+         (dist.f[DIR_M00])[kw] = getInterpolatedDistributionForNoSlipBC(q, f_E, f_W, feq, omega);
       }
 
-      q = (subgridD.q[W])[k];
+      q = (subgridD.q[DIR_M00])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
-         (dist.f[E])[ke] = getInterpolatedDistributionForNoSlipBC(q, f_W, f_E, feq, omega);
+         (dist.f[DIR_P00])[ke] = getInterpolatedDistributionForNoSlipBC(q, f_W, f_E, feq, omega);
       }
 
-      q = (subgridD.q[N])[k];
+      q = (subgridD.q[DIR_0P0])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx2;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
-         (dist.f[S])[ks] = getInterpolatedDistributionForNoSlipBC(q, f_N, f_S, feq, omega);
+         (dist.f[DIR_0M0])[ks] = getInterpolatedDistributionForNoSlipBC(q, f_N, f_S, feq, omega);
       }
 
-      q = (subgridD.q[S])[k];
+      q = (subgridD.q[DIR_0M0])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx2;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
-         (dist.f[N])[kn] = getInterpolatedDistributionForNoSlipBC(q, f_S, f_N, feq, omega);
+         (dist.f[DIR_0P0])[kn] = getInterpolatedDistributionForNoSlipBC(q, f_S, f_N, feq, omega);
       }
 
-      q = (subgridD.q[T])[k];
+      q = (subgridD.q[DIR_00P])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
-         (dist.f[B])[kb] = getInterpolatedDistributionForNoSlipBC(q, f_T, f_B, feq, omega);
+         (dist.f[DIR_00M])[kb] = getInterpolatedDistributionForNoSlipBC(q, f_T, f_B, feq, omega);
       }
 
-      q = (subgridD.q[B])[k];
+      q = (subgridD.q[DIR_00M])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
-         (dist.f[T])[kt] = getInterpolatedDistributionForNoSlipBC(q, f_B, f_T, feq, omega);
+         (dist.f[DIR_00P])[kt] = getInterpolatedDistributionForNoSlipBC(q, f_B, f_T, feq, omega);
       }
 
-      q = (subgridD.q[NE])[k];
+      q = (subgridD.q[DIR_PP0])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx2;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         (dist.f[SW])[ksw] = getInterpolatedDistributionForNoSlipBC(q, f_NE, f_SW, feq, omega);
+         (dist.f[DIR_MM0])[ksw] = getInterpolatedDistributionForNoSlipBC(q, f_NE, f_SW, feq, omega);
       }
 
-      q = (subgridD.q[SW])[k];
+      q = (subgridD.q[DIR_MM0])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx2;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         (dist.f[NE])[kne] = getInterpolatedDistributionForNoSlipBC(q, f_SW, f_NE, feq, omega);
+         (dist.f[DIR_PP0])[kne] = getInterpolatedDistributionForNoSlipBC(q, f_SW, f_NE, feq, omega);
       }
 
-      q = (subgridD.q[SE])[k];
+      q = (subgridD.q[DIR_PM0])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx2;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         (dist.f[NW])[knw] = getInterpolatedDistributionForNoSlipBC(q, f_SE, f_NW, feq, omega);
+         (dist.f[DIR_MP0])[knw] = getInterpolatedDistributionForNoSlipBC(q, f_SE, f_NW, feq, omega);
       }
 
-      q = (subgridD.q[NW])[k];
+      q = (subgridD.q[DIR_MP0])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx2;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         (dist.f[SE])[kse] = getInterpolatedDistributionForNoSlipBC(q, f_NW, f_SE, feq, omega);
+         (dist.f[DIR_PM0])[kse] = getInterpolatedDistributionForNoSlipBC(q, f_NW, f_SE, feq, omega);
       }
 
-      q = (subgridD.q[TE])[k];
+      q = (subgridD.q[DIR_P0P])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         (dist.f[BW])[kbw] = getInterpolatedDistributionForNoSlipBC(q, f_TE, f_BW, feq, omega);
+         (dist.f[DIR_M0M])[kbw] = getInterpolatedDistributionForNoSlipBC(q, f_TE, f_BW, feq, omega);
       }
 
-      q = (subgridD.q[BW])[k];
+      q = (subgridD.q[DIR_M0M])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         (dist.f[TE])[kte] = getInterpolatedDistributionForNoSlipBC(q, f_BW, f_TE, feq, omega);
+         (dist.f[DIR_P0P])[kte] = getInterpolatedDistributionForNoSlipBC(q, f_BW, f_TE, feq, omega);
       }
 
-      q = (subgridD.q[BE])[k];
+      q = (subgridD.q[DIR_P0M])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         (dist.f[TW])[ktw] = getInterpolatedDistributionForNoSlipBC(q, f_BE, f_TW, feq, omega);
+         (dist.f[DIR_M0P])[ktw] = getInterpolatedDistributionForNoSlipBC(q, f_BE, f_TW, feq, omega);
       }
 
-      q = (subgridD.q[TW])[k];
+      q = (subgridD.q[DIR_M0P])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         (dist.f[BE])[kbe] = getInterpolatedDistributionForNoSlipBC(q, f_TW, f_BE, feq, omega);
+         (dist.f[DIR_P0M])[kbe] = getInterpolatedDistributionForNoSlipBC(q, f_TW, f_BE, feq, omega);
       }
 
-      q = (subgridD.q[TN])[k];
+      q = (subgridD.q[DIR_0PP])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx2 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         (dist.f[BS])[kbs] = getInterpolatedDistributionForNoSlipBC(q, f_TN, f_BS, feq, omega);
+         (dist.f[DIR_0MM])[kbs] = getInterpolatedDistributionForNoSlipBC(q, f_TN, f_BS, feq, omega);
       }
 
-      q = (subgridD.q[BS])[k];
+      q = (subgridD.q[DIR_0MM])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx2 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         (dist.f[TN])[ktn] = getInterpolatedDistributionForNoSlipBC(q, f_BS, f_TN, feq, omega);
+         (dist.f[DIR_0PP])[ktn] = getInterpolatedDistributionForNoSlipBC(q, f_BS, f_TN, feq, omega);
       }
 
-      q = (subgridD.q[BN])[k];
+      q = (subgridD.q[DIR_0PM])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx2 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         (dist.f[TS])[kts] = getInterpolatedDistributionForNoSlipBC(q, f_BN, f_TS, feq, omega);
+         (dist.f[DIR_0MP])[kts] = getInterpolatedDistributionForNoSlipBC(q, f_BN, f_TS, feq, omega);
       }
 
-      q = (subgridD.q[TS])[k];
+      q = (subgridD.q[DIR_0MP])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx2 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
-         (dist.f[BN])[kbn] = getInterpolatedDistributionForNoSlipBC(q, f_TS, f_BN, feq, omega);
+         (dist.f[DIR_0PM])[kbn] = getInterpolatedDistributionForNoSlipBC(q, f_TS, f_BN, feq, omega);
       }
 
-      q = (subgridD.q[TNE])[k];
+      q = (subgridD.q[DIR_PPP])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx2 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
-         (dist.f[BSW])[kbsw] = getInterpolatedDistributionForNoSlipBC(q, f_TNE, f_BSW, feq, omega);
+         (dist.f[DIR_MMM])[kbsw] = getInterpolatedDistributionForNoSlipBC(q, f_TNE, f_BSW, feq, omega);
       }
 
-      q = (subgridD.q[BSW])[k];
+      q = (subgridD.q[DIR_MMM])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx2 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
-         (dist.f[TNE])[ktne] = getInterpolatedDistributionForNoSlipBC(q, f_BSW, f_TNE, feq, omega);
+         (dist.f[DIR_PPP])[ktne] = getInterpolatedDistributionForNoSlipBC(q, f_BSW, f_TNE, feq, omega);
       }
 
-      q = (subgridD.q[BNE])[k];
+      q = (subgridD.q[DIR_PPM])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx2 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
-         (dist.f[TSW])[ktsw] = getInterpolatedDistributionForNoSlipBC(q, f_BNE, f_TSW, feq, omega);
+         (dist.f[DIR_MMP])[ktsw] = getInterpolatedDistributionForNoSlipBC(q, f_BNE, f_TSW, feq, omega);
       }
 
-      q = (subgridD.q[TSW])[k];
+      q = (subgridD.q[DIR_MMP])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx2 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
-         (dist.f[BNE])[kbne] = getInterpolatedDistributionForNoSlipBC(q, f_TSW, f_BNE, feq, omega);
+         (dist.f[DIR_PPM])[kbne] = getInterpolatedDistributionForNoSlipBC(q, f_TSW, f_BNE, feq, omega);
       }
 
-      q = (subgridD.q[TSE])[k];
+      q = (subgridD.q[DIR_PMP])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx2 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
-         (dist.f[BNW])[kbnw] = getInterpolatedDistributionForNoSlipBC(q, f_TSE, f_BNW, feq, omega);
+         (dist.f[DIR_MPM])[kbnw] = getInterpolatedDistributionForNoSlipBC(q, f_TSE, f_BNW, feq, omega);
       }
 
-      q = (subgridD.q[BNW])[k];
+      q = (subgridD.q[DIR_MPM])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx2 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
-         (dist.f[TSE])[ktse] = getInterpolatedDistributionForNoSlipBC(q, f_BNW, f_TSE, feq, omega);
+         (dist.f[DIR_PMP])[ktse] = getInterpolatedDistributionForNoSlipBC(q, f_BNW, f_TSE, feq, omega);
       }
 
-      q = (subgridD.q[BSE])[k];
+      q = (subgridD.q[DIR_PMM])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx2 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
-         (dist.f[TNW])[ktnw] = getInterpolatedDistributionForNoSlipBC(q, f_BSE, f_TNW, feq, omega);
+         (dist.f[DIR_MPP])[ktnw] = getInterpolatedDistributionForNoSlipBC(q, f_BSE, f_TNW, feq, omega);
       }
 
-      q = (subgridD.q[TNW])[k];
+      q = (subgridD.q[DIR_MPP])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx2 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
-         (dist.f[BSE])[kbse] = getInterpolatedDistributionForNoSlipBC(q, f_TNW, f_BSE, feq, omega);
+         (dist.f[DIR_PMM])[kbse] = getInterpolatedDistributionForNoSlipBC(q, f_TNW, f_BSE, feq, omega);
       }
    }
 }
@@ -2402,7 +2398,7 @@ extern "C" __global__ void QDevice27(real* distributions,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void BBDevice27(real* distributions, 
+__global__ void BBDevice27(real* distributions, 
                                      int* subgridDistanceIndices, 
                                      real* subgridDistances,
                                      unsigned int numberOfBCnodes, 
@@ -2478,32 +2474,32 @@ extern "C" __global__ void BBDevice27(real* distributions,
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set local distributions
       //!
-      real f_W    = (dist.f[E   ])[ke   ];
-      real f_E    = (dist.f[W   ])[kw   ];
-      real f_S    = (dist.f[N   ])[kn   ];
-      real f_N    = (dist.f[S   ])[ks   ];
-      real f_B    = (dist.f[T   ])[kt   ];
-      real f_T    = (dist.f[B   ])[kb   ];
-      real f_SW   = (dist.f[NE  ])[kne  ];
-      real f_NE   = (dist.f[SW  ])[ksw  ];
-      real f_NW   = (dist.f[SE  ])[kse  ];
-      real f_SE   = (dist.f[NW  ])[knw  ];
-      real f_BW   = (dist.f[TE  ])[kte  ];
-      real f_TE   = (dist.f[BW  ])[kbw  ];
-      real f_TW   = (dist.f[BE  ])[kbe  ];
-      real f_BE   = (dist.f[TW  ])[ktw  ];
-      real f_BS   = (dist.f[TN  ])[ktn  ];
-      real f_TN   = (dist.f[BS  ])[kbs  ];
-      real f_TS   = (dist.f[BN  ])[kbn  ];
-      real f_BN   = (dist.f[TS  ])[kts  ];
-      real f_BSW  = (dist.f[TNE ])[ktne ];
-      real f_BNE  = (dist.f[TSW ])[ktsw ];
-      real f_BNW  = (dist.f[TSE ])[ktse ];
-      real f_BSE  = (dist.f[TNW ])[ktnw ];
-      real f_TSW  = (dist.f[BNE ])[kbne ];
-      real f_TNE  = (dist.f[BSW ])[kbsw ];
-      real f_TNW  = (dist.f[BSE ])[kbse ];
-      real f_TSE  = (dist.f[BNW ])[kbnw ];
+      real f_W    = (dist.f[DIR_P00   ])[ke   ];
+      real f_E    = (dist.f[DIR_M00   ])[kw   ];
+      real f_S    = (dist.f[DIR_0P0   ])[kn   ];
+      real f_N    = (dist.f[DIR_0M0   ])[ks   ];
+      real f_B    = (dist.f[DIR_00P   ])[kt   ];
+      real f_T    = (dist.f[DIR_00M   ])[kb   ];
+      real f_SW   = (dist.f[DIR_PP0  ])[kne  ];
+      real f_NE   = (dist.f[DIR_MM0  ])[ksw  ];
+      real f_NW   = (dist.f[DIR_PM0  ])[kse  ];
+      real f_SE   = (dist.f[DIR_MP0  ])[knw  ];
+      real f_BW   = (dist.f[DIR_P0P  ])[kte  ];
+      real f_TE   = (dist.f[DIR_M0M  ])[kbw  ];
+      real f_TW   = (dist.f[DIR_P0M  ])[kbe  ];
+      real f_BE   = (dist.f[DIR_M0P  ])[ktw  ];
+      real f_BS   = (dist.f[DIR_0PP  ])[ktn  ];
+      real f_TN   = (dist.f[DIR_0MM  ])[kbs  ];
+      real f_TS   = (dist.f[DIR_0PM  ])[kbn  ];
+      real f_BN   = (dist.f[DIR_0MP  ])[kts  ];
+      real f_BSW  = (dist.f[DIR_PPP ])[ktne ];
+      real f_BNE  = (dist.f[DIR_MMP ])[ktsw ];
+      real f_BNW  = (dist.f[DIR_PMP ])[ktse ];
+      real f_BSE  = (dist.f[DIR_MPP ])[ktnw ];
+      real f_TSW  = (dist.f[DIR_PPM ])[kbne ];
+      real f_TNE  = (dist.f[DIR_MMM ])[kbsw ];
+      real f_TNW  = (dist.f[DIR_PMM ])[kbse ];
+      real f_TSE  = (dist.f[DIR_MPM ])[kbnw ];
 
       ////////////////////////////////////////////////////////////////////////////////
       //! - change the pointer to write the results in the correct array
@@ -2513,32 +2509,32 @@ extern "C" __global__ void BBDevice27(real* distributions,
       ////////////////////////////////////////////////////////////////////////////////
       //! - rewrite distributions if there is a sub-grid distance (q) in same direction
       real q;
-      q = (subgridD.q[E  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[W  ])[kw  ]=f_E  ;
-      q = (subgridD.q[W  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[E  ])[ke  ]=f_W  ;
-      q = (subgridD.q[N  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[S  ])[ks  ]=f_N  ;
-      q = (subgridD.q[S  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[N  ])[kn  ]=f_S  ;
-      q = (subgridD.q[T  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[B  ])[kb  ]=f_T  ;
-      q = (subgridD.q[B  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[T  ])[kt  ]=f_B  ;
-      q = (subgridD.q[NE ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[SW ])[ksw ]=f_NE ;
-      q = (subgridD.q[SW ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[NE ])[kne ]=f_SW ;
-      q = (subgridD.q[SE ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[NW ])[knw ]=f_SE ;
-      q = (subgridD.q[NW ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[SE ])[kse ]=f_NW ;
-      q = (subgridD.q[TE ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[BW ])[kbw ]=f_TE ;
-      q = (subgridD.q[BW ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[TE ])[kte ]=f_BW ;
-      q = (subgridD.q[BE ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[TW ])[ktw ]=f_BE ;
-      q = (subgridD.q[TW ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[BE ])[kbe ]=f_TW ;
-      q = (subgridD.q[TN ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[BS ])[kbs ]=f_TN ;
-      q = (subgridD.q[BS ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[TN ])[ktn ]=f_BS ;
-      q = (subgridD.q[BN ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[TS ])[kts ]=f_BN ;
-      q = (subgridD.q[TS ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[BN ])[kbn ]=f_TS ;
-      q = (subgridD.q[TNE])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[BSW])[kbsw]=f_TNE;
-      q = (subgridD.q[BSW])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[TNE])[ktne]=f_BSW;
-      q = (subgridD.q[BNE])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[TSW])[ktsw]=f_BNE;
-      q = (subgridD.q[TSW])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[BNE])[kbne]=f_TSW;
-      q = (subgridD.q[TSE])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[BNW])[kbnw]=f_TSE;
-      q = (subgridD.q[BNW])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[TSE])[ktse]=f_BNW;
-      q = (subgridD.q[BSE])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[TNW])[ktnw]=f_BSE;
-      q = (subgridD.q[TNW])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[BSE])[kbse]=f_TNW;
+      q = (subgridD.q[DIR_P00  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_M00  ])[kw  ]=f_E  ;
+      q = (subgridD.q[DIR_M00  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_P00  ])[ke  ]=f_W  ;
+      q = (subgridD.q[DIR_0P0  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0M0  ])[ks  ]=f_N  ;
+      q = (subgridD.q[DIR_0M0  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0P0  ])[kn  ]=f_S  ;
+      q = (subgridD.q[DIR_00P  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_00M  ])[kb  ]=f_T  ;
+      q = (subgridD.q[DIR_00M  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_00P  ])[kt  ]=f_B  ;
+      q = (subgridD.q[DIR_PP0 ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MM0 ])[ksw ]=f_NE ;
+      q = (subgridD.q[DIR_MM0 ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PP0 ])[kne ]=f_SW ;
+      q = (subgridD.q[DIR_PM0 ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MP0 ])[knw ]=f_SE ;
+      q = (subgridD.q[DIR_MP0 ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PM0 ])[kse ]=f_NW ;
+      q = (subgridD.q[DIR_P0P ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_M0M ])[kbw ]=f_TE ;
+      q = (subgridD.q[DIR_M0M ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_P0P ])[kte ]=f_BW ;
+      q = (subgridD.q[DIR_P0M ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_M0P ])[ktw ]=f_BE ;
+      q = (subgridD.q[DIR_M0P ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_P0M ])[kbe ]=f_TW ;
+      q = (subgridD.q[DIR_0PP ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0MM ])[kbs ]=f_TN ;
+      q = (subgridD.q[DIR_0MM ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0PP ])[ktn ]=f_BS ;
+      q = (subgridD.q[DIR_0PM ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0MP ])[kts ]=f_BN ;
+      q = (subgridD.q[DIR_0MP ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0PM ])[kbn ]=f_TS ;
+      q = (subgridD.q[DIR_PPP])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MMM])[kbsw]=f_TNE;
+      q = (subgridD.q[DIR_MMM])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PPP])[ktne]=f_BSW;
+      q = (subgridD.q[DIR_PPM])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MMP])[ktsw]=f_BNE;
+      q = (subgridD.q[DIR_MMP])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PPM])[kbne]=f_TSW;
+      q = (subgridD.q[DIR_PMP])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MPM])[kbnw]=f_TSE;
+      q = (subgridD.q[DIR_MPM])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PMP])[ktse]=f_BNW;
+      q = (subgridD.q[DIR_PMM])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MPP])[ktnw]=f_BSE;
+      q = (subgridD.q[DIR_MPP])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PMM])[kbse]=f_TNW;
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/GPU/Particles.cu b/src/gpu/VirtualFluids_GPU/GPU/Particles.cu
index a0192efab597856ecf365ebc69c9035ba06d1b44..3a3ab784e6a7901c41d402629172c3c6154ffde9 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/Particles.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/Particles.cu
@@ -7,7 +7,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void InitParticles( real* coordX,
+__global__ void InitParticles( real* coordX,
 										  real* coordY,
 										  real* coordZ, 
 										  real* coordParticleXlocal,
@@ -158,7 +158,7 @@ extern "C" __global__ void InitParticles( real* coordX,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void MoveParticles( real* coordX,
+__global__ void MoveParticles( real* coordX,
 										  real* coordY,
 										  real* coordZ, 
 										  real* coordParticleXlocal,
@@ -248,63 +248,63 @@ extern "C" __global__ void MoveParticles( real* coordX,
 		{
 		   if (isEvenTimestep==true)
 		   {
-			  feC    = &DD[E   *size_Mat];
-			  fwC    = &DD[W   *size_Mat];
-			  fnC    = &DD[N   *size_Mat];
-			  fsC    = &DD[S   *size_Mat];
-			  ftC    = &DD[T   *size_Mat];
-			  fbC    = &DD[B   *size_Mat];
-			  fneC   = &DD[NE  *size_Mat];
-			  fswC   = &DD[SW  *size_Mat];
-			  fseC   = &DD[SE  *size_Mat];
-			  fnwC   = &DD[NW  *size_Mat];
-			  fteC   = &DD[TE  *size_Mat];
-			  fbwC   = &DD[BW  *size_Mat];
-			  fbeC   = &DD[BE  *size_Mat];
-			  ftwC   = &DD[TW  *size_Mat];
-			  ftnC   = &DD[TN  *size_Mat];
-			  fbsC   = &DD[BS  *size_Mat];
-			  fbnC   = &DD[BN  *size_Mat];
-			  ftsC   = &DD[TS  *size_Mat];
-			  fzeroC = &DD[REST*size_Mat];
-			  ftneC  = &DD[TNE *size_Mat];
-			  ftswC  = &DD[TSW *size_Mat];
-			  ftseC  = &DD[TSE *size_Mat];
-			  ftnwC  = &DD[TNW *size_Mat];
-			  fbneC  = &DD[BNE *size_Mat];
-			  fbswC  = &DD[BSW *size_Mat];
-			  fbseC  = &DD[BSE *size_Mat];
-			  fbnwC  = &DD[BNW *size_Mat];
+			  feC    = &DD[DIR_P00   *size_Mat];
+			  fwC    = &DD[DIR_M00   *size_Mat];
+			  fnC    = &DD[DIR_0P0   *size_Mat];
+			  fsC    = &DD[DIR_0M0   *size_Mat];
+			  ftC    = &DD[DIR_00P   *size_Mat];
+			  fbC    = &DD[DIR_00M   *size_Mat];
+			  fneC   = &DD[DIR_PP0  *size_Mat];
+			  fswC   = &DD[DIR_MM0  *size_Mat];
+			  fseC   = &DD[DIR_PM0  *size_Mat];
+			  fnwC   = &DD[DIR_MP0  *size_Mat];
+			  fteC   = &DD[DIR_P0P  *size_Mat];
+			  fbwC   = &DD[DIR_M0M  *size_Mat];
+			  fbeC   = &DD[DIR_P0M  *size_Mat];
+			  ftwC   = &DD[DIR_M0P  *size_Mat];
+			  ftnC   = &DD[DIR_0PP  *size_Mat];
+			  fbsC   = &DD[DIR_0MM  *size_Mat];
+			  fbnC   = &DD[DIR_0PM  *size_Mat];
+			  ftsC   = &DD[DIR_0MP  *size_Mat];
+			  fzeroC = &DD[DIR_000*size_Mat];
+			  ftneC  = &DD[DIR_PPP *size_Mat];
+			  ftswC  = &DD[DIR_MMP *size_Mat];
+			  ftseC  = &DD[DIR_PMP *size_Mat];
+			  ftnwC  = &DD[DIR_MPP *size_Mat];
+			  fbneC  = &DD[DIR_PPM *size_Mat];
+			  fbswC  = &DD[DIR_MMM *size_Mat];
+			  fbseC  = &DD[DIR_PMM *size_Mat];
+			  fbnwC  = &DD[DIR_MPM *size_Mat];
 		   } 			 
 		   else			 
 		   {			 
-			  fwC    = &DD[E   *size_Mat];
-			  feC    = &DD[W   *size_Mat];
-			  fsC    = &DD[N   *size_Mat];
-			  fnC    = &DD[S   *size_Mat];
-			  fbC    = &DD[T   *size_Mat];
-			  ftC    = &DD[B   *size_Mat];
-			  fswC   = &DD[NE  *size_Mat];
-			  fneC   = &DD[SW  *size_Mat];
-			  fnwC   = &DD[SE  *size_Mat];
-			  fseC   = &DD[NW  *size_Mat];
-			  fbwC   = &DD[TE  *size_Mat];
-			  fteC   = &DD[BW  *size_Mat];
-			  ftwC   = &DD[BE  *size_Mat];
-			  fbeC   = &DD[TW  *size_Mat];
-			  fbsC   = &DD[TN  *size_Mat];
-			  ftnC   = &DD[BS  *size_Mat];
-			  ftsC   = &DD[BN  *size_Mat];
-			  fbnC   = &DD[TS  *size_Mat];
-			  fzeroC = &DD[REST*size_Mat];
-			  fbswC  = &DD[TNE *size_Mat];
-			  fbneC  = &DD[TSW *size_Mat];
-			  fbnwC  = &DD[TSE *size_Mat];
-			  fbseC  = &DD[TNW *size_Mat];
-			  ftswC  = &DD[BNE *size_Mat];
-			  ftneC  = &DD[BSW *size_Mat];
-			  ftnwC  = &DD[BSE *size_Mat];
-			  ftseC  = &DD[BNW *size_Mat];
+			  fwC    = &DD[DIR_P00   *size_Mat];
+			  feC    = &DD[DIR_M00   *size_Mat];
+			  fsC    = &DD[DIR_0P0   *size_Mat];
+			  fnC    = &DD[DIR_0M0   *size_Mat];
+			  fbC    = &DD[DIR_00P   *size_Mat];
+			  ftC    = &DD[DIR_00M   *size_Mat];
+			  fswC   = &DD[DIR_PP0  *size_Mat];
+			  fneC   = &DD[DIR_MM0  *size_Mat];
+			  fnwC   = &DD[DIR_PM0  *size_Mat];
+			  fseC   = &DD[DIR_MP0  *size_Mat];
+			  fbwC   = &DD[DIR_P0P  *size_Mat];
+			  fteC   = &DD[DIR_M0M  *size_Mat];
+			  ftwC   = &DD[DIR_P0M  *size_Mat];
+			  fbeC   = &DD[DIR_M0P  *size_Mat];
+			  fbsC   = &DD[DIR_0PP  *size_Mat];
+			  ftnC   = &DD[DIR_0MM  *size_Mat];
+			  ftsC   = &DD[DIR_0PM  *size_Mat];
+			  fbnC   = &DD[DIR_0MP  *size_Mat];
+			  fzeroC = &DD[DIR_000*size_Mat];
+			  fbswC  = &DD[DIR_PPP *size_Mat];
+			  fbneC  = &DD[DIR_MMP *size_Mat];
+			  fbnwC  = &DD[DIR_PMP *size_Mat];
+			  fbseC  = &DD[DIR_MPP *size_Mat];
+			  ftswC  = &DD[DIR_PPM *size_Mat];
+			  ftneC  = &DD[DIR_MMM *size_Mat];
+			  ftnwC  = &DD[DIR_PMM *size_Mat];
+			  ftseC  = &DD[DIR_MPM *size_Mat];
 		   }
 
 			  //////////////////////////////////////////////////////////////////////////
@@ -1030,7 +1030,7 @@ extern "C" __global__ void MoveParticles( real* coordX,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void MoveParticlesWithoutBCs(   real* coordX,
+__global__ void MoveParticlesWithoutBCs(   real* coordX,
 													  real* coordY,
 													  real* coordZ, 
 													  real* coordParticleXlocal,
@@ -1114,63 +1114,63 @@ extern "C" __global__ void MoveParticlesWithoutBCs(   real* coordX,
 		{
 		   if (isEvenTimestep==true)
 		   {
-			  feC    = &DD[E   *size_Mat];
-			  fwC    = &DD[W   *size_Mat];
-			  fnC    = &DD[N   *size_Mat];
-			  fsC    = &DD[S   *size_Mat];
-			  ftC    = &DD[T   *size_Mat];
-			  fbC    = &DD[B   *size_Mat];
-			  fneC   = &DD[NE  *size_Mat];
-			  fswC   = &DD[SW  *size_Mat];
-			  fseC   = &DD[SE  *size_Mat];
-			  fnwC   = &DD[NW  *size_Mat];
-			  fteC   = &DD[TE  *size_Mat];
-			  fbwC   = &DD[BW  *size_Mat];
-			  fbeC   = &DD[BE  *size_Mat];
-			  ftwC   = &DD[TW  *size_Mat];
-			  ftnC   = &DD[TN  *size_Mat];
-			  fbsC   = &DD[BS  *size_Mat];
-			  fbnC   = &DD[BN  *size_Mat];
-			  ftsC   = &DD[TS  *size_Mat];
-			  fzeroC = &DD[REST*size_Mat];
-			  ftneC  = &DD[TNE *size_Mat];
-			  ftswC  = &DD[TSW *size_Mat];
-			  ftseC  = &DD[TSE *size_Mat];
-			  ftnwC  = &DD[TNW *size_Mat];
-			  fbneC  = &DD[BNE *size_Mat];
-			  fbswC  = &DD[BSW *size_Mat];
-			  fbseC  = &DD[BSE *size_Mat];
-			  fbnwC  = &DD[BNW *size_Mat];
+			  feC    = &DD[DIR_P00   *size_Mat];
+			  fwC    = &DD[DIR_M00   *size_Mat];
+			  fnC    = &DD[DIR_0P0   *size_Mat];
+			  fsC    = &DD[DIR_0M0   *size_Mat];
+			  ftC    = &DD[DIR_00P   *size_Mat];
+			  fbC    = &DD[DIR_00M   *size_Mat];
+			  fneC   = &DD[DIR_PP0  *size_Mat];
+			  fswC   = &DD[DIR_MM0  *size_Mat];
+			  fseC   = &DD[DIR_PM0  *size_Mat];
+			  fnwC   = &DD[DIR_MP0  *size_Mat];
+			  fteC   = &DD[DIR_P0P  *size_Mat];
+			  fbwC   = &DD[DIR_M0M  *size_Mat];
+			  fbeC   = &DD[DIR_P0M  *size_Mat];
+			  ftwC   = &DD[DIR_M0P  *size_Mat];
+			  ftnC   = &DD[DIR_0PP  *size_Mat];
+			  fbsC   = &DD[DIR_0MM  *size_Mat];
+			  fbnC   = &DD[DIR_0PM  *size_Mat];
+			  ftsC   = &DD[DIR_0MP  *size_Mat];
+			  fzeroC = &DD[DIR_000*size_Mat];
+			  ftneC  = &DD[DIR_PPP *size_Mat];
+			  ftswC  = &DD[DIR_MMP *size_Mat];
+			  ftseC  = &DD[DIR_PMP *size_Mat];
+			  ftnwC  = &DD[DIR_MPP *size_Mat];
+			  fbneC  = &DD[DIR_PPM *size_Mat];
+			  fbswC  = &DD[DIR_MMM *size_Mat];
+			  fbseC  = &DD[DIR_PMM *size_Mat];
+			  fbnwC  = &DD[DIR_MPM *size_Mat];
 		   } 			 
 		   else			 
 		   {			 
-			  fwC    = &DD[E   *size_Mat];
-			  feC    = &DD[W   *size_Mat];
-			  fsC    = &DD[N   *size_Mat];
-			  fnC    = &DD[S   *size_Mat];
-			  fbC    = &DD[T   *size_Mat];
-			  ftC    = &DD[B   *size_Mat];
-			  fswC   = &DD[NE  *size_Mat];
-			  fneC   = &DD[SW  *size_Mat];
-			  fnwC   = &DD[SE  *size_Mat];
-			  fseC   = &DD[NW  *size_Mat];
-			  fbwC   = &DD[TE  *size_Mat];
-			  fteC   = &DD[BW  *size_Mat];
-			  ftwC   = &DD[BE  *size_Mat];
-			  fbeC   = &DD[TW  *size_Mat];
-			  fbsC   = &DD[TN  *size_Mat];
-			  ftnC   = &DD[BS  *size_Mat];
-			  ftsC   = &DD[BN  *size_Mat];
-			  fbnC   = &DD[TS  *size_Mat];
-			  fzeroC = &DD[REST*size_Mat];
-			  fbswC  = &DD[TNE *size_Mat];
-			  fbneC  = &DD[TSW *size_Mat];
-			  fbnwC  = &DD[TSE *size_Mat];
-			  fbseC  = &DD[TNW *size_Mat];
-			  ftswC  = &DD[BNE *size_Mat];
-			  ftneC  = &DD[BSW *size_Mat];
-			  ftnwC  = &DD[BSE *size_Mat];
-			  ftseC  = &DD[BNW *size_Mat];
+			  fwC    = &DD[DIR_P00   *size_Mat];
+			  feC    = &DD[DIR_M00   *size_Mat];
+			  fsC    = &DD[DIR_0P0   *size_Mat];
+			  fnC    = &DD[DIR_0M0   *size_Mat];
+			  fbC    = &DD[DIR_00P   *size_Mat];
+			  ftC    = &DD[DIR_00M   *size_Mat];
+			  fswC   = &DD[DIR_PP0  *size_Mat];
+			  fneC   = &DD[DIR_MM0  *size_Mat];
+			  fnwC   = &DD[DIR_PM0  *size_Mat];
+			  fseC   = &DD[DIR_MP0  *size_Mat];
+			  fbwC   = &DD[DIR_P0P  *size_Mat];
+			  fteC   = &DD[DIR_M0M  *size_Mat];
+			  ftwC   = &DD[DIR_P0M  *size_Mat];
+			  fbeC   = &DD[DIR_M0P  *size_Mat];
+			  fbsC   = &DD[DIR_0PP  *size_Mat];
+			  ftnC   = &DD[DIR_0MM  *size_Mat];
+			  ftsC   = &DD[DIR_0PM  *size_Mat];
+			  fbnC   = &DD[DIR_0MP  *size_Mat];
+			  fzeroC = &DD[DIR_000*size_Mat];
+			  fbswC  = &DD[DIR_PPP *size_Mat];
+			  fbneC  = &DD[DIR_MMP *size_Mat];
+			  fbnwC  = &DD[DIR_PMP *size_Mat];
+			  fbseC  = &DD[DIR_MPP *size_Mat];
+			  ftswC  = &DD[DIR_PPM *size_Mat];
+			  ftneC  = &DD[DIR_MMM *size_Mat];
+			  ftnwC  = &DD[DIR_PMM *size_Mat];
+			  ftseC  = &DD[DIR_MPM *size_Mat];
 		   }
 
 			  //////////////////////////////////////////////////////////////////////////
@@ -1896,7 +1896,7 @@ extern "C" __global__ void MoveParticlesWithoutBCs(   real* coordX,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void ParticleNoSlipDeviceComp27(real* coordX,
+__global__ void ParticleNoSlipDeviceComp27(real* coordX,
 													  real* coordY,
 													  real* coordZ, 
 													  real* coordParticleXlocal,
@@ -1937,63 +1937,63 @@ extern "C" __global__ void ParticleNoSlipDeviceComp27(real* coordX,
    //Distributions27 D;
    //if (isEvenTimestep==true)
    //{
-   //   D.f[E   ] = &DD[E   *size_Mat];
-   //   D.f[W   ] = &DD[W   *size_Mat];
-   //   D.f[N   ] = &DD[N   *size_Mat];
-   //   D.f[S   ] = &DD[S   *size_Mat];
-   //   D.f[T   ] = &DD[T   *size_Mat];
-   //   D.f[B   ] = &DD[B   *size_Mat];
-   //   D.f[NE  ] = &DD[NE  *size_Mat];
-   //   D.f[SW  ] = &DD[SW  *size_Mat];
-   //   D.f[SE  ] = &DD[SE  *size_Mat];
-   //   D.f[NW  ] = &DD[NW  *size_Mat];
-   //   D.f[TE  ] = &DD[TE  *size_Mat];
-   //   D.f[BW  ] = &DD[BW  *size_Mat];
-   //   D.f[BE  ] = &DD[BE  *size_Mat];
-   //   D.f[TW  ] = &DD[TW  *size_Mat];
-   //   D.f[TN  ] = &DD[TN  *size_Mat];
-   //   D.f[BS  ] = &DD[BS  *size_Mat];
-   //   D.f[BN  ] = &DD[BN  *size_Mat];
-   //   D.f[TS  ] = &DD[TS  *size_Mat];
-   //   D.f[REST] = &DD[REST*size_Mat];
-   //   D.f[TNE ] = &DD[TNE *size_Mat];
-   //   D.f[TSW ] = &DD[TSW *size_Mat];
-   //   D.f[TSE ] = &DD[TSE *size_Mat];
-   //   D.f[TNW ] = &DD[TNW *size_Mat];
-   //   D.f[BNE ] = &DD[BNE *size_Mat];
-   //   D.f[BSW ] = &DD[BSW *size_Mat];
-   //   D.f[BSE ] = &DD[BSE *size_Mat];
-   //   D.f[BNW ] = &DD[BNW *size_Mat];
+   //   D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+   //   D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+   //   D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+   //   D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+   //   D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+   //   D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+   //   D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+   //   D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+   //   D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+   //   D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+   //   D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+   //   D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+   //   D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+   //   D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+   //   D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+   //   D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+   //   D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+   //   D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+   //   D.f[DIR_000] = &DD[DIR_000*size_Mat];
+   //   D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+   //   D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+   //   D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+   //   D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+   //   D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+   //   D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+   //   D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+   //   D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
    //} 
    //else
    //{
-   //   D.f[W   ] = &DD[E   *size_Mat];
-   //   D.f[E   ] = &DD[W   *size_Mat];
-   //   D.f[S   ] = &DD[N   *size_Mat];
-   //   D.f[N   ] = &DD[S   *size_Mat];
-   //   D.f[B   ] = &DD[T   *size_Mat];
-   //   D.f[T   ] = &DD[B   *size_Mat];
-   //   D.f[SW  ] = &DD[NE  *size_Mat];
-   //   D.f[NE  ] = &DD[SW  *size_Mat];
-   //   D.f[NW  ] = &DD[SE  *size_Mat];
-   //   D.f[SE  ] = &DD[NW  *size_Mat];
-   //   D.f[BW  ] = &DD[TE  *size_Mat];
-   //   D.f[TE  ] = &DD[BW  *size_Mat];
-   //   D.f[TW  ] = &DD[BE  *size_Mat];
-   //   D.f[BE  ] = &DD[TW  *size_Mat];
-   //   D.f[BS  ] = &DD[TN  *size_Mat];
-   //   D.f[TN  ] = &DD[BS  *size_Mat];
-   //   D.f[TS  ] = &DD[BN  *size_Mat];
-   //   D.f[BN  ] = &DD[TS  *size_Mat];
-   //   D.f[REST] = &DD[REST*size_Mat];
-   //   D.f[TNE ] = &DD[BSW *size_Mat];
-   //   D.f[TSW ] = &DD[BNE *size_Mat];
-   //   D.f[TSE ] = &DD[BNW *size_Mat];
-   //   D.f[TNW ] = &DD[BSE *size_Mat];
-   //   D.f[BNE ] = &DD[TSW *size_Mat];
-   //   D.f[BSW ] = &DD[TNE *size_Mat];
-   //   D.f[BSE ] = &DD[TNW *size_Mat];
-   //   D.f[BNW ] = &DD[TSE *size_Mat];
+   //   D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+   //   D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+   //   D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+   //   D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+   //   D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+   //   D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+   //   D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+   //   D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+   //   D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+   //   D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+   //   D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+   //   D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+   //   D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+   //   D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+   //   D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+   //   D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+   //   D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+   //   D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+   //   D.f[DIR_000] = &DD[DIR_000*size_Mat];
+   //   D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+   //   D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+   //   D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+   //   D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+   //   D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+   //   D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+   //   D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+   //   D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
    //}
    //////////////////////////////////////////////////////////////////////////////////
    //const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -2015,128 +2015,128 @@ extern "C" __global__ void ParticleNoSlipDeviceComp27(real* coordX,
    // //         *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
    // //         *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
    // //         *q_dirBSE, *q_dirBNW; 
-   // //   q_dirE   = &QQ[E   * numberOfBCnodes];
-   //    q_dirW   = &QQ[W   * numberOfBCnodes];
-   // //   q_dirN   = &QQ[N   * numberOfBCnodes];
-   //    q_dirS   = &QQ[S   * numberOfBCnodes];
-   // //   q_dirT   = &QQ[T   * numberOfBCnodes];
-   //    q_dirB   = &QQ[B   * numberOfBCnodes];
-   // //   q_dirNE  = &QQ[NE  * numberOfBCnodes];
-   // //   q_dirSW  = &QQ[SW  * numberOfBCnodes];
-   // //   q_dirSE  = &QQ[SE  * numberOfBCnodes];
-   // //   q_dirNW  = &QQ[NW  * numberOfBCnodes];
-   // //   q_dirTE  = &QQ[TE  * numberOfBCnodes];
-   // //   q_dirBW  = &QQ[BW  * numberOfBCnodes];
-   // //   q_dirBE  = &QQ[BE  * numberOfBCnodes];
-   // //   q_dirTW  = &QQ[TW  * numberOfBCnodes];
-   // //   q_dirTN  = &QQ[TN  * numberOfBCnodes];
-   // //   q_dirBS  = &QQ[BS  * numberOfBCnodes];
-   // //   q_dirBN  = &QQ[BN  * numberOfBCnodes];
-   // //   q_dirTS  = &QQ[TS  * numberOfBCnodes];
-   // //   q_dirTNE = &QQ[TNE * numberOfBCnodes];
-   // //   q_dirTSW = &QQ[TSW * numberOfBCnodes];
-   // //   q_dirTSE = &QQ[TSE * numberOfBCnodes];
-   // //   q_dirTNW = &QQ[TNW * numberOfBCnodes];
-   // //   q_dirBNE = &QQ[BNE * numberOfBCnodes];
-   // //   q_dirBSW = &QQ[BSW * numberOfBCnodes];
-   // //   q_dirBSE = &QQ[BSE * numberOfBCnodes];
-   // //   q_dirBNW = &QQ[BNW * numberOfBCnodes];
+   // //   q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
+   //    q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
+   // //   q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
+   //    q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
+   // //   q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
+   //    q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
+   // //   q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
+   // //   q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
+   // //   q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
+   // //   q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
+   // //   q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
+   // //   q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
+   // //   q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
+   // //   q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
+   // //   q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
+   // //   q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
+   // //   q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
+   // //   q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+   // //   q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
+   // //   q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
+   // //   q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
+   // //   q_dirTNW = &QQ[DIR_MPP * numberOfBCnodes];
+   // //   q_dirBNE = &QQ[DIR_PPM * numberOfBCnodes];
+   // //   q_dirBSW = &QQ[DIR_MMM * numberOfBCnodes];
+   // //   q_dirBSE = &QQ[DIR_PMM * numberOfBCnodes];
+   // //   q_dirBNW = &QQ[DIR_MPM * numberOfBCnodes];
    //   ////////////////////////////////////////////////////////////////////////////////
    // //   real *nx_dirE,   *nx_dirW,   *nx_dirN,   *nx_dirS,   *nx_dirT,   *nx_dirB, 
    // //           *nx_dirNE,  *nx_dirSW,  *nx_dirSE,  *nx_dirNW,  *nx_dirTE,  *nx_dirBW,
    // //           *nx_dirBE,  *nx_dirTW,  *nx_dirTN,  *nx_dirBS,  *nx_dirBN,  *nx_dirTS,
    // //           *nx_dirTNE, *nx_dirTSW, *nx_dirTSE, *nx_dirTNW, *nx_dirBNE, *nx_dirBSW,
    // //           *nx_dirBSE, *nx_dirBNW; 
-   // //   nx_dirE   = &NormalX[E   * numberOfBCnodes];
-   // //   nx_dirW   = &NormalX[W   * numberOfBCnodes];
-   // //   nx_dirN   = &NormalX[N   * numberOfBCnodes];
-   // //   nx_dirS   = &NormalX[S   * numberOfBCnodes];
-   // //   nx_dirT   = &NormalX[T   * numberOfBCnodes];
-   // //   nx_dirB   = &NormalX[B   * numberOfBCnodes];
-   // //   nx_dirNE  = &NormalX[NE  * numberOfBCnodes];
-   // //   nx_dirSW  = &NormalX[SW  * numberOfBCnodes];
-   // //   nx_dirSE  = &NormalX[SE  * numberOfBCnodes];
-   // //   nx_dirNW  = &NormalX[NW  * numberOfBCnodes];
-   // //   nx_dirTE  = &NormalX[TE  * numberOfBCnodes];
-   // //   nx_dirBW  = &NormalX[BW  * numberOfBCnodes];
-   // //   nx_dirBE  = &NormalX[BE  * numberOfBCnodes];
-   // //   nx_dirTW  = &NormalX[TW  * numberOfBCnodes];
-   // //   nx_dirTN  = &NormalX[TN  * numberOfBCnodes];
-   // //   nx_dirBS  = &NormalX[BS  * numberOfBCnodes];
-   // //   nx_dirBN  = &NormalX[BN  * numberOfBCnodes];
-   // //   nx_dirTS  = &NormalX[TS  * numberOfBCnodes];
-   // //   nx_dirTNE = &NormalX[TNE * numberOfBCnodes];
-   // //   nx_dirTSW = &NormalX[TSW * numberOfBCnodes];
-   // //   nx_dirTSE = &NormalX[TSE * numberOfBCnodes];
-   // //   nx_dirTNW = &NormalX[TNW * numberOfBCnodes];
-   // //   nx_dirBNE = &NormalX[BNE * numberOfBCnodes];
-   // //   nx_dirBSW = &NormalX[BSW * numberOfBCnodes];
-   // //   nx_dirBSE = &NormalX[BSE * numberOfBCnodes];
-   // //   nx_dirBNW = &NormalX[BNW * numberOfBCnodes];
+   // //   nx_dirE   = &NormalX[DIR_P00   * numberOfBCnodes];
+   // //   nx_dirW   = &NormalX[DIR_M00   * numberOfBCnodes];
+   // //   nx_dirN   = &NormalX[DIR_0P0   * numberOfBCnodes];
+   // //   nx_dirS   = &NormalX[DIR_0M0   * numberOfBCnodes];
+   // //   nx_dirT   = &NormalX[DIR_00P   * numberOfBCnodes];
+   // //   nx_dirB   = &NormalX[DIR_00M   * numberOfBCnodes];
+   // //   nx_dirNE  = &NormalX[DIR_PP0  * numberOfBCnodes];
+   // //   nx_dirSW  = &NormalX[DIR_MM0  * numberOfBCnodes];
+   // //   nx_dirSE  = &NormalX[DIR_PM0  * numberOfBCnodes];
+   // //   nx_dirNW  = &NormalX[DIR_MP0  * numberOfBCnodes];
+   // //   nx_dirTE  = &NormalX[DIR_P0P  * numberOfBCnodes];
+   // //   nx_dirBW  = &NormalX[DIR_M0M  * numberOfBCnodes];
+   // //   nx_dirBE  = &NormalX[DIR_P0M  * numberOfBCnodes];
+   // //   nx_dirTW  = &NormalX[DIR_M0P  * numberOfBCnodes];
+   // //   nx_dirTN  = &NormalX[DIR_0PP  * numberOfBCnodes];
+   // //   nx_dirBS  = &NormalX[DIR_0MM  * numberOfBCnodes];
+   // //   nx_dirBN  = &NormalX[DIR_0PM  * numberOfBCnodes];
+   // //   nx_dirTS  = &NormalX[DIR_0MP  * numberOfBCnodes];
+   // //   nx_dirTNE = &NormalX[DIR_PPP * numberOfBCnodes];
+   // //   nx_dirTSW = &NormalX[DIR_MMP * numberOfBCnodes];
+   // //   nx_dirTSE = &NormalX[DIR_PMP * numberOfBCnodes];
+   // //   nx_dirTNW = &NormalX[DIR_MPP * numberOfBCnodes];
+   // //   nx_dirBNE = &NormalX[DIR_PPM * numberOfBCnodes];
+   // //   nx_dirBSW = &NormalX[DIR_MMM * numberOfBCnodes];
+   // //   nx_dirBSE = &NormalX[DIR_PMM * numberOfBCnodes];
+   // //   nx_dirBNW = &NormalX[DIR_MPM * numberOfBCnodes];
    //   ////////////////////////////////////////////////////////////////////////////////
    // //   real *ny_dirE,   *ny_dirW,   *ny_dirN,   *ny_dirS,   *ny_dirT,   *ny_dirB, 
    // //           *ny_dirNE,  *ny_dirSW,  *ny_dirSE,  *ny_dirNW,  *ny_dirTE,  *ny_dirBW,
    // //           *ny_dirBE,  *ny_dirTW,  *ny_dirTN,  *ny_dirBS,  *ny_dirBN,  *ny_dirTS,
    // //           *ny_dirTNE, *ny_dirTSW, *ny_dirTSE, *ny_dirTNW, *ny_dirBNE, *ny_dirBSW,
    // //           *ny_dirBSE, *ny_dirBNW; 
-   // //   ny_dirE   = &NormalY[E   * numberOfBCnodes];
-   // //   ny_dirW   = &NormalY[W   * numberOfBCnodes];
-   // //   ny_dirN   = &NormalY[N   * numberOfBCnodes];
-   // //   ny_dirS   = &NormalY[S   * numberOfBCnodes];
-   // //   ny_dirT   = &NormalY[T   * numberOfBCnodes];
-   // //   ny_dirB   = &NormalY[B   * numberOfBCnodes];
-   // //   ny_dirNE  = &NormalY[NE  * numberOfBCnodes];
-   // //   ny_dirSW  = &NormalY[SW  * numberOfBCnodes];
-   // //   ny_dirSE  = &NormalY[SE  * numberOfBCnodes];
-   // //   ny_dirNW  = &NormalY[NW  * numberOfBCnodes];
-   // //   ny_dirTE  = &NormalY[TE  * numberOfBCnodes];
-   // //   ny_dirBW  = &NormalY[BW  * numberOfBCnodes];
-   // //   ny_dirBE  = &NormalY[BE  * numberOfBCnodes];
-   // //   ny_dirTW  = &NormalY[TW  * numberOfBCnodes];
-   // //   ny_dirTN  = &NormalY[TN  * numberOfBCnodes];
-   // //   ny_dirBS  = &NormalY[BS  * numberOfBCnodes];
-   // //   ny_dirBN  = &NormalY[BN  * numberOfBCnodes];
-   // //   ny_dirTS  = &NormalY[TS  * numberOfBCnodes];
-   // //   ny_dirTNE = &NormalY[TNE * numberOfBCnodes];
-   // //   ny_dirTSW = &NormalY[TSW * numberOfBCnodes];
-   // //   ny_dirTSE = &NormalY[TSE * numberOfBCnodes];
-   // //   ny_dirTNW = &NormalY[TNW * numberOfBCnodes];
-   // //   ny_dirBNE = &NormalY[BNE * numberOfBCnodes];
-   // //   ny_dirBSW = &NormalY[BSW * numberOfBCnodes];
-   // //   ny_dirBSE = &NormalY[BSE * numberOfBCnodes];
-   // //   ny_dirBNW = &NormalY[BNW * numberOfBCnodes];
+   // //   ny_dirE   = &NormalY[DIR_P00   * numberOfBCnodes];
+   // //   ny_dirW   = &NormalY[DIR_M00   * numberOfBCnodes];
+   // //   ny_dirN   = &NormalY[DIR_0P0   * numberOfBCnodes];
+   // //   ny_dirS   = &NormalY[DIR_0M0   * numberOfBCnodes];
+   // //   ny_dirT   = &NormalY[DIR_00P   * numberOfBCnodes];
+   // //   ny_dirB   = &NormalY[DIR_00M   * numberOfBCnodes];
+   // //   ny_dirNE  = &NormalY[DIR_PP0  * numberOfBCnodes];
+   // //   ny_dirSW  = &NormalY[DIR_MM0  * numberOfBCnodes];
+   // //   ny_dirSE  = &NormalY[DIR_PM0  * numberOfBCnodes];
+   // //   ny_dirNW  = &NormalY[DIR_MP0  * numberOfBCnodes];
+   // //   ny_dirTE  = &NormalY[DIR_P0P  * numberOfBCnodes];
+   // //   ny_dirBW  = &NormalY[DIR_M0M  * numberOfBCnodes];
+   // //   ny_dirBE  = &NormalY[DIR_P0M  * numberOfBCnodes];
+   // //   ny_dirTW  = &NormalY[DIR_M0P  * numberOfBCnodes];
+   // //   ny_dirTN  = &NormalY[DIR_0PP  * numberOfBCnodes];
+   // //   ny_dirBS  = &NormalY[DIR_0MM  * numberOfBCnodes];
+   // //   ny_dirBN  = &NormalY[DIR_0PM  * numberOfBCnodes];
+   // //   ny_dirTS  = &NormalY[DIR_0MP  * numberOfBCnodes];
+   // //   ny_dirTNE = &NormalY[DIR_PPP * numberOfBCnodes];
+   // //   ny_dirTSW = &NormalY[DIR_MMP * numberOfBCnodes];
+   // //   ny_dirTSE = &NormalY[DIR_PMP * numberOfBCnodes];
+   // //   ny_dirTNW = &NormalY[DIR_MPP * numberOfBCnodes];
+   // //   ny_dirBNE = &NormalY[DIR_PPM * numberOfBCnodes];
+   // //   ny_dirBSW = &NormalY[DIR_MMM * numberOfBCnodes];
+   // //   ny_dirBSE = &NormalY[DIR_PMM * numberOfBCnodes];
+   // //   ny_dirBNW = &NormalY[DIR_MPM * numberOfBCnodes];
    //   ////////////////////////////////////////////////////////////////////////////////
    // //   real *nz_dirE,   *nz_dirW,   *nz_dirN,   *nz_dirS,   *nz_dirT,   *nz_dirB, 
    // //           *nz_dirNE,  *nz_dirSW,  *nz_dirSE,  *nz_dirNW,  *nz_dirTE,  *nz_dirBW,
    // //           *nz_dirBE,  *nz_dirTW,  *nz_dirTN,  *nz_dirBS,  *nz_dirBN,  *nz_dirTS,
    // //           *nz_dirTNE, *nz_dirTSW, *nz_dirTSE, *nz_dirTNW, *nz_dirBNE, *nz_dirBSW,
    // //           *nz_dirBSE, *nz_dirBNW; 
-   // //   nz_dirE   = &NormalZ[E   * numberOfBCnodes];
-   // //   nz_dirW   = &NormalZ[W   * numberOfBCnodes];
-   // //   nz_dirN   = &NormalZ[N   * numberOfBCnodes];
-   // //   nz_dirS   = &NormalZ[S   * numberOfBCnodes];
-   // //   nz_dirT   = &NormalZ[T   * numberOfBCnodes];
-   // //   nz_dirB   = &NormalZ[B   * numberOfBCnodes];
-   // //   nz_dirNE  = &NormalZ[NE  * numberOfBCnodes];
-   // //   nz_dirSW  = &NormalZ[SW  * numberOfBCnodes];
-   // //   nz_dirSE  = &NormalZ[SE  * numberOfBCnodes];
-   // //   nz_dirNW  = &NormalZ[NW  * numberOfBCnodes];
-   // //   nz_dirTE  = &NormalZ[TE  * numberOfBCnodes];
-   // //   nz_dirBW  = &NormalZ[BW  * numberOfBCnodes];
-   // //   nz_dirBE  = &NormalZ[BE  * numberOfBCnodes];
-   // //   nz_dirTW  = &NormalZ[TW  * numberOfBCnodes];
-   // //   nz_dirTN  = &NormalZ[TN  * numberOfBCnodes];
-   // //   nz_dirBS  = &NormalZ[BS  * numberOfBCnodes];
-   // //   nz_dirBN  = &NormalZ[BN  * numberOfBCnodes];
-   // //   nz_dirTS  = &NormalZ[TS  * numberOfBCnodes];
-   // //   nz_dirTNE = &NormalZ[TNE * numberOfBCnodes];
-   // //   nz_dirTSW = &NormalZ[TSW * numberOfBCnodes];
-   // //   nz_dirTSE = &NormalZ[TSE * numberOfBCnodes];
-   // //   nz_dirTNW = &NormalZ[TNW * numberOfBCnodes];
-   // //   nz_dirBNE = &NormalZ[BNE * numberOfBCnodes];
-   // //   nz_dirBSW = &NormalZ[BSW * numberOfBCnodes];
-   // //   nz_dirBSE = &NormalZ[BSE * numberOfBCnodes];
-   // //   nz_dirBNW = &NormalZ[BNW * numberOfBCnodes];
+   // //   nz_dirE   = &NormalZ[DIR_P00   * numberOfBCnodes];
+   // //   nz_dirW   = &NormalZ[DIR_M00   * numberOfBCnodes];
+   // //   nz_dirN   = &NormalZ[DIR_0P0   * numberOfBCnodes];
+   // //   nz_dirS   = &NormalZ[DIR_0M0   * numberOfBCnodes];
+   // //   nz_dirT   = &NormalZ[DIR_00P   * numberOfBCnodes];
+   // //   nz_dirB   = &NormalZ[DIR_00M   * numberOfBCnodes];
+   // //   nz_dirNE  = &NormalZ[DIR_PP0  * numberOfBCnodes];
+   // //   nz_dirSW  = &NormalZ[DIR_MM0  * numberOfBCnodes];
+   // //   nz_dirSE  = &NormalZ[DIR_PM0  * numberOfBCnodes];
+   // //   nz_dirNW  = &NormalZ[DIR_MP0  * numberOfBCnodes];
+   // //   nz_dirTE  = &NormalZ[DIR_P0P  * numberOfBCnodes];
+   // //   nz_dirBW  = &NormalZ[DIR_M0M  * numberOfBCnodes];
+   // //   nz_dirBE  = &NormalZ[DIR_P0M  * numberOfBCnodes];
+   // //   nz_dirTW  = &NormalZ[DIR_M0P  * numberOfBCnodes];
+   // //   nz_dirTN  = &NormalZ[DIR_0PP  * numberOfBCnodes];
+   // //   nz_dirBS  = &NormalZ[DIR_0MM  * numberOfBCnodes];
+   // //   nz_dirBN  = &NormalZ[DIR_0PM  * numberOfBCnodes];
+   // //   nz_dirTS  = &NormalZ[DIR_0MP  * numberOfBCnodes];
+   // //   nz_dirTNE = &NormalZ[DIR_PPP * numberOfBCnodes];
+   // //   nz_dirTSW = &NormalZ[DIR_MMP * numberOfBCnodes];
+   // //   nz_dirTSE = &NormalZ[DIR_PMP * numberOfBCnodes];
+   // //   nz_dirTNW = &NormalZ[DIR_MPP * numberOfBCnodes];
+   // //   nz_dirBNE = &NormalZ[DIR_PPM * numberOfBCnodes];
+   // //   nz_dirBSW = &NormalZ[DIR_MMM * numberOfBCnodes];
+   // //   nz_dirBSE = &NormalZ[DIR_PMM * numberOfBCnodes];
+   // //   nz_dirBNW = &NormalZ[DIR_MPM * numberOfBCnodes];
    //   ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  //bool changeCell = false;
    //   unsigned int KQK  = k_Q[k];
@@ -2190,38 +2190,38 @@ extern "C" __global__ void ParticleNoSlipDeviceComp27(real* coordX,
    //   unsigned int ktne = KQK;
    //   unsigned int kbsw = neighborZ[ksw];
    //   ////////////////////////////////////////////////////////////////////////////////
-   //   real f_W    = (D.f[E   ])[ke   ];
-   //   real f_E    = (D.f[W   ])[kw   ];
-   //   real f_S    = (D.f[N   ])[kn   ];
-   //   real f_N    = (D.f[S   ])[ks   ];
-   //   real f_B    = (D.f[T   ])[kt   ];
-   //   real f_T    = (D.f[B   ])[kb   ];
-   //   real f_SW   = (D.f[NE  ])[kne  ];
-   //   real f_NE   = (D.f[SW  ])[ksw  ];
-   //   real f_NW   = (D.f[SE  ])[kse  ];
-   //   real f_SE   = (D.f[NW  ])[knw  ];
-   //   real f_BW   = (D.f[TE  ])[kte  ];
-   //   real f_TE   = (D.f[BW  ])[kbw  ];
-   //   real f_TW   = (D.f[BE  ])[kbe  ];
-   //   real f_BE   = (D.f[TW  ])[ktw  ];
-   //   real f_BS   = (D.f[TN  ])[ktn  ];
-   //   real f_TN   = (D.f[BS  ])[kbs  ];
-   //   real f_TS   = (D.f[BN  ])[kbn  ];
-   //   real f_BN   = (D.f[TS  ])[kts  ];
-   //   real f_BSW  = (D.f[TNE ])[ktne ];
-   //   real f_BNE  = (D.f[TSW ])[ktsw ];
-   //   real f_BNW  = (D.f[TSE ])[ktse ];
-   //   real f_BSE  = (D.f[TNW ])[ktnw ];
-   //   real f_TSW  = (D.f[BNE ])[kbne ];
-   //   real f_TNE  = (D.f[BSW ])[kbsw ];
-   //   real f_TNW  = (D.f[BSE ])[kbse ];
-   //   real f_TSE  = (D.f[BNW ])[kbnw ];
+   //   real f_W    = (D.f[DIR_P00   ])[ke   ];
+   //   real f_E    = (D.f[DIR_M00   ])[kw   ];
+   //   real f_S    = (D.f[DIR_0P0   ])[kn   ];
+   //   real f_N    = (D.f[DIR_0M0   ])[ks   ];
+   //   real f_B    = (D.f[DIR_00P   ])[kt   ];
+   //   real f_T    = (D.f[DIR_00M   ])[kb   ];
+   //   real f_SW   = (D.f[DIR_PP0  ])[kne  ];
+   //   real f_NE   = (D.f[DIR_MM0  ])[ksw  ];
+   //   real f_NW   = (D.f[DIR_PM0  ])[kse  ];
+   //   real f_SE   = (D.f[DIR_MP0  ])[knw  ];
+   //   real f_BW   = (D.f[DIR_P0P  ])[kte  ];
+   //   real f_TE   = (D.f[DIR_M0M  ])[kbw  ];
+   //   real f_TW   = (D.f[DIR_P0M  ])[kbe  ];
+   //   real f_BE   = (D.f[DIR_M0P  ])[ktw  ];
+   //   real f_BS   = (D.f[DIR_0PP  ])[ktn  ];
+   //   real f_TN   = (D.f[DIR_0MM  ])[kbs  ];
+   //   real f_TS   = (D.f[DIR_0PM  ])[kbn  ];
+   //   real f_BN   = (D.f[DIR_0MP  ])[kts  ];
+   //   real f_BSW  = (D.f[DIR_PPP ])[ktne ];
+   //   real f_BNE  = (D.f[DIR_MMP ])[ktsw ];
+   //   real f_BNW  = (D.f[DIR_PMP ])[ktse ];
+   //   real f_BSE  = (D.f[DIR_MPP ])[ktnw ];
+   //   real f_TSW  = (D.f[DIR_PPM ])[kbne ];
+   //   real f_TNE  = (D.f[DIR_MMM ])[kbsw ];
+   //   real f_TNW  = (D.f[DIR_PMM ])[kbse ];
+   //   real f_TSE  = (D.f[DIR_MPM ])[kbnw ];
    //   ////////////////////////////////////////////////////////////////////////////////
    //   // real feq, q;
    //   real vx1, vx2, vx3, drho;
    //   drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
    //             f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + 
-   //             f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[REST])[kzero]); 
+   //             f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[DIR_000])[kzero]); 
 
    //   vx1    =  (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
    //             ((f_BE - f_TW)   + (f_TE - f_BW))   + ((f_SE - f_NW)   + (f_NE - f_SW)) +
@@ -2241,63 +2241,63 @@ extern "C" __global__ void ParticleNoSlipDeviceComp27(real* coordX,
    //   //////////////////////////////////////////////////////////////////////////
    //   if (isEvenTimestep==false)
    //   {
-   //      D.f[E   ] = &DD[E   *size_Mat];
-   //      D.f[W   ] = &DD[W   *size_Mat];
-   //      D.f[N   ] = &DD[N   *size_Mat];
-   //      D.f[S   ] = &DD[S   *size_Mat];
-   //      D.f[T   ] = &DD[T   *size_Mat];
-   //      D.f[B   ] = &DD[B   *size_Mat];
-   //      D.f[NE  ] = &DD[NE  *size_Mat];
-   //      D.f[SW  ] = &DD[SW  *size_Mat];
-   //      D.f[SE  ] = &DD[SE  *size_Mat];
-   //      D.f[NW  ] = &DD[NW  *size_Mat];
-   //      D.f[TE  ] = &DD[TE  *size_Mat];
-   //      D.f[BW  ] = &DD[BW  *size_Mat];
-   //      D.f[BE  ] = &DD[BE  *size_Mat];
-   //      D.f[TW  ] = &DD[TW  *size_Mat];
-   //      D.f[TN  ] = &DD[TN  *size_Mat];
-   //      D.f[BS  ] = &DD[BS  *size_Mat];
-   //      D.f[BN  ] = &DD[BN  *size_Mat];
-   //      D.f[TS  ] = &DD[TS  *size_Mat];
-   //      D.f[REST] = &DD[REST*size_Mat];
-   //      D.f[TNE ] = &DD[TNE *size_Mat];
-   //      D.f[TSW ] = &DD[TSW *size_Mat];
-   //      D.f[TSE ] = &DD[TSE *size_Mat];
-   //      D.f[TNW ] = &DD[TNW *size_Mat];
-   //      D.f[BNE ] = &DD[BNE *size_Mat];
-   //      D.f[BSW ] = &DD[BSW *size_Mat];
-   //      D.f[BSE ] = &DD[BSE *size_Mat];
-   //      D.f[BNW ] = &DD[BNW *size_Mat];
+   //      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+   //      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+   //      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+   //      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+   //      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+   //      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+   //      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+   //      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+   //      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+   //      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+   //      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+   //      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+   //      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+   //      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+   //      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+   //      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+   //      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+   //      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+   //      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+   //      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+   //      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+   //      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+   //      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+   //      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+   //      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+   //      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+   //      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
    //   } 
    //   else
    //   {
-   //      D.f[W   ] = &DD[E   *size_Mat];
-   //      D.f[E   ] = &DD[W   *size_Mat];
-   //      D.f[S   ] = &DD[N   *size_Mat];
-   //      D.f[N   ] = &DD[S   *size_Mat];
-   //      D.f[B   ] = &DD[T   *size_Mat];
-   //      D.f[T   ] = &DD[B   *size_Mat];
-   //      D.f[SW  ] = &DD[NE  *size_Mat];
-   //      D.f[NE  ] = &DD[SW  *size_Mat];
-   //      D.f[NW  ] = &DD[SE  *size_Mat];
-   //      D.f[SE  ] = &DD[NW  *size_Mat];
-   //      D.f[BW  ] = &DD[TE  *size_Mat];
-   //      D.f[TE  ] = &DD[BW  *size_Mat];
-   //      D.f[TW  ] = &DD[BE  *size_Mat];
-   //      D.f[BE  ] = &DD[TW  *size_Mat];
-   //      D.f[BS  ] = &DD[TN  *size_Mat];
-   //      D.f[TN  ] = &DD[BS  *size_Mat];
-   //      D.f[TS  ] = &DD[BN  *size_Mat];
-   //      D.f[BN  ] = &DD[TS  *size_Mat];
-   //      D.f[REST] = &DD[REST*size_Mat];
-   //      D.f[TNE ] = &DD[BSW *size_Mat];
-   //      D.f[TSW ] = &DD[BNE *size_Mat];
-   //      D.f[TSE ] = &DD[BNW *size_Mat];
-   //      D.f[TNW ] = &DD[BSE *size_Mat];
-   //      D.f[BNE ] = &DD[TSW *size_Mat];
-   //      D.f[BSW ] = &DD[TNE *size_Mat];
-   //      D.f[BSE ] = &DD[TNW *size_Mat];
-   //      D.f[BNW ] = &DD[TSE *size_Mat];
+   //      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+   //      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+   //      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+   //      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+   //      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+   //      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+   //      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+   //      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+   //      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+   //      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+   //      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+   //      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+   //      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+   //      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+   //      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+   //      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+   //      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+   //      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+   //      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+   //      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+   //      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+   //      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+   //      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+   //      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+   //      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+   //      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+   //      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
    //   }
    //}
 }
diff --git a/src/gpu/VirtualFluids_GPU/GPU/PressBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/PressBCs27.cu
index 2c4959eed943b4b72493a0c36630d8bc7ec83586..29e82196bdc2a22f03306b97a1ffd1bb6d5bc8a4 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/PressBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/PressBCs27.cu
@@ -2,13 +2,16 @@
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
 #include "lbm/constants/NumericConstants.h"
+#include "lbm/MacroscopicQuantities.h"
+#include "Kernel/Utilities/DistributionHelper.cuh"
+
 #include "KernelUtilities.h"
 
 using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QInflowScaleByPressDevice27(  real* rhoBC,
+__global__ void QInflowScaleByPressDevice27(  real* rhoBC,
 														 real* DD, 
 														 int* k_Q, 
 														 int* k_N, 
@@ -97,120 +100,120 @@ extern "C" __global__ void QInflowScaleByPressDevice27(  real* rhoBC,
       Distributions27 D;
       if (isEvenTimestep==true)
       {
-         D.f[E   ] = &DD[E   *size_Mat];
-         D.f[W   ] = &DD[W   *size_Mat];
-         D.f[N   ] = &DD[N   *size_Mat];
-         D.f[S   ] = &DD[S   *size_Mat];
-         D.f[T   ] = &DD[T   *size_Mat];
-         D.f[B   ] = &DD[B   *size_Mat];
-         D.f[NE  ] = &DD[NE  *size_Mat];
-         D.f[SW  ] = &DD[SW  *size_Mat];
-         D.f[SE  ] = &DD[SE  *size_Mat];
-         D.f[NW  ] = &DD[NW  *size_Mat];
-         D.f[TE  ] = &DD[TE  *size_Mat];
-         D.f[BW  ] = &DD[BW  *size_Mat];
-         D.f[BE  ] = &DD[BE  *size_Mat];
-         D.f[TW  ] = &DD[TW  *size_Mat];
-         D.f[TN  ] = &DD[TN  *size_Mat];
-         D.f[BS  ] = &DD[BS  *size_Mat];
-         D.f[BN  ] = &DD[BN  *size_Mat];
-         D.f[TS  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[TNE *size_Mat];
-         D.f[TSW ] = &DD[TSW *size_Mat];
-         D.f[TSE ] = &DD[TSE *size_Mat];
-         D.f[TNW ] = &DD[TNW *size_Mat];
-         D.f[BNE ] = &DD[BNE *size_Mat];
-         D.f[BSW ] = &DD[BSW *size_Mat];
-         D.f[BSE ] = &DD[BSE *size_Mat];
-         D.f[BNW ] = &DD[BNW *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
       } 
       else
       {
-         D.f[W   ] = &DD[E   *size_Mat];
-         D.f[E   ] = &DD[W   *size_Mat];
-         D.f[S   ] = &DD[N   *size_Mat];
-         D.f[N   ] = &DD[S   *size_Mat];
-         D.f[B   ] = &DD[T   *size_Mat];
-         D.f[T   ] = &DD[B   *size_Mat];
-         D.f[SW  ] = &DD[NE  *size_Mat];
-         D.f[NE  ] = &DD[SW  *size_Mat];
-         D.f[NW  ] = &DD[SE  *size_Mat];
-         D.f[SE  ] = &DD[NW  *size_Mat];
-         D.f[BW  ] = &DD[TE  *size_Mat];
-         D.f[TE  ] = &DD[BW  *size_Mat];
-         D.f[TW  ] = &DD[BE  *size_Mat];
-         D.f[BE  ] = &DD[TW  *size_Mat];
-         D.f[BS  ] = &DD[TN  *size_Mat];
-         D.f[TN  ] = &DD[BS  *size_Mat];
-         D.f[TS  ] = &DD[BN  *size_Mat];
-         D.f[BN  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[BSW *size_Mat];
-         D.f[TSW ] = &DD[BNE *size_Mat];
-         D.f[TSE ] = &DD[BNW *size_Mat];
-         D.f[TNW ] = &DD[BSE *size_Mat];
-         D.f[BNE ] = &DD[TSW *size_Mat];
-         D.f[BSW ] = &DD[TNE *size_Mat];
-         D.f[BSE ] = &DD[TNW *size_Mat];
-         D.f[BNW ] = &DD[TSE *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
       }
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      real f1_E    = (D.f[E   ])[k1e   ];
-      real f1_W    = (D.f[W   ])[k1w   ];
-      real f1_N    = (D.f[N   ])[k1n   ];
-      real f1_S    = (D.f[S   ])[k1s   ];
-      real f1_T    = (D.f[T   ])[k1t   ];
-      real f1_B    = (D.f[B   ])[k1b   ];
-      real f1_NE   = (D.f[NE  ])[k1ne  ];
-      real f1_SW   = (D.f[SW  ])[k1sw  ];
-      real f1_SE   = (D.f[SE  ])[k1se  ];
-      real f1_NW   = (D.f[NW  ])[k1nw  ];
-      real f1_TE   = (D.f[TE  ])[k1te  ];
-      real f1_BW   = (D.f[BW  ])[k1bw  ];
-      real f1_BE   = (D.f[BE  ])[k1be  ];
-      real f1_TW   = (D.f[TW  ])[k1tw  ];
-      real f1_TN   = (D.f[TN  ])[k1tn  ];
-      real f1_BS   = (D.f[BS  ])[k1bs  ];
-      real f1_BN   = (D.f[BN  ])[k1bn  ];
-      real f1_TS   = (D.f[TS  ])[k1ts  ];
-      //real f1_ZERO = (D.f[REST])[k1zero];
-      real f1_TNE  = (D.f[TNE ])[k1tne ];
-      real f1_TSW  = (D.f[TSW ])[k1tsw ];
-      real f1_TSE  = (D.f[TSE ])[k1tse ];
-      real f1_TNW  = (D.f[TNW ])[k1tnw ];
-      real f1_BNE  = (D.f[BNE ])[k1bne ];
-      real f1_BSW  = (D.f[BSW ])[k1bsw ];
-      real f1_BSE  = (D.f[BSE ])[k1bse ];
-      real f1_BNW  = (D.f[BNW ])[k1bnw ];
+      real f1_E    = (D.f[DIR_P00   ])[k1e   ];
+      real f1_W    = (D.f[DIR_M00   ])[k1w   ];
+      real f1_N    = (D.f[DIR_0P0   ])[k1n   ];
+      real f1_S    = (D.f[DIR_0M0   ])[k1s   ];
+      real f1_T    = (D.f[DIR_00P   ])[k1t   ];
+      real f1_B    = (D.f[DIR_00M   ])[k1b   ];
+      real f1_NE   = (D.f[DIR_PP0  ])[k1ne  ];
+      real f1_SW   = (D.f[DIR_MM0  ])[k1sw  ];
+      real f1_SE   = (D.f[DIR_PM0  ])[k1se  ];
+      real f1_NW   = (D.f[DIR_MP0  ])[k1nw  ];
+      real f1_TE   = (D.f[DIR_P0P  ])[k1te  ];
+      real f1_BW   = (D.f[DIR_M0M  ])[k1bw  ];
+      real f1_BE   = (D.f[DIR_P0M  ])[k1be  ];
+      real f1_TW   = (D.f[DIR_M0P  ])[k1tw  ];
+      real f1_TN   = (D.f[DIR_0PP  ])[k1tn  ];
+      real f1_BS   = (D.f[DIR_0MM  ])[k1bs  ];
+      real f1_BN   = (D.f[DIR_0PM  ])[k1bn  ];
+      real f1_TS   = (D.f[DIR_0MP  ])[k1ts  ];
+      //real f1_ZERO = (D.f[DIR_000])[k1zero];
+      real f1_TNE  = (D.f[DIR_PPP ])[k1tne ];
+      real f1_TSW  = (D.f[DIR_MMP ])[k1tsw ];
+      real f1_TSE  = (D.f[DIR_PMP ])[k1tse ];
+      real f1_TNW  = (D.f[DIR_MPP ])[k1tnw ];
+      real f1_BNE  = (D.f[DIR_PPM ])[k1bne ];
+      real f1_BSW  = (D.f[DIR_MMM ])[k1bsw ];
+      real f1_BSE  = (D.f[DIR_PMM ])[k1bse ];
+      real f1_BNW  = (D.f[DIR_MPM ])[k1bnw ];
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      real f_E    = (D.f[E   ])[ke   ];
-      real f_W    = (D.f[W   ])[kw   ];
-      real f_N    = (D.f[N   ])[kn   ];
-      real f_S    = (D.f[S   ])[ks   ];
-      real f_T    = (D.f[T   ])[kt   ];
-      real f_B    = (D.f[B   ])[kb   ];
-      real f_NE   = (D.f[NE  ])[kne  ];
-      real f_SW   = (D.f[SW  ])[ksw  ];
-      real f_SE   = (D.f[SE  ])[kse  ];
-      real f_NW   = (D.f[NW  ])[knw  ];
-      real f_TE   = (D.f[TE  ])[kte  ];
-      real f_BW   = (D.f[BW  ])[kbw  ];
-      real f_BE   = (D.f[BE  ])[kbe  ];
-      real f_TW   = (D.f[TW  ])[ktw  ];
-      real f_TN   = (D.f[TN  ])[ktn  ];
-      real f_BS   = (D.f[BS  ])[kbs  ];
-      real f_BN   = (D.f[BN  ])[kbn  ];
-      real f_TS   = (D.f[TS  ])[kts  ];
-      //real f_ZERO = (D.f[REST])[kzero];
-      real f_TNE  = (D.f[TNE ])[ktne ];
-      real f_TSW  = (D.f[TSW ])[ktsw ];
-      real f_TSE  = (D.f[TSE ])[ktse ];
-      real f_TNW  = (D.f[TNW ])[ktnw ];
-      real f_BNE  = (D.f[BNE ])[kbne ];
-      real f_BSW  = (D.f[BSW ])[kbsw ];
-      real f_BSE  = (D.f[BSE ])[kbse ];
-      real f_BNW  = (D.f[BNW ])[kbnw ];
+      real f_E    = (D.f[DIR_P00   ])[ke   ];
+      real f_W    = (D.f[DIR_M00   ])[kw   ];
+      real f_N    = (D.f[DIR_0P0   ])[kn   ];
+      real f_S    = (D.f[DIR_0M0   ])[ks   ];
+      real f_T    = (D.f[DIR_00P   ])[kt   ];
+      real f_B    = (D.f[DIR_00M   ])[kb   ];
+      real f_NE   = (D.f[DIR_PP0  ])[kne  ];
+      real f_SW   = (D.f[DIR_MM0  ])[ksw  ];
+      real f_SE   = (D.f[DIR_PM0  ])[kse  ];
+      real f_NW   = (D.f[DIR_MP0  ])[knw  ];
+      real f_TE   = (D.f[DIR_P0P  ])[kte  ];
+      real f_BW   = (D.f[DIR_M0M  ])[kbw  ];
+      real f_BE   = (D.f[DIR_P0M  ])[kbe  ];
+      real f_TW   = (D.f[DIR_M0P  ])[ktw  ];
+      real f_TN   = (D.f[DIR_0PP  ])[ktn  ];
+      real f_BS   = (D.f[DIR_0MM  ])[kbs  ];
+      real f_BN   = (D.f[DIR_0PM  ])[kbn  ];
+      real f_TS   = (D.f[DIR_0MP  ])[kts  ];
+      //real f_ZERO = (D.f[DIR_000])[kzero];
+      real f_TNE  = (D.f[DIR_PPP ])[ktne ];
+      real f_TSW  = (D.f[DIR_MMP ])[ktsw ];
+      real f_TSE  = (D.f[DIR_PMP ])[ktse ];
+      real f_TNW  = (D.f[DIR_MPP ])[ktnw ];
+      real f_BNE  = (D.f[DIR_PPM ])[kbne ];
+      real f_BSW  = (D.f[DIR_MMM ])[kbsw ];
+      real f_BSE  = (D.f[DIR_PMM ])[kbse ];
+      real f_BNW  = (D.f[DIR_MPM ])[kbnw ];
       //////////////////////////////////////////////////////////////////////////
       // real vx1, vx2, vx3;
       real drho, drho1;
@@ -218,10 +221,10 @@ extern "C" __global__ void QInflowScaleByPressDevice27(  real* rhoBC,
 	  //Dichte
       drho1  =  f1_TSE + f1_TNW + f1_TNE + f1_TSW + f1_BSE + f1_BNW + f1_BNE + f1_BSW +
                 f1_BN + f1_TS + f1_TN + f1_BS + f1_BE + f1_TW + f1_TE + f1_BW + f1_SE + f1_NW + f1_NE + f1_SW + 
-                f1_T + f1_B + f1_N + f1_S + f1_E + f1_W + ((D.f[REST])[k1zero]); 
+                f1_T + f1_B + f1_N + f1_S + f1_E + f1_W + ((D.f[DIR_000])[k1zero]); 
       drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
                 f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + 
-                f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[REST])[kzero]); 
+                f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[DIR_000])[kzero]); 
       //////////////////////////////////////////////////////////////////////////
 	  //Schallgeschwindigkeit
 	  real cs = c1o1 / sqrtf(c3o1);
@@ -321,106 +324,106 @@ extern "C" __global__ void QInflowScaleByPressDevice27(  real* rhoBC,
 	  //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D.f[E   ] = &DD[E   *size_Mat];
-         D.f[W   ] = &DD[W   *size_Mat];
-         D.f[N   ] = &DD[N   *size_Mat];
-         D.f[S   ] = &DD[S   *size_Mat];
-         D.f[T   ] = &DD[T   *size_Mat];
-         D.f[B   ] = &DD[B   *size_Mat];
-         D.f[NE  ] = &DD[NE  *size_Mat];
-         D.f[SW  ] = &DD[SW  *size_Mat];
-         D.f[SE  ] = &DD[SE  *size_Mat];
-         D.f[NW  ] = &DD[NW  *size_Mat];
-         D.f[TE  ] = &DD[TE  *size_Mat];
-         D.f[BW  ] = &DD[BW  *size_Mat];
-         D.f[BE  ] = &DD[BE  *size_Mat];
-         D.f[TW  ] = &DD[TW  *size_Mat];
-         D.f[TN  ] = &DD[TN  *size_Mat];
-         D.f[BS  ] = &DD[BS  *size_Mat];
-         D.f[BN  ] = &DD[BN  *size_Mat];
-         D.f[TS  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[TNE *size_Mat];
-         D.f[TSW ] = &DD[TSW *size_Mat];
-         D.f[TSE ] = &DD[TSE *size_Mat];
-         D.f[TNW ] = &DD[TNW *size_Mat];
-         D.f[BNE ] = &DD[BNE *size_Mat];
-         D.f[BSW ] = &DD[BSW *size_Mat];
-         D.f[BSE ] = &DD[BSE *size_Mat];
-         D.f[BNW ] = &DD[BNW *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
       } 
       else
       {
-         D.f[W   ] = &DD[E   *size_Mat];
-         D.f[E   ] = &DD[W   *size_Mat];
-         D.f[S   ] = &DD[N   *size_Mat];
-         D.f[N   ] = &DD[S   *size_Mat];
-         D.f[B   ] = &DD[T   *size_Mat];
-         D.f[T   ] = &DD[B   *size_Mat];
-         D.f[SW  ] = &DD[NE  *size_Mat];
-         D.f[NE  ] = &DD[SW  *size_Mat];
-         D.f[NW  ] = &DD[SE  *size_Mat];
-         D.f[SE  ] = &DD[NW  *size_Mat];
-         D.f[BW  ] = &DD[TE  *size_Mat];
-         D.f[TE  ] = &DD[BW  *size_Mat];
-         D.f[TW  ] = &DD[BE  *size_Mat];
-         D.f[BE  ] = &DD[TW  *size_Mat];
-         D.f[BS  ] = &DD[TN  *size_Mat];
-         D.f[TN  ] = &DD[BS  *size_Mat];
-         D.f[TS  ] = &DD[BN  *size_Mat];
-         D.f[BN  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[BSW *size_Mat];
-         D.f[TSW ] = &DD[BNE *size_Mat];
-         D.f[TSE ] = &DD[BNW *size_Mat];
-         D.f[TNW ] = &DD[BSE *size_Mat];
-         D.f[BNE ] = &DD[TSW *size_Mat];
-         D.f[BSW ] = &DD[TNE *size_Mat];
-         D.f[BSE ] = &DD[TNW *size_Mat];
-         D.f[BNW ] = &DD[TSE *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
       }
       //////////////////////////////////////////////////////////////////////////
       //__syncthreads();
 	  // -X
-	  //(D.f[E   ])[ke   ] = f_E   ;
-	  //(D.f[SE  ])[kse  ] = f_SE  ;
-	  //(D.f[NE  ])[kne  ] = f_NE  ;
-	  //(D.f[BE  ])[kbe  ] = f_BE  ;
-	  //(D.f[TE  ])[kte  ] = f_TE  ;
-	  //(D.f[TSE ])[ktse ] = f_TSE ;
-	  //(D.f[TNE ])[ktne ] = f_TNE ;
-	  //(D.f[BSE ])[kbse ] = f_BSE ;
-	  //(D.f[BNE ])[kbne ] = f_BNE ;     
+	  //(D.f[DIR_P00   ])[ke   ] = f_E   ;
+	  //(D.f[DIR_PM0  ])[kse  ] = f_SE  ;
+	  //(D.f[DIR_PP0  ])[kne  ] = f_NE  ;
+	  //(D.f[DIR_P0M  ])[kbe  ] = f_BE  ;
+	  //(D.f[DIR_P0P  ])[kte  ] = f_TE  ;
+	  //(D.f[DIR_PMP ])[ktse ] = f_TSE ;
+	  //(D.f[DIR_PPP ])[ktne ] = f_TNE ;
+	  //(D.f[DIR_PMM ])[kbse ] = f_BSE ;
+	  //(D.f[DIR_PPM ])[kbne ] = f_BNE ;     
 	  // X
-	  (D.f[W   ])[kw   ] = f_W   ;
-	  (D.f[SW  ])[ksw  ] = f_SW  ;
-	  (D.f[NW  ])[knw  ] = f_NW  ;
-	  (D.f[BW  ])[kbw  ] = f_BW  ;
-	  (D.f[TW  ])[ktw  ] = f_TW  ;
-	  (D.f[TSW ])[ktsw ] = f_TSW ;
-	  (D.f[TNW ])[ktnw ] = f_TNW ;
-	  (D.f[BSW ])[kbsw ] = f_BSW ;
-	  (D.f[BNW ])[kbnw ] = f_BNW ;     
+	  (D.f[DIR_M00   ])[kw   ] = f_W   ;
+	  (D.f[DIR_MM0  ])[ksw  ] = f_SW  ;
+	  (D.f[DIR_MP0  ])[knw  ] = f_NW  ;
+	  (D.f[DIR_M0M  ])[kbw  ] = f_BW  ;
+	  (D.f[DIR_M0P  ])[ktw  ] = f_TW  ;
+	  (D.f[DIR_MMP ])[ktsw ] = f_TSW ;
+	  (D.f[DIR_MPP ])[ktnw ] = f_TNW ;
+	  (D.f[DIR_MMM ])[kbsw ] = f_BSW ;
+	  (D.f[DIR_MPM ])[kbnw ] = f_BNW ;     
 	  // Y
-	  //(D.f[S   ])[ks   ] = f_S   ;
-	  //(D.f[SE  ])[kse  ] = f_SE  ;
-	  //(D.f[SW  ])[ksw  ] = f_SW  ;
-	  //(D.f[TS  ])[kts  ] = f_TS  ;
-	  //(D.f[BS  ])[kbs  ] = f_BS  ;
-	  //(D.f[TSE ])[ktse ] = f_TSE ;
-	  //(D.f[TSW ])[ktsw ] = f_TSW ;
-	  //(D.f[BSE ])[kbse ] = f_BSE ;
-	  //(D.f[BSW ])[kbsw ] = f_BSW ;     
+	  //(D.f[DIR_0M0   ])[ks   ] = f_S   ;
+	  //(D.f[DIR_PM0  ])[kse  ] = f_SE  ;
+	  //(D.f[DIR_MM0  ])[ksw  ] = f_SW  ;
+	  //(D.f[DIR_0MP  ])[kts  ] = f_TS  ;
+	  //(D.f[DIR_0MM  ])[kbs  ] = f_BS  ;
+	  //(D.f[DIR_PMP ])[ktse ] = f_TSE ;
+	  //(D.f[DIR_MMP ])[ktsw ] = f_TSW ;
+	  //(D.f[DIR_PMM ])[kbse ] = f_BSE ;
+	  //(D.f[DIR_MMM ])[kbsw ] = f_BSW ;     
 	  // Z
-	  //(D.f[B   ])[kb   ] = f_B   ;
-	  //(D.f[BE  ])[kbe  ] = f_BE  ;
-	  //(D.f[BW  ])[kbw  ] = f_BW  ;
-	  //(D.f[BN  ])[kbn  ] = f_BN  ;
-	  //(D.f[BS  ])[kbs  ] = f_BS  ;
-	  //(D.f[BNE ])[kbne ] = f_BNE ;
-	  //(D.f[BNW ])[kbnw ] = f_BNW ;
-	  //(D.f[BSE ])[kbse ] = f_BSE ;
-	  //(D.f[BSW ])[kbsw ] = f_BSW ;     
+	  //(D.f[DIR_00M   ])[kb   ] = f_B   ;
+	  //(D.f[DIR_P0M  ])[kbe  ] = f_BE  ;
+	  //(D.f[DIR_M0M  ])[kbw  ] = f_BW  ;
+	  //(D.f[DIR_0PM  ])[kbn  ] = f_BN  ;
+	  //(D.f[DIR_0MM  ])[kbs  ] = f_BS  ;
+	  //(D.f[DIR_PPM ])[kbne ] = f_BNE ;
+	  //(D.f[DIR_MPM ])[kbnw ] = f_BNW ;
+	  //(D.f[DIR_PMM ])[kbse ] = f_BSE ;
+	  //(D.f[DIR_MMM ])[kbsw ] = f_BSW ;     
       //////////////////////////////////////////////////////////////////////////
    }
 }
@@ -465,7 +468,7 @@ extern "C" __global__ void QInflowScaleByPressDevice27(  real* rhoBC,
 
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QPressDeviceIncompNEQ27( real* rhoBC,
+__global__ void QPressDeviceIncompNEQ27( real* rhoBC,
 													real* DD, 
 													int* k_Q, 
 													int* k_N, 
@@ -554,95 +557,95 @@ extern "C" __global__ void QPressDeviceIncompNEQ27( real* rhoBC,
       Distributions27 D;
       if (isEvenTimestep==true) //// ACHTUNG PREColl !!!!!!!!!!!!!!
       {
-         D.f[E   ] = &DD[E   *size_Mat];
-         D.f[W   ] = &DD[W   *size_Mat];
-         D.f[N   ] = &DD[N   *size_Mat];
-         D.f[S   ] = &DD[S   *size_Mat];
-         D.f[T   ] = &DD[T   *size_Mat];
-         D.f[B   ] = &DD[B   *size_Mat];
-         D.f[NE  ] = &DD[NE  *size_Mat];
-         D.f[SW  ] = &DD[SW  *size_Mat];
-         D.f[SE  ] = &DD[SE  *size_Mat];
-         D.f[NW  ] = &DD[NW  *size_Mat];
-         D.f[TE  ] = &DD[TE  *size_Mat];
-         D.f[BW  ] = &DD[BW  *size_Mat];
-         D.f[BE  ] = &DD[BE  *size_Mat];
-         D.f[TW  ] = &DD[TW  *size_Mat];
-         D.f[TN  ] = &DD[TN  *size_Mat];
-         D.f[BS  ] = &DD[BS  *size_Mat];
-         D.f[BN  ] = &DD[BN  *size_Mat];
-         D.f[TS  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[TNE *size_Mat];
-         D.f[TSW ] = &DD[TSW *size_Mat];
-         D.f[TSE ] = &DD[TSE *size_Mat];
-         D.f[TNW ] = &DD[TNW *size_Mat];
-         D.f[BNE ] = &DD[BNE *size_Mat];
-         D.f[BSW ] = &DD[BSW *size_Mat];
-         D.f[BSE ] = &DD[BSE *size_Mat];
-         D.f[BNW ] = &DD[BNW *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
       } 
       else
       {
-         D.f[W   ] = &DD[E   *size_Mat];
-         D.f[E   ] = &DD[W   *size_Mat];
-         D.f[S   ] = &DD[N   *size_Mat];
-         D.f[N   ] = &DD[S   *size_Mat];
-         D.f[B   ] = &DD[T   *size_Mat];
-         D.f[T   ] = &DD[B   *size_Mat];
-         D.f[SW  ] = &DD[NE  *size_Mat];
-         D.f[NE  ] = &DD[SW  *size_Mat];
-         D.f[NW  ] = &DD[SE  *size_Mat];
-         D.f[SE  ] = &DD[NW  *size_Mat];
-         D.f[BW  ] = &DD[TE  *size_Mat];
-         D.f[TE  ] = &DD[BW  *size_Mat];
-         D.f[TW  ] = &DD[BE  *size_Mat];
-         D.f[BE  ] = &DD[TW  *size_Mat];
-         D.f[BS  ] = &DD[TN  *size_Mat];
-         D.f[TN  ] = &DD[BS  *size_Mat];
-         D.f[TS  ] = &DD[BN  *size_Mat];
-         D.f[BN  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[BSW *size_Mat];
-         D.f[TSW ] = &DD[BNE *size_Mat];
-         D.f[TSE ] = &DD[BNW *size_Mat];
-         D.f[TNW ] = &DD[BSE *size_Mat];
-         D.f[BNE ] = &DD[TSW *size_Mat];
-         D.f[BSW ] = &DD[TNE *size_Mat];
-         D.f[BSE ] = &DD[TNW *size_Mat];
-         D.f[BNW ] = &DD[TSE *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
       }
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       real        f1_E,f1_W,f1_N,f1_S,f1_T,f1_B,f1_NE,f1_SW,f1_SE,f1_NW,f1_TE,f1_BW,f1_BE,f1_TW,f1_TN,f1_BS,f1_BN,f1_TS,f1_ZERO,
                      f1_TNE,f1_TSW,f1_TSE,f1_TNW,f1_BNE,f1_BSW,f1_BSE,f1_BNW;
 
-      f1_W    = (D.f[E   ])[k1e   ];
-      f1_E    = (D.f[W   ])[k1w   ];
-      f1_S    = (D.f[N   ])[k1n   ];
-      f1_N    = (D.f[S   ])[k1s   ];
-      f1_B    = (D.f[T   ])[k1t   ];
-      f1_T    = (D.f[B   ])[k1b   ];
-      f1_SW   = (D.f[NE  ])[k1ne  ];
-      f1_NE   = (D.f[SW  ])[k1sw  ];
-      f1_NW   = (D.f[SE  ])[k1se  ];
-      f1_SE   = (D.f[NW  ])[k1nw  ];
-      f1_BW   = (D.f[TE  ])[k1te  ];
-      f1_TE   = (D.f[BW  ])[k1bw  ];
-      f1_TW   = (D.f[BE  ])[k1be  ];
-      f1_BE   = (D.f[TW  ])[k1tw  ];
-      f1_BS   = (D.f[TN  ])[k1tn  ];
-      f1_TN   = (D.f[BS  ])[k1bs  ];
-      f1_TS   = (D.f[BN  ])[k1bn  ];
-      f1_BN   = (D.f[TS  ])[k1ts  ];
-      f1_ZERO = (D.f[REST])[k1zero];
-      f1_BSW  = (D.f[TNE ])[k1tne ];
-      f1_BNE  = (D.f[TSW ])[k1tsw ];
-      f1_BNW  = (D.f[TSE ])[k1tse ];
-      f1_BSE  = (D.f[TNW ])[k1tnw ];
-      f1_TSW  = (D.f[BNE ])[k1bne ];
-      f1_TNE  = (D.f[BSW ])[k1bsw ];
-      f1_TNW  = (D.f[BSE ])[k1bse ];
-      f1_TSE  = (D.f[BNW ])[k1bnw ];
+      f1_W    = (D.f[DIR_P00   ])[k1e   ];
+      f1_E    = (D.f[DIR_M00   ])[k1w   ];
+      f1_S    = (D.f[DIR_0P0   ])[k1n   ];
+      f1_N    = (D.f[DIR_0M0   ])[k1s   ];
+      f1_B    = (D.f[DIR_00P   ])[k1t   ];
+      f1_T    = (D.f[DIR_00M   ])[k1b   ];
+      f1_SW   = (D.f[DIR_PP0  ])[k1ne  ];
+      f1_NE   = (D.f[DIR_MM0  ])[k1sw  ];
+      f1_NW   = (D.f[DIR_PM0  ])[k1se  ];
+      f1_SE   = (D.f[DIR_MP0  ])[k1nw  ];
+      f1_BW   = (D.f[DIR_P0P  ])[k1te  ];
+      f1_TE   = (D.f[DIR_M0M  ])[k1bw  ];
+      f1_TW   = (D.f[DIR_P0M  ])[k1be  ];
+      f1_BE   = (D.f[DIR_M0P  ])[k1tw  ];
+      f1_BS   = (D.f[DIR_0PP  ])[k1tn  ];
+      f1_TN   = (D.f[DIR_0MM  ])[k1bs  ];
+      f1_TS   = (D.f[DIR_0PM  ])[k1bn  ];
+      f1_BN   = (D.f[DIR_0MP  ])[k1ts  ];
+      f1_ZERO = (D.f[DIR_000])[k1zero];
+      f1_BSW  = (D.f[DIR_PPP ])[k1tne ];
+      f1_BNE  = (D.f[DIR_MMP ])[k1tsw ];
+      f1_BNW  = (D.f[DIR_PMP ])[k1tse ];
+      f1_BSE  = (D.f[DIR_MPP ])[k1tnw ];
+      f1_TSW  = (D.f[DIR_PPM ])[k1bne ];
+      f1_TNE  = (D.f[DIR_MMM ])[k1bsw ];
+      f1_TNW  = (D.f[DIR_PMM ])[k1bse ];
+      f1_TSE  = (D.f[DIR_MPM ])[k1bnw ];
 
       //////////////////////////////////////////////////////////////////////////
       real drho1    =  f1_ZERO+f1_E+f1_W+f1_N+f1_S+f1_T+f1_B+f1_NE+f1_SW+f1_SE+f1_NW+f1_TE+f1_BW+f1_BE+f1_TW+f1_TN+f1_BS+f1_BN+f1_TS+
@@ -734,33 +737,33 @@ extern "C" __global__ void QPressDeviceIncompNEQ27( real* rhoBC,
 
       __syncthreads();
 
-      (D.f[E   ])[ke   ] = f1_W   ;  
-      (D.f[W   ])[kw   ] = f1_E   ;	
-      (D.f[N   ])[kn   ] = f1_S   ;	
-      (D.f[S   ])[ks   ] = f1_N   ;	
-      (D.f[T   ])[kt   ] = f1_B   ;	
-      (D.f[B   ])[kb   ] = f1_T   ;	
-      (D.f[NE  ])[kne  ] = f1_SW  ;	
-      (D.f[SW  ])[ksw  ] = f1_NE  ;	
-      (D.f[SE  ])[kse  ] = f1_NW  ;	
-      (D.f[NW  ])[knw  ] = f1_SE  ;	
-      (D.f[TE  ])[kte  ] = f1_BW  ;	
-      (D.f[BW  ])[kbw  ] = f1_TE  ;	
-      (D.f[BE  ])[kbe  ] = f1_TW  ;	
-      (D.f[TW  ])[ktw  ] = f1_BE  ;	
-      (D.f[TN  ])[ktn  ] = f1_BS  ;	
-      (D.f[BS  ])[kbs  ] = f1_TN  ;	
-      (D.f[BN  ])[kbn  ] = f1_TS  ;	
-      (D.f[TS  ])[kts  ] = f1_BN  ;	
-      (D.f[REST])[kzero] = f1_ZERO;	
-      (D.f[TNE ])[ktne ] = f1_BSW ;	
-      (D.f[TSW ])[ktsw ] = f1_BNE ;	
-      (D.f[TSE ])[ktse ] = f1_BNW ;	
-      (D.f[TNW ])[ktnw ] = f1_BSE ;	
-      (D.f[BNE ])[kbne ] = f1_TSW ;	
-      (D.f[BSW ])[kbsw ] = f1_TNE ;	
-      (D.f[BSE ])[kbse ] = f1_TNW ;	
-      (D.f[BNW ])[kbnw ] = f1_TSE ;       
+      (D.f[DIR_P00   ])[ke   ] = f1_W   ;  
+      (D.f[DIR_M00   ])[kw   ] = f1_E   ;	
+      (D.f[DIR_0P0   ])[kn   ] = f1_S   ;	
+      (D.f[DIR_0M0   ])[ks   ] = f1_N   ;	
+      (D.f[DIR_00P   ])[kt   ] = f1_B   ;	
+      (D.f[DIR_00M   ])[kb   ] = f1_T   ;	
+      (D.f[DIR_PP0  ])[kne  ] = f1_SW  ;	
+      (D.f[DIR_MM0  ])[ksw  ] = f1_NE  ;	
+      (D.f[DIR_PM0  ])[kse  ] = f1_NW  ;	
+      (D.f[DIR_MP0  ])[knw  ] = f1_SE  ;	
+      (D.f[DIR_P0P  ])[kte  ] = f1_BW  ;	
+      (D.f[DIR_M0M  ])[kbw  ] = f1_TE  ;	
+      (D.f[DIR_P0M  ])[kbe  ] = f1_TW  ;	
+      (D.f[DIR_M0P  ])[ktw  ] = f1_BE  ;	
+      (D.f[DIR_0PP  ])[ktn  ] = f1_BS  ;	
+      (D.f[DIR_0MM  ])[kbs  ] = f1_TN  ;	
+      (D.f[DIR_0PM  ])[kbn  ] = f1_TS  ;	
+      (D.f[DIR_0MP  ])[kts  ] = f1_BN  ;	
+      (D.f[DIR_000])[kzero] = f1_ZERO;	
+      (D.f[DIR_PPP ])[ktne ] = f1_BSW ;	
+      (D.f[DIR_MMP ])[ktsw ] = f1_BNE ;	
+      (D.f[DIR_PMP ])[ktse ] = f1_BNW ;	
+      (D.f[DIR_MPP ])[ktnw ] = f1_BSE ;	
+      (D.f[DIR_PPM ])[kbne ] = f1_TSW ;	
+      (D.f[DIR_MMM ])[kbsw ] = f1_TNE ;	
+      (D.f[DIR_PMM ])[kbse ] = f1_TNW ;	
+      (D.f[DIR_MPM ])[kbnw ] = f1_TSE ;       
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -804,7 +807,7 @@ extern "C" __global__ void QPressDeviceIncompNEQ27( real* rhoBC,
 
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QPressDeviceNEQ27(real* rhoBC,
+__global__ void QPressDeviceNEQ27(real* rhoBC,
                                              real* distribution, 
                                              int* bcNodeIndices,
                                              int* bcNeighborIndices,
@@ -914,40 +917,40 @@ extern "C" __global__ void QPressDeviceNEQ27(real* rhoBC,
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set local distributions for neighboring node
       //!
-      real f1_W    = (dist.f[E   ])[k1e   ];
-      real f1_E    = (dist.f[W   ])[k1w   ];
-      real f1_S    = (dist.f[N   ])[k1n   ];
-      real f1_N    = (dist.f[S   ])[k1s   ];
-      real f1_B    = (dist.f[T   ])[k1t   ];
-      real f1_T    = (dist.f[B   ])[k1b   ];
-      real f1_SW   = (dist.f[NE  ])[k1ne  ];
-      real f1_NE   = (dist.f[SW  ])[k1sw  ];
-      real f1_NW   = (dist.f[SE  ])[k1se  ];
-      real f1_SE   = (dist.f[NW  ])[k1nw  ];
-      real f1_BW   = (dist.f[TE  ])[k1te  ];
-      real f1_TE   = (dist.f[BW  ])[k1bw  ];
-      real f1_TW   = (dist.f[BE  ])[k1be  ];
-      real f1_BE   = (dist.f[TW  ])[k1tw  ];
-      real f1_BS   = (dist.f[TN  ])[k1tn  ];
-      real f1_TN   = (dist.f[BS  ])[k1bs  ];
-      real f1_TS   = (dist.f[BN  ])[k1bn  ];
-      real f1_BN   = (dist.f[TS  ])[k1ts  ];
-      real f1_ZERO = (dist.f[REST])[k1zero];
-      real f1_BSW  = (dist.f[TNE ])[k1tne ];
-      real f1_BNE  = (dist.f[TSW ])[k1tsw ];
-      real f1_BNW  = (dist.f[TSE ])[k1tse ];
-      real f1_BSE  = (dist.f[TNW ])[k1tnw ];
-      real f1_TSW  = (dist.f[BNE ])[k1bne ];
-      real f1_TNE  = (dist.f[BSW ])[k1bsw ];
-      real f1_TNW  = (dist.f[BSE ])[k1bse ];
-      real f1_TSE  = (dist.f[BNW ])[k1bnw ];
+      real f1_W    = (dist.f[DIR_P00   ])[k1e   ];
+      real f1_E    = (dist.f[DIR_M00   ])[k1w   ];
+      real f1_S    = (dist.f[DIR_0P0   ])[k1n   ];
+      real f1_N    = (dist.f[DIR_0M0   ])[k1s   ];
+      real f1_B    = (dist.f[DIR_00P   ])[k1t   ];
+      real f1_T    = (dist.f[DIR_00M   ])[k1b   ];
+      real f1_SW   = (dist.f[DIR_PP0  ])[k1ne  ];
+      real f1_NE   = (dist.f[DIR_MM0  ])[k1sw  ];
+      real f1_NW   = (dist.f[DIR_PM0  ])[k1se  ];
+      real f1_SE   = (dist.f[DIR_MP0  ])[k1nw  ];
+      real f1_BW   = (dist.f[DIR_P0P  ])[k1te  ];
+      real f1_TE   = (dist.f[DIR_M0M  ])[k1bw  ];
+      real f1_TW   = (dist.f[DIR_P0M  ])[k1be  ];
+      real f1_BE   = (dist.f[DIR_M0P  ])[k1tw  ];
+      real f1_BS   = (dist.f[DIR_0PP  ])[k1tn  ];
+      real f1_TN   = (dist.f[DIR_0MM  ])[k1bs  ];
+      real f1_TS   = (dist.f[DIR_0PM  ])[k1bn  ];
+      real f1_BN   = (dist.f[DIR_0MP  ])[k1ts  ];
+      real f1_ZERO = (dist.f[DIR_000])[k1zero];
+      real f1_BSW  = (dist.f[DIR_PPP ])[k1tne ];
+      real f1_BNE  = (dist.f[DIR_MMP ])[k1tsw ];
+      real f1_BNW  = (dist.f[DIR_PMP ])[k1tse ];
+      real f1_BSE  = (dist.f[DIR_MPP ])[k1tnw ];
+      real f1_TSW  = (dist.f[DIR_PPM ])[k1bne ];
+      real f1_TNE  = (dist.f[DIR_MMM ])[k1bsw ];
+      real f1_TNW  = (dist.f[DIR_PMM ])[k1bse ];
+      real f1_TSE  = (dist.f[DIR_MPM ])[k1bnw ];
 
       ////////////////////////////////////////////////////////////////////////////////
       //! - Calculate macroscopic quantities (for neighboring node)
       //!
       real drho1 = f1_TSE + f1_TNW + f1_TNE + f1_TSW + f1_BSE + f1_BNW + f1_BNE + f1_BSW +
                    f1_BN + f1_TS + f1_TN + f1_BS + f1_BE + f1_TW + f1_TE + f1_BW + f1_SE + f1_NW + f1_NE + f1_SW + 
-                   f1_T + f1_B + f1_N + f1_S + f1_E + f1_W + ((dist.f[REST])[kzero]); 
+                   f1_T + f1_B + f1_N + f1_S + f1_E + f1_W + ((dist.f[DIR_000])[kzero]); 
 
       real vx1  = (((f1_TSE - f1_BNW) - (f1_TNW - f1_BSE)) + ((f1_TNE - f1_BSW) - (f1_TSW - f1_BNE)) +
                    ((f1_BE - f1_TW)   + (f1_TE - f1_BW))   + ((f1_SE - f1_NW)   + (f1_NE - f1_SW)) +
@@ -1037,33 +1040,33 @@ extern "C" __global__ void QPressDeviceNEQ27(real* rhoBC,
       ////////////////////////////////////////////////////////////////////////////////
       //! write the new distributions to the bc nodes
       //!
-      (dist.f[E   ])[ke   ] = f1_W   ;
-      (dist.f[W   ])[kw   ] = f1_E   ;
-      (dist.f[N   ])[kn   ] = f1_S   ;
-      (dist.f[S   ])[ks   ] = f1_N   ;
-      (dist.f[T   ])[kt   ] = f1_B   ;
-      (dist.f[B   ])[kb   ] = f1_T   ;
-      (dist.f[NE  ])[kne  ] = f1_SW  ;
-      (dist.f[SW  ])[ksw  ] = f1_NE  ;
-      (dist.f[SE  ])[kse  ] = f1_NW  ;
-      (dist.f[NW  ])[knw  ] = f1_SE  ;
-      (dist.f[TE  ])[kte  ] = f1_BW  ;
-      (dist.f[BW  ])[kbw  ] = f1_TE  ;
-      (dist.f[BE  ])[kbe  ] = f1_TW  ;
-      (dist.f[TW  ])[ktw  ] = f1_BE  ;
-      (dist.f[TN  ])[ktn  ] = f1_BS  ;
-      (dist.f[BS  ])[kbs  ] = f1_TN  ;
-      (dist.f[BN  ])[kbn  ] = f1_TS  ;
-      (dist.f[TS  ])[kts  ] = f1_BN  ;
-      (dist.f[REST])[kzero] = f1_ZERO;
-      (dist.f[TNE ])[ktne ] = f1_BSW ;
-      (dist.f[TSW ])[ktsw ] = f1_BNE ;
-      (dist.f[TSE ])[ktse ] = f1_BNW ;
-      (dist.f[TNW ])[ktnw ] = f1_BSE ;
-      (dist.f[BNE ])[kbne ] = f1_TSW ;
-      (dist.f[BSW ])[kbsw ] = f1_TNE ;
-      (dist.f[BSE ])[kbse ] = f1_TNW ;
-      (dist.f[BNW ])[kbnw ] = f1_TSE ;
+      (dist.f[DIR_P00   ])[ke   ] = f1_W   ;
+      (dist.f[DIR_M00   ])[kw   ] = f1_E   ;
+      (dist.f[DIR_0P0   ])[kn   ] = f1_S   ;
+      (dist.f[DIR_0M0   ])[ks   ] = f1_N   ;
+      (dist.f[DIR_00P   ])[kt   ] = f1_B   ;
+      (dist.f[DIR_00M   ])[kb   ] = f1_T   ;
+      (dist.f[DIR_PP0  ])[kne  ] = f1_SW  ;
+      (dist.f[DIR_MM0  ])[ksw  ] = f1_NE  ;
+      (dist.f[DIR_PM0  ])[kse  ] = f1_NW  ;
+      (dist.f[DIR_MP0  ])[knw  ] = f1_SE  ;
+      (dist.f[DIR_P0P  ])[kte  ] = f1_BW  ;
+      (dist.f[DIR_M0M  ])[kbw  ] = f1_TE  ;
+      (dist.f[DIR_P0M  ])[kbe  ] = f1_TW  ;
+      (dist.f[DIR_M0P  ])[ktw  ] = f1_BE  ;
+      (dist.f[DIR_0PP  ])[ktn  ] = f1_BS  ;
+      (dist.f[DIR_0MM  ])[kbs  ] = f1_TN  ;
+      (dist.f[DIR_0PM  ])[kbn  ] = f1_TS  ;
+      (dist.f[DIR_0MP  ])[kts  ] = f1_BN  ;
+      (dist.f[DIR_000])[kzero] = f1_ZERO;
+      (dist.f[DIR_PPP ])[ktne ] = f1_BSW ;
+      (dist.f[DIR_MMP ])[ktsw ] = f1_BNE ;
+      (dist.f[DIR_PMP ])[ktse ] = f1_BNW ;
+      (dist.f[DIR_MPP ])[ktnw ] = f1_BSE ;
+      (dist.f[DIR_PPM ])[kbne ] = f1_TSW ;
+      (dist.f[DIR_MMM ])[kbsw ] = f1_TNE ;
+      (dist.f[DIR_PMM ])[kbse ] = f1_TNW ;
+      (dist.f[DIR_MPM ])[kbnw ] = f1_TSE ;
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -1107,7 +1110,7 @@ extern "C" __global__ void QPressDeviceNEQ27(real* rhoBC,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LB_BC_Press_East27( int nx, 
+__global__ void LB_BC_Press_East27( int nx, 
                                                int ny, 
                                                int tz, 
                                                unsigned int* bcMatD, 
@@ -1137,63 +1140,63 @@ extern "C" __global__ void LB_BC_Press_East27( int nx,
       Distributions27 D;
       if (isEvenTimestep==true)
       {
-         D.f[E   ] = &DD[E   *size_Mat];
-         D.f[W   ] = &DD[W   *size_Mat];
-         D.f[N   ] = &DD[N   *size_Mat];
-         D.f[S   ] = &DD[S   *size_Mat];
-         D.f[T   ] = &DD[T   *size_Mat];
-         D.f[B   ] = &DD[B   *size_Mat];
-         D.f[NE  ] = &DD[NE  *size_Mat];
-         D.f[SW  ] = &DD[SW  *size_Mat];
-         D.f[SE  ] = &DD[SE  *size_Mat];
-         D.f[NW  ] = &DD[NW  *size_Mat];
-         D.f[TE  ] = &DD[TE  *size_Mat];
-         D.f[BW  ] = &DD[BW  *size_Mat];
-         D.f[BE  ] = &DD[BE  *size_Mat];
-         D.f[TW  ] = &DD[TW  *size_Mat];
-         D.f[TN  ] = &DD[TN  *size_Mat];
-         D.f[BS  ] = &DD[BS  *size_Mat];
-         D.f[BN  ] = &DD[BN  *size_Mat];
-         D.f[TS  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[TNE *size_Mat];
-         D.f[TSW ] = &DD[TSW *size_Mat];
-         D.f[TSE ] = &DD[TSE *size_Mat];
-         D.f[TNW ] = &DD[TNW *size_Mat];
-         D.f[BNE ] = &DD[BNE *size_Mat];
-         D.f[BSW ] = &DD[BSW *size_Mat];
-         D.f[BSE ] = &DD[BSE *size_Mat];
-         D.f[BNW ] = &DD[BNW *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
       } 
       else
       {
-         D.f[W   ] = &DD[E   *size_Mat];
-         D.f[E   ] = &DD[W   *size_Mat];
-         D.f[S   ] = &DD[N   *size_Mat];
-         D.f[N   ] = &DD[S   *size_Mat];
-         D.f[B   ] = &DD[T   *size_Mat];
-         D.f[T   ] = &DD[B   *size_Mat];
-         D.f[SW  ] = &DD[NE  *size_Mat];
-         D.f[NE  ] = &DD[SW  *size_Mat];
-         D.f[NW  ] = &DD[SE  *size_Mat];
-         D.f[SE  ] = &DD[NW  *size_Mat];
-         D.f[BW  ] = &DD[TE  *size_Mat];
-         D.f[TE  ] = &DD[BW  *size_Mat];
-         D.f[TW  ] = &DD[BE  *size_Mat];
-         D.f[BE  ] = &DD[TW  *size_Mat];
-         D.f[BS  ] = &DD[TN  *size_Mat];
-         D.f[TN  ] = &DD[BS  *size_Mat];
-         D.f[TS  ] = &DD[BN  *size_Mat];
-         D.f[BN  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[BSW *size_Mat];
-         D.f[TSW ] = &DD[BNE *size_Mat];
-         D.f[TSE ] = &DD[BNW *size_Mat];
-         D.f[TNW ] = &DD[BSE *size_Mat];
-         D.f[BNE ] = &DD[TSW *size_Mat];
-         D.f[BSW ] = &DD[TNE *size_Mat];
-         D.f[BSE ] = &DD[TNW *size_Mat];
-         D.f[BNW ] = &DD[TSE *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
       }
       ////////////////////////////////////////////////////////////////////////////////
       ////////////////////////////////////////////////////////////////////////////////
@@ -1312,66 +1315,66 @@ extern "C" __global__ void LB_BC_Press_East27( int nx,
       real        f1_E,f1_W,f1_N,f1_S,f1_T,f1_B,f1_NE,f1_SW,f1_SE,f1_NW,f1_TE,f1_BW,f1_BE,f1_TW,f1_TN,f1_BS,f1_BN,f1_TS,f1_ZERO,
                    f1_TNE,f1_TSW,f1_TSE,f1_TNW,f1_BNE,f1_BSW,f1_BSE,f1_BNW;
 
-      f1_W    = (D.f[E   ])[k1e   ];
-      f1_E    = (D.f[W   ])[k1w   ];
-      f1_S    = (D.f[N   ])[k1n   ];
-      f1_N    = (D.f[S   ])[k1s   ];
-      f1_B    = (D.f[T   ])[k1t   ];
-      f1_T    = (D.f[B   ])[k1b   ];
-      f1_SW   = (D.f[NE  ])[k1ne  ];
-      f1_NE   = (D.f[SW  ])[k1sw  ];
-      f1_NW   = (D.f[SE  ])[k1se  ];
-      f1_SE   = (D.f[NW  ])[k1nw  ];
-      f1_BW   = (D.f[TE  ])[k1te  ];
-      f1_TE   = (D.f[BW  ])[k1bw  ];
-      f1_TW   = (D.f[BE  ])[k1be  ];
-      f1_BE   = (D.f[TW  ])[k1tw  ];
-      f1_BS   = (D.f[TN  ])[k1tn  ];
-      f1_TN   = (D.f[BS  ])[k1bs  ];
-      f1_TS   = (D.f[BN  ])[k1bn  ];
-      f1_BN   = (D.f[TS  ])[k1ts  ];
-      f1_ZERO = (D.f[REST])[k1zero];
-      f1_BSW  = (D.f[TNE ])[k1tne ];
-      f1_BNE  = (D.f[TSW ])[k1tsw ];
-      f1_BNW  = (D.f[TSE ])[k1tse ];
-      f1_BSE  = (D.f[TNW ])[k1tnw ];
-      f1_TSW  = (D.f[BNE ])[k1bne ];
-      f1_TNE  = (D.f[BSW ])[k1bsw ];
-      f1_TNW  = (D.f[BSE ])[k1bse ];
-      f1_TSE  = (D.f[BNW ])[k1bnw ];
+      f1_W    = (D.f[DIR_P00   ])[k1e   ];
+      f1_E    = (D.f[DIR_M00   ])[k1w   ];
+      f1_S    = (D.f[DIR_0P0   ])[k1n   ];
+      f1_N    = (D.f[DIR_0M0   ])[k1s   ];
+      f1_B    = (D.f[DIR_00P   ])[k1t   ];
+      f1_T    = (D.f[DIR_00M   ])[k1b   ];
+      f1_SW   = (D.f[DIR_PP0  ])[k1ne  ];
+      f1_NE   = (D.f[DIR_MM0  ])[k1sw  ];
+      f1_NW   = (D.f[DIR_PM0  ])[k1se  ];
+      f1_SE   = (D.f[DIR_MP0  ])[k1nw  ];
+      f1_BW   = (D.f[DIR_P0P  ])[k1te  ];
+      f1_TE   = (D.f[DIR_M0M  ])[k1bw  ];
+      f1_TW   = (D.f[DIR_P0M  ])[k1be  ];
+      f1_BE   = (D.f[DIR_M0P  ])[k1tw  ];
+      f1_BS   = (D.f[DIR_0PP  ])[k1tn  ];
+      f1_TN   = (D.f[DIR_0MM  ])[k1bs  ];
+      f1_TS   = (D.f[DIR_0PM  ])[k1bn  ];
+      f1_BN   = (D.f[DIR_0MP  ])[k1ts  ];
+      f1_ZERO = (D.f[DIR_000])[k1zero];
+      f1_BSW  = (D.f[DIR_PPP ])[k1tne ];
+      f1_BNE  = (D.f[DIR_MMP ])[k1tsw ];
+      f1_BNW  = (D.f[DIR_PMP ])[k1tse ];
+      f1_BSE  = (D.f[DIR_MPP ])[k1tnw ];
+      f1_TSW  = (D.f[DIR_PPM ])[k1bne ];
+      f1_TNE  = (D.f[DIR_MMM ])[k1bsw ];
+      f1_TNW  = (D.f[DIR_PMM ])[k1bse ];
+      f1_TSE  = (D.f[DIR_MPM ])[k1bnw ];
 
       real drho1    =  f1_ZERO+f1_E+f1_W+f1_N+f1_S+f1_T+f1_B+f1_NE+f1_SW+f1_SE+f1_NW+f1_TE+f1_BW+f1_BE+f1_TW+f1_TN+f1_BS+f1_BN+f1_TS+
                         f1_TNE+f1_TSW+f1_TSE+f1_TNW+f1_BNE+f1_BSW+f1_BSE+f1_BNW;
 
       __syncthreads();
 
-      (D.f[E   ])[ke   ] = f1_W   -c2o27*drho1;
-      (D.f[W   ])[kw   ] = f1_E   -c2o27*drho1;
-      (D.f[N   ])[kn   ] = f1_S   -c2o27*drho1;
-      (D.f[S   ])[ks   ] = f1_N   -c2o27*drho1;
-      (D.f[T   ])[kt   ] = f1_B   -c2o27*drho1;
-      (D.f[B   ])[kb   ] = f1_T   -c2o27*drho1;
-      (D.f[NE  ])[kne  ] = f1_SW  -c1o54*drho1;
-      (D.f[SW  ])[ksw  ] = f1_NE  -c1o54*drho1;
-      (D.f[SE  ])[kse  ] = f1_NW  -c1o54*drho1;
-      (D.f[NW  ])[knw  ] = f1_SE  -c1o54*drho1;
-      (D.f[TE  ])[kte  ] = f1_BW  -c1o54*drho1;
-      (D.f[BW  ])[kbw  ] = f1_TE  -c1o54*drho1;
-      (D.f[BE  ])[kbe  ] = f1_TW  -c1o54*drho1;
-      (D.f[TW  ])[ktw  ] = f1_BE  -c1o54*drho1;
-      (D.f[TN  ])[ktn  ] = f1_BS  -c1o54*drho1;
-      (D.f[BS  ])[kbs  ] = f1_TN  -c1o54*drho1;
-      (D.f[BN  ])[kbn  ] = f1_TS  -c1o54*drho1;
-      (D.f[TS  ])[kts  ] = f1_BN  -c1o54*drho1;
-      (D.f[REST])[kzero] = f1_ZERO-c8o27*drho1;
-      (D.f[TNE ])[ktne ] = f1_BSW -c1o216*drho1;
-      (D.f[TSW ])[ktsw ] = f1_BNE -c1o216*drho1;
-      (D.f[TSE ])[ktse ] = f1_BNW -c1o216*drho1;
-      (D.f[TNW ])[ktnw ] = f1_BSE -c1o216*drho1;
-      (D.f[BNE ])[kbne ] = f1_TSW -c1o216*drho1;
-      (D.f[BSW ])[kbsw ] = f1_TNE -c1o216*drho1;
-      (D.f[BSE ])[kbse ] = f1_TNW -c1o216*drho1;
-      (D.f[BNW ])[kbnw ] = f1_TSE -c1o216*drho1;       
+      (D.f[DIR_P00   ])[ke   ] = f1_W   -c2o27*drho1;
+      (D.f[DIR_M00   ])[kw   ] = f1_E   -c2o27*drho1;
+      (D.f[DIR_0P0   ])[kn   ] = f1_S   -c2o27*drho1;
+      (D.f[DIR_0M0   ])[ks   ] = f1_N   -c2o27*drho1;
+      (D.f[DIR_00P   ])[kt   ] = f1_B   -c2o27*drho1;
+      (D.f[DIR_00M   ])[kb   ] = f1_T   -c2o27*drho1;
+      (D.f[DIR_PP0  ])[kne  ] = f1_SW  -c1o54*drho1;
+      (D.f[DIR_MM0  ])[ksw  ] = f1_NE  -c1o54*drho1;
+      (D.f[DIR_PM0  ])[kse  ] = f1_NW  -c1o54*drho1;
+      (D.f[DIR_MP0  ])[knw  ] = f1_SE  -c1o54*drho1;
+      (D.f[DIR_P0P  ])[kte  ] = f1_BW  -c1o54*drho1;
+      (D.f[DIR_M0M  ])[kbw  ] = f1_TE  -c1o54*drho1;
+      (D.f[DIR_P0M  ])[kbe  ] = f1_TW  -c1o54*drho1;
+      (D.f[DIR_M0P  ])[ktw  ] = f1_BE  -c1o54*drho1;
+      (D.f[DIR_0PP  ])[ktn  ] = f1_BS  -c1o54*drho1;
+      (D.f[DIR_0MM  ])[kbs  ] = f1_TN  -c1o54*drho1;
+      (D.f[DIR_0PM  ])[kbn  ] = f1_TS  -c1o54*drho1;
+      (D.f[DIR_0MP  ])[kts  ] = f1_BN  -c1o54*drho1;
+      (D.f[DIR_000])[kzero] = f1_ZERO-c8o27*drho1;
+      (D.f[DIR_PPP ])[ktne ] = f1_BSW -c1o216*drho1;
+      (D.f[DIR_MMP ])[ktsw ] = f1_BNE -c1o216*drho1;
+      (D.f[DIR_PMP ])[ktse ] = f1_BNW -c1o216*drho1;
+      (D.f[DIR_MPP ])[ktnw ] = f1_BSE -c1o216*drho1;
+      (D.f[DIR_PPM ])[kbne ] = f1_TSW -c1o216*drho1;
+      (D.f[DIR_MMM ])[kbsw ] = f1_TNE -c1o216*drho1;
+      (D.f[DIR_PMM ])[kbse ] = f1_TNW -c1o216*drho1;
+      (D.f[DIR_MPM ])[kbnw ] = f1_TSE -c1o216*drho1;       
    }
    __syncthreads();
 }          
@@ -1416,7 +1419,7 @@ extern "C" __global__ void LB_BC_Press_East27( int nx,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QPressDevice27(real* rhoBC,
+__global__ void QPressDevice27(real* rhoBC,
                                            real* DD, 
                                            int* k_Q, 
                                            real* QQ,
@@ -1431,63 +1434,63 @@ extern "C" __global__ void QPressDevice27(real* rhoBC,
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[E   ] = &DD[E   *size_Mat];
-      D.f[W   ] = &DD[W   *size_Mat];
-      D.f[N   ] = &DD[N   *size_Mat];
-      D.f[S   ] = &DD[S   *size_Mat];
-      D.f[T   ] = &DD[T   *size_Mat];
-      D.f[B   ] = &DD[B   *size_Mat];
-      D.f[NE  ] = &DD[NE  *size_Mat];
-      D.f[SW  ] = &DD[SW  *size_Mat];
-      D.f[SE  ] = &DD[SE  *size_Mat];
-      D.f[NW  ] = &DD[NW  *size_Mat];
-      D.f[TE  ] = &DD[TE  *size_Mat];
-      D.f[BW  ] = &DD[BW  *size_Mat];
-      D.f[BE  ] = &DD[BE  *size_Mat];
-      D.f[TW  ] = &DD[TW  *size_Mat];
-      D.f[TN  ] = &DD[TN  *size_Mat];
-      D.f[BS  ] = &DD[BS  *size_Mat];
-      D.f[BN  ] = &DD[BN  *size_Mat];
-      D.f[TS  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[TNE *size_Mat];
-      D.f[TSW ] = &DD[TSW *size_Mat];
-      D.f[TSE ] = &DD[TSE *size_Mat];
-      D.f[TNW ] = &DD[TNW *size_Mat];
-      D.f[BNE ] = &DD[BNE *size_Mat];
-      D.f[BSW ] = &DD[BSW *size_Mat];
-      D.f[BSE ] = &DD[BSE *size_Mat];
-      D.f[BNW ] = &DD[BNW *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
    } 
    else
    {
-      D.f[W   ] = &DD[E   *size_Mat];
-      D.f[E   ] = &DD[W   *size_Mat];
-      D.f[S   ] = &DD[N   *size_Mat];
-      D.f[N   ] = &DD[S   *size_Mat];
-      D.f[B   ] = &DD[T   *size_Mat];
-      D.f[T   ] = &DD[B   *size_Mat];
-      D.f[SW  ] = &DD[NE  *size_Mat];
-      D.f[NE  ] = &DD[SW  *size_Mat];
-      D.f[NW  ] = &DD[SE  *size_Mat];
-      D.f[SE  ] = &DD[NW  *size_Mat];
-      D.f[BW  ] = &DD[TE  *size_Mat];
-      D.f[TE  ] = &DD[BW  *size_Mat];
-      D.f[TW  ] = &DD[BE  *size_Mat];
-      D.f[BE  ] = &DD[TW  *size_Mat];
-      D.f[BS  ] = &DD[TN  *size_Mat];
-      D.f[TN  ] = &DD[BS  *size_Mat];
-      D.f[TS  ] = &DD[BN  *size_Mat];
-      D.f[BN  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[BSW *size_Mat];
-      D.f[TSW ] = &DD[BNE *size_Mat];
-      D.f[TSE ] = &DD[BNW *size_Mat];
-      D.f[TNW ] = &DD[BSE *size_Mat];
-      D.f[BNE ] = &DD[TSW *size_Mat];
-      D.f[BSW ] = &DD[TNE *size_Mat];
-      D.f[BSE ] = &DD[TNW *size_Mat];
-      D.f[BNW ] = &DD[TSE *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -1507,32 +1510,32 @@ extern "C" __global__ void QPressDevice27(real* rhoBC,
          *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
          *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
          *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[E   * numberOfBCnodes];
-      q_dirW   = &QQ[W   * numberOfBCnodes];
-      q_dirN   = &QQ[N   * numberOfBCnodes];
-      q_dirS   = &QQ[S   * numberOfBCnodes];
-      q_dirT   = &QQ[T   * numberOfBCnodes];
-      q_dirB   = &QQ[B   * numberOfBCnodes];
-      q_dirNE  = &QQ[NE  * numberOfBCnodes];
-      q_dirSW  = &QQ[SW  * numberOfBCnodes];
-      q_dirSE  = &QQ[SE  * numberOfBCnodes];
-      q_dirNW  = &QQ[NW  * numberOfBCnodes];
-      q_dirTE  = &QQ[TE  * numberOfBCnodes];
-      q_dirBW  = &QQ[BW  * numberOfBCnodes];
-      q_dirBE  = &QQ[BE  * numberOfBCnodes];
-      q_dirTW  = &QQ[TW  * numberOfBCnodes];
-      q_dirTN  = &QQ[TN  * numberOfBCnodes];
-      q_dirBS  = &QQ[BS  * numberOfBCnodes];
-      q_dirBN  = &QQ[BN  * numberOfBCnodes];
-      q_dirTS  = &QQ[TS  * numberOfBCnodes];
-      q_dirTNE = &QQ[TNE * numberOfBCnodes];
-      q_dirTSW = &QQ[TSW * numberOfBCnodes];
-      q_dirTSE = &QQ[TSE * numberOfBCnodes];
-      q_dirTNW = &QQ[TNW * numberOfBCnodes];
-      q_dirBNE = &QQ[BNE * numberOfBCnodes];
-      q_dirBSW = &QQ[BSW * numberOfBCnodes];
-      q_dirBSE = &QQ[BSE * numberOfBCnodes];
-      q_dirBNW = &QQ[BNW * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
+      q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
+      q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
+      q_dirTNW = &QQ[DIR_MPP * numberOfBCnodes];
+      q_dirBNE = &QQ[DIR_PPM * numberOfBCnodes];
+      q_dirBSW = &QQ[DIR_MMM * numberOfBCnodes];
+      q_dirBSE = &QQ[DIR_PMM * numberOfBCnodes];
+      q_dirBNW = &QQ[DIR_MPM * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -1567,32 +1570,32 @@ extern "C" __global__ void QPressDevice27(real* rhoBC,
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
          f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_W    = (D.f[E   ])[ke   ];
-      f_E    = (D.f[W   ])[kw   ];
-      f_S    = (D.f[N   ])[kn   ];
-      f_N    = (D.f[S   ])[ks   ];
-      f_B    = (D.f[T   ])[kt   ];
-      f_T    = (D.f[B   ])[kb   ];
-      f_SW   = (D.f[NE  ])[kne  ];
-      f_NE   = (D.f[SW  ])[ksw  ];
-      f_NW   = (D.f[SE  ])[kse  ];
-      f_SE   = (D.f[NW  ])[knw  ];
-      f_BW   = (D.f[TE  ])[kte  ];
-      f_TE   = (D.f[BW  ])[kbw  ];
-      f_TW   = (D.f[BE  ])[kbe  ];
-      f_BE   = (D.f[TW  ])[ktw  ];
-      f_BS   = (D.f[TN  ])[ktn  ];
-      f_TN   = (D.f[BS  ])[kbs  ];
-      f_TS   = (D.f[BN  ])[kbn  ];
-      f_BN   = (D.f[TS  ])[kts  ];
-      f_BSW  = (D.f[TNE ])[ktne ];
-      f_BNE  = (D.f[TSW ])[ktsw ];
-      f_BNW  = (D.f[TSE ])[ktse ];
-      f_BSE  = (D.f[TNW ])[ktnw ];
-      f_TSW  = (D.f[BNE ])[kbne ];
-      f_TNE  = (D.f[BSW ])[kbsw ];
-      f_TNW  = (D.f[BSE ])[kbse ];
-      f_TSE  = (D.f[BNW ])[kbnw ];
+      f_W    = (D.f[DIR_P00   ])[ke   ];
+      f_E    = (D.f[DIR_M00   ])[kw   ];
+      f_S    = (D.f[DIR_0P0   ])[kn   ];
+      f_N    = (D.f[DIR_0M0   ])[ks   ];
+      f_B    = (D.f[DIR_00P   ])[kt   ];
+      f_T    = (D.f[DIR_00M   ])[kb   ];
+      f_SW   = (D.f[DIR_PP0  ])[kne  ];
+      f_NE   = (D.f[DIR_MM0  ])[ksw  ];
+      f_NW   = (D.f[DIR_PM0  ])[kse  ];
+      f_SE   = (D.f[DIR_MP0  ])[knw  ];
+      f_BW   = (D.f[DIR_P0P  ])[kte  ];
+      f_TE   = (D.f[DIR_M0M  ])[kbw  ];
+      f_TW   = (D.f[DIR_P0M  ])[kbe  ];
+      f_BE   = (D.f[DIR_M0P  ])[ktw  ];
+      f_BS   = (D.f[DIR_0PP  ])[ktn  ];
+      f_TN   = (D.f[DIR_0MM  ])[kbs  ];
+      f_TS   = (D.f[DIR_0PM  ])[kbn  ];
+      f_BN   = (D.f[DIR_0MP  ])[kts  ];
+      f_BSW  = (D.f[DIR_PPP ])[ktne ];
+      f_BNE  = (D.f[DIR_MMP ])[ktsw ];
+      f_BNW  = (D.f[DIR_PMP ])[ktse ];
+      f_BSE  = (D.f[DIR_MPP ])[ktnw ];
+      f_TSW  = (D.f[DIR_PPM ])[kbne ];
+      f_TNE  = (D.f[DIR_MMM ])[kbsw ];
+      f_TNW  = (D.f[DIR_PMM ])[kbse ];
+      f_TSE  = (D.f[DIR_MPM ])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real q, vx1, vx2, vx3, drho;
       vx1    =  ((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
@@ -1616,245 +1619,245 @@ extern "C" __global__ void QPressDevice27(real* rhoBC,
       ////////////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D.f[E   ] = &DD[E   *size_Mat];
-         D.f[W   ] = &DD[W   *size_Mat];
-         D.f[N   ] = &DD[N   *size_Mat];
-         D.f[S   ] = &DD[S   *size_Mat];
-         D.f[T   ] = &DD[T   *size_Mat];
-         D.f[B   ] = &DD[B   *size_Mat];
-         D.f[NE  ] = &DD[NE  *size_Mat];
-         D.f[SW  ] = &DD[SW  *size_Mat];
-         D.f[SE  ] = &DD[SE  *size_Mat];
-         D.f[NW  ] = &DD[NW  *size_Mat];
-         D.f[TE  ] = &DD[TE  *size_Mat];
-         D.f[BW  ] = &DD[BW  *size_Mat];
-         D.f[BE  ] = &DD[BE  *size_Mat];
-         D.f[TW  ] = &DD[TW  *size_Mat];
-         D.f[TN  ] = &DD[TN  *size_Mat];
-         D.f[BS  ] = &DD[BS  *size_Mat];
-         D.f[BN  ] = &DD[BN  *size_Mat];
-         D.f[TS  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[TNE *size_Mat];
-         D.f[TSW ] = &DD[TSW *size_Mat];
-         D.f[TSE ] = &DD[TSE *size_Mat];
-         D.f[TNW ] = &DD[TNW *size_Mat];
-         D.f[BNE ] = &DD[BNE *size_Mat];
-         D.f[BSW ] = &DD[BSW *size_Mat];
-         D.f[BSE ] = &DD[BSE *size_Mat];
-         D.f[BNW ] = &DD[BNW *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
       } 
       else
       {
-         D.f[W   ] = &DD[E   *size_Mat];
-         D.f[E   ] = &DD[W   *size_Mat];
-         D.f[S   ] = &DD[N   *size_Mat];
-         D.f[N   ] = &DD[S   *size_Mat];
-         D.f[B   ] = &DD[T   *size_Mat];
-         D.f[T   ] = &DD[B   *size_Mat];
-         D.f[SW  ] = &DD[NE  *size_Mat];
-         D.f[NE  ] = &DD[SW  *size_Mat];
-         D.f[NW  ] = &DD[SE  *size_Mat];
-         D.f[SE  ] = &DD[NW  *size_Mat];
-         D.f[BW  ] = &DD[TE  *size_Mat];
-         D.f[TE  ] = &DD[BW  *size_Mat];
-         D.f[TW  ] = &DD[BE  *size_Mat];
-         D.f[BE  ] = &DD[TW  *size_Mat];
-         D.f[BS  ] = &DD[TN  *size_Mat];
-         D.f[TN  ] = &DD[BS  *size_Mat];
-         D.f[TS  ] = &DD[BN  *size_Mat];
-         D.f[BN  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[BSW *size_Mat];
-         D.f[TSW ] = &DD[BNE *size_Mat];
-         D.f[TSE ] = &DD[BNW *size_Mat];
-         D.f[TNW ] = &DD[BSE *size_Mat];
-         D.f[BNE ] = &DD[TSW *size_Mat];
-         D.f[BSW ] = &DD[TNE *size_Mat];
-         D.f[BSE ] = &DD[TNW *size_Mat];
-         D.f[BNW ] = &DD[TSE *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       q = q_dirE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[W])[kw]=c2o27* (drho+c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cu_sq); 
-         //(D.f[E])[ke]=c2over27* (drho+three*( vx1        )+c9over2*( vx1        )*( vx1        )-cu_sq); 
+         (D.f[DIR_M00])[kw]=c2o27* (drho+c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cu_sq); 
+         //(D.f[DIR_P00])[ke]=c2over27* (drho+three*( vx1        )+c9over2*( vx1        )*( vx1        )-cu_sq); 
       }
 
       q = q_dirW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[E])[ke]=c2o27* (drho+c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cu_sq); 
-         //(D.f[W])[kw]=c2over27* (drho+three*(-vx1        )+c9over2*(-vx1        )*(-vx1        )-cu_sq); 
+         (D.f[DIR_P00])[ke]=c2o27* (drho+c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cu_sq); 
+         //(D.f[DIR_M00])[kw]=c2over27* (drho+three*(-vx1        )+c9over2*(-vx1        )*(-vx1        )-cu_sq); 
       }
 
       q = q_dirN[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[S])[ks]=c2o27* (drho+c3o1*(   -vx2     )+c9o2*(    -vx2    )*(    -vx2    )-cu_sq); 
-         //(D.f[N])[kn]=c2over27* (drho+three*(    vx2     )+c9over2*(     vx2    )*(     vx2    )-cu_sq); 
+         (D.f[DIR_0M0])[ks]=c2o27* (drho+c3o1*(   -vx2     )+c9o2*(    -vx2    )*(    -vx2    )-cu_sq); 
+         //(D.f[DIR_0P0])[kn]=c2over27* (drho+three*(    vx2     )+c9over2*(     vx2    )*(     vx2    )-cu_sq); 
       }
 
       q = q_dirS[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[N])[kn]=c2o27* (drho+c3o1*(    vx2     )+c9o2*(     vx2    )*(     vx2    )-cu_sq); 
-         //(D.f[S])[ks]=c2over27* (drho+three*(   -vx2     )+c9over2*(    -vx2    )*(    -vx2    )-cu_sq); 
+         (D.f[DIR_0P0])[kn]=c2o27* (drho+c3o1*(    vx2     )+c9o2*(     vx2    )*(     vx2    )-cu_sq); 
+         //(D.f[DIR_0M0])[ks]=c2over27* (drho+three*(   -vx2     )+c9over2*(    -vx2    )*(    -vx2    )-cu_sq); 
       }
 
       q = q_dirT[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[B])[kb]=c2o27* (drho+c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cu_sq); 
-         //(D.f[T])[kt]=c2over27* (drho+three*(         vx3)+c9over2*(         vx3)*(         vx3)-cu_sq); 
+         (D.f[DIR_00M])[kb]=c2o27* (drho+c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cu_sq); 
+         //(D.f[DIR_00P])[kt]=c2over27* (drho+three*(         vx3)+c9over2*(         vx3)*(         vx3)-cu_sq); 
       }
 
       q = q_dirB[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[T])[kt]=c2o27* (drho+c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cu_sq); 
-         //(D.f[B])[kb]=c2over27* (drho+three*(        -vx3)+c9over2*(        -vx3)*(        -vx3)-cu_sq); 
+         (D.f[DIR_00P])[kt]=c2o27* (drho+c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cu_sq); 
+         //(D.f[DIR_00M])[kb]=c2over27* (drho+three*(        -vx3)+c9over2*(        -vx3)*(        -vx3)-cu_sq); 
       }
 
       q = q_dirNE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[SW])[ksw]=c1o54* (drho+c3o1*(-vx1-vx2    )+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq); 
-         //(D.f[NE])[kne]=c1over54* (drho+three*( vx1+vx2    )+c9over2*( vx1+vx2    )*( vx1+vx2    )-cu_sq); 
+         (D.f[DIR_MM0])[ksw]=c1o54* (drho+c3o1*(-vx1-vx2    )+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq); 
+         //(D.f[DIR_PP0])[kne]=c1over54* (drho+three*( vx1+vx2    )+c9over2*( vx1+vx2    )*( vx1+vx2    )-cu_sq); 
       }
 
       q = q_dirSW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[NE])[kne]=c1o54* (drho+c3o1*( vx1+vx2    )+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq); 
-         //(D.f[SW])[ksw]=c1over54* (drho+three*(-vx1-vx2    )+c9over2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq); 
+         (D.f[DIR_PP0])[kne]=c1o54* (drho+c3o1*( vx1+vx2    )+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq); 
+         //(D.f[DIR_MM0])[ksw]=c1over54* (drho+three*(-vx1-vx2    )+c9over2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq); 
       }
 
       q = q_dirSE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[NW])[knw]=c1o54* (drho+c3o1*(-vx1+vx2    )+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq); 
-         //(D.f[SE])[kse]=c1over54* (drho+three*( vx1-vx2    )+c9over2*( vx1-vx2    )*( vx1-vx2    )-cu_sq); 
+         (D.f[DIR_MP0])[knw]=c1o54* (drho+c3o1*(-vx1+vx2    )+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq); 
+         //(D.f[DIR_PM0])[kse]=c1over54* (drho+three*( vx1-vx2    )+c9over2*( vx1-vx2    )*( vx1-vx2    )-cu_sq); 
       }
 
       q = q_dirNW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[SE])[kse]=c1o54* (drho+c3o1*( vx1-vx2    )+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq); 
-         //(D.f[NW])[knw]=c1over54* (drho+three*(-vx1+vx2    )+c9over2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq); 
+         (D.f[DIR_PM0])[kse]=c1o54* (drho+c3o1*( vx1-vx2    )+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq); 
+         //(D.f[DIR_MP0])[knw]=c1over54* (drho+three*(-vx1+vx2    )+c9over2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq); 
       }
 
       q = q_dirTE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[BW])[kbw]=c1o54* (drho+c3o1*(-vx1    -vx3)+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq); 
-         //(D.f[TE])[kte]=c1over54* (drho+three*( vx1    +vx3)+c9over2*( vx1    +vx3)*( vx1    +vx3)-cu_sq); 
+         (D.f[DIR_M0M])[kbw]=c1o54* (drho+c3o1*(-vx1    -vx3)+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq); 
+         //(D.f[DIR_P0P])[kte]=c1over54* (drho+three*( vx1    +vx3)+c9over2*( vx1    +vx3)*( vx1    +vx3)-cu_sq); 
       }
 
       q = q_dirBW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[TE])[kte]=c1o54* (drho+c3o1*( vx1    +vx3)+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq); 
-         //(D.f[BW])[kbw]=c1over54* (drho+three*(-vx1    -vx3)+c9over2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq); 
+         (D.f[DIR_P0P])[kte]=c1o54* (drho+c3o1*( vx1    +vx3)+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq); 
+         //(D.f[DIR_M0M])[kbw]=c1over54* (drho+three*(-vx1    -vx3)+c9over2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq); 
       }
 
       q = q_dirBE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[TW])[ktw]=c1o54* (drho+c3o1*(-vx1    +vx3)+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq); 
-         //(D.f[BE])[kbe]=c1over54* (drho+three*( vx1    -vx3)+c9over2*( vx1    -vx3)*( vx1    -vx3)-cu_sq); 
+         (D.f[DIR_M0P])[ktw]=c1o54* (drho+c3o1*(-vx1    +vx3)+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq); 
+         //(D.f[DIR_P0M])[kbe]=c1over54* (drho+three*( vx1    -vx3)+c9over2*( vx1    -vx3)*( vx1    -vx3)-cu_sq); 
       }
 
       q = q_dirTW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[BE])[kbe]=c1o54* (drho+c3o1*( vx1    -vx3)+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq); 
-         //(D.f[TW])[ktw]=c1over54* (drho+three*(-vx1    +vx3)+c9over2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq); 
+         (D.f[DIR_P0M])[kbe]=c1o54* (drho+c3o1*( vx1    -vx3)+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq); 
+         //(D.f[DIR_M0P])[ktw]=c1over54* (drho+three*(-vx1    +vx3)+c9over2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq); 
       }
 
       q = q_dirTN[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[BS])[kbs]=c1o54* (drho+c3o1*(    -vx2-vx3)+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq); 
-         //(D.f[TN])[ktn]=c1over54* (drho+three*(     vx2+vx3)+c9over2*(     vx2+vx3)*(     vx2+vx3)-cu_sq); 
+         (D.f[DIR_0MM])[kbs]=c1o54* (drho+c3o1*(    -vx2-vx3)+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq); 
+         //(D.f[DIR_0PP])[ktn]=c1over54* (drho+three*(     vx2+vx3)+c9over2*(     vx2+vx3)*(     vx2+vx3)-cu_sq); 
       }
 
       q = q_dirBS[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[TN])[ktn]=c1o54* (drho+c3o1*(     vx2+vx3)+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq); 
-         //(D.f[BS])[kbs]=c1over54* (drho+three*(    -vx2-vx3)+c9over2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq); 
+         (D.f[DIR_0PP])[ktn]=c1o54* (drho+c3o1*(     vx2+vx3)+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq); 
+         //(D.f[DIR_0MM])[kbs]=c1over54* (drho+three*(    -vx2-vx3)+c9over2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq); 
       }
 
       q = q_dirBN[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[TS])[kts]=c1o54* (drho+c3o1*(    -vx2+vx3)+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq); 
-         //(D.f[BN])[kbn]=c1over54* (drho+three*(     vx2-vx3)+c9over2*(     vx2-vx3)*(     vx2-vx3)-cu_sq); 
+         (D.f[DIR_0MP])[kts]=c1o54* (drho+c3o1*(    -vx2+vx3)+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq); 
+         //(D.f[DIR_0PM])[kbn]=c1over54* (drho+three*(     vx2-vx3)+c9over2*(     vx2-vx3)*(     vx2-vx3)-cu_sq); 
       }
 
       q = q_dirTS[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[BN])[kbn]=c1o54* (drho+c3o1*(     vx2-vx3)+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq); 
-         //(D.f[TS])[kts]=c1over54* (drho+three*(    -vx2+vx3)+c9over2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq); 
+         (D.f[DIR_0PM])[kbn]=c1o54* (drho+c3o1*(     vx2-vx3)+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq); 
+         //(D.f[DIR_0MP])[kts]=c1over54* (drho+three*(    -vx2+vx3)+c9over2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq); 
       }
 
       q = q_dirTNE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[BSW])[kbsw]=c1o216*(drho+c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq); 
-         //(D.f[TNE])[ktne]=c1over216*(drho+three*( vx1+vx2+vx3)+c9over2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq); 
+         (D.f[DIR_MMM])[kbsw]=c1o216*(drho+c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq); 
+         //(D.f[DIR_PPP])[ktne]=c1over216*(drho+three*( vx1+vx2+vx3)+c9over2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq); 
       }
 
       q = q_dirBSW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[TNE])[ktne]=c1o216*(drho+c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq); 
-         //(D.f[BSW])[kbsw]=c1over216*(drho+three*(-vx1-vx2-vx3)+c9over2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq); 
+         (D.f[DIR_PPP])[ktne]=c1o216*(drho+c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq); 
+         //(D.f[DIR_MMM])[kbsw]=c1over216*(drho+three*(-vx1-vx2-vx3)+c9over2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq); 
       }
 
       q = q_dirBNE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[TSW])[ktsw]=c1o216*(drho+c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq); 
-         //(D.f[BNE])[kbne]=c1over216*(drho+three*( vx1+vx2-vx3)+c9over2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq); 
+         (D.f[DIR_MMP])[ktsw]=c1o216*(drho+c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq); 
+         //(D.f[DIR_PPM])[kbne]=c1over216*(drho+three*( vx1+vx2-vx3)+c9over2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq); 
       }
 
       q = q_dirTSW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[BNE])[kbne]=c1o216*(drho+c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq); 
-         //(D.f[TSW])[ktsw]=c1over216*(drho+three*(-vx1-vx2+vx3)+c9over2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq); 
+         (D.f[DIR_PPM])[kbne]=c1o216*(drho+c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq); 
+         //(D.f[DIR_MMP])[ktsw]=c1over216*(drho+three*(-vx1-vx2+vx3)+c9over2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq); 
       }
 
       q = q_dirTSE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[BNW])[kbnw]=c1o216*(drho+c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq); 
-         //(D.f[TSE])[ktse]=c1over216*(drho+three*( vx1-vx2+vx3)+c9over2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq); 
+         (D.f[DIR_MPM])[kbnw]=c1o216*(drho+c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq); 
+         //(D.f[DIR_PMP])[ktse]=c1over216*(drho+three*( vx1-vx2+vx3)+c9over2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq); 
       }
 
       q = q_dirBNW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[TSE])[ktse]=c1o216*(drho+c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq); 
-         //(D.f[BNW])[kbnw]=c1over216*(drho+three*(-vx1+vx2-vx3)+c9over2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq); 
+         (D.f[DIR_PMP])[ktse]=c1o216*(drho+c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq); 
+         //(D.f[DIR_MPM])[kbnw]=c1over216*(drho+three*(-vx1+vx2-vx3)+c9over2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq); 
       }
 
       q = q_dirBSE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[TNW])[ktnw]=c1o216*(drho+c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq); 
-         //(D.f[BSE])[kbse]=c1over216*(drho+three*( vx1-vx2-vx3)+c9over2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq); 
+         (D.f[DIR_MPP])[ktnw]=c1o216*(drho+c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq); 
+         //(D.f[DIR_PMM])[kbse]=c1over216*(drho+three*( vx1-vx2-vx3)+c9over2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq); 
       }
 
       q = q_dirTNW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[BSE])[kbse]=c1o216*(drho+c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq); 
-         //(D.f[TNW])[ktnw]=c1over216*(drho+three*(-vx1+vx2+vx3)+c9over2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq); 
+         (D.f[DIR_PMM])[kbse]=c1o216*(drho+c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq); 
+         //(D.f[DIR_MPP])[ktnw]=c1over216*(drho+three*(-vx1+vx2+vx3)+c9over2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq); 
       }
    }
 }
@@ -1899,7 +1902,7 @@ extern "C" __global__ void QPressDevice27(real* rhoBC,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QPressDeviceAntiBB27(   real* rhoBC,
+__global__ void QPressDeviceAntiBB27(   real* rhoBC,
 												   real* vx,
 												   real* vy,
 												   real* vz,
@@ -1917,63 +1920,63 @@ extern "C" __global__ void QPressDeviceAntiBB27(   real* rhoBC,
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[E   ] = &DD[E   *size_Mat];
-      D.f[W   ] = &DD[W   *size_Mat];
-      D.f[N   ] = &DD[N   *size_Mat];
-      D.f[S   ] = &DD[S   *size_Mat];
-      D.f[T   ] = &DD[T   *size_Mat];
-      D.f[B   ] = &DD[B   *size_Mat];
-      D.f[NE  ] = &DD[NE  *size_Mat];
-      D.f[SW  ] = &DD[SW  *size_Mat];
-      D.f[SE  ] = &DD[SE  *size_Mat];
-      D.f[NW  ] = &DD[NW  *size_Mat];
-      D.f[TE  ] = &DD[TE  *size_Mat];
-      D.f[BW  ] = &DD[BW  *size_Mat];
-      D.f[BE  ] = &DD[BE  *size_Mat];
-      D.f[TW  ] = &DD[TW  *size_Mat];
-      D.f[TN  ] = &DD[TN  *size_Mat];
-      D.f[BS  ] = &DD[BS  *size_Mat];
-      D.f[BN  ] = &DD[BN  *size_Mat];
-      D.f[TS  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[TNE *size_Mat];
-      D.f[TSW ] = &DD[TSW *size_Mat];
-      D.f[TSE ] = &DD[TSE *size_Mat];
-      D.f[TNW ] = &DD[TNW *size_Mat];
-      D.f[BNE ] = &DD[BNE *size_Mat];
-      D.f[BSW ] = &DD[BSW *size_Mat];
-      D.f[BSE ] = &DD[BSE *size_Mat];
-      D.f[BNW ] = &DD[BNW *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
    } 
    else
    {
-      D.f[W   ] = &DD[E   *size_Mat];
-      D.f[E   ] = &DD[W   *size_Mat];
-      D.f[S   ] = &DD[N   *size_Mat];
-      D.f[N   ] = &DD[S   *size_Mat];
-      D.f[B   ] = &DD[T   *size_Mat];
-      D.f[T   ] = &DD[B   *size_Mat];
-      D.f[SW  ] = &DD[NE  *size_Mat];
-      D.f[NE  ] = &DD[SW  *size_Mat];
-      D.f[NW  ] = &DD[SE  *size_Mat];
-      D.f[SE  ] = &DD[NW  *size_Mat];
-      D.f[BW  ] = &DD[TE  *size_Mat];
-      D.f[TE  ] = &DD[BW  *size_Mat];
-      D.f[TW  ] = &DD[BE  *size_Mat];
-      D.f[BE  ] = &DD[TW  *size_Mat];
-      D.f[BS  ] = &DD[TN  *size_Mat];
-      D.f[TN  ] = &DD[BS  *size_Mat];
-      D.f[TS  ] = &DD[BN  *size_Mat];
-      D.f[BN  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[BSW *size_Mat];
-      D.f[TSW ] = &DD[BNE *size_Mat];
-      D.f[TSE ] = &DD[BNW *size_Mat];
-      D.f[TNW ] = &DD[BSE *size_Mat];
-      D.f[BNE ] = &DD[TSW *size_Mat];
-      D.f[BSW ] = &DD[TNE *size_Mat];
-      D.f[BSE ] = &DD[TNW *size_Mat];
-      D.f[BNW ] = &DD[TSE *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -1993,32 +1996,32 @@ extern "C" __global__ void QPressDeviceAntiBB27(   real* rhoBC,
          *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
          *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
          *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[E   *numberOfBCnodes];
-      q_dirW   = &QQ[W   *numberOfBCnodes];
-      q_dirN   = &QQ[N   *numberOfBCnodes];
-      q_dirS   = &QQ[S   *numberOfBCnodes];
-      q_dirT   = &QQ[T   *numberOfBCnodes];
-      q_dirB   = &QQ[B   *numberOfBCnodes];
-      q_dirNE  = &QQ[NE  *numberOfBCnodes];
-      q_dirSW  = &QQ[SW  *numberOfBCnodes];
-      q_dirSE  = &QQ[SE  *numberOfBCnodes];
-      q_dirNW  = &QQ[NW  *numberOfBCnodes];
-      q_dirTE  = &QQ[TE  *numberOfBCnodes];
-      q_dirBW  = &QQ[BW  *numberOfBCnodes];
-      q_dirBE  = &QQ[BE  *numberOfBCnodes];
-      q_dirTW  = &QQ[TW  *numberOfBCnodes];
-      q_dirTN  = &QQ[TN  *numberOfBCnodes];
-      q_dirBS  = &QQ[BS  *numberOfBCnodes];
-      q_dirBN  = &QQ[BN  *numberOfBCnodes];
-      q_dirTS  = &QQ[TS  *numberOfBCnodes];
-      q_dirTNE = &QQ[TNE *numberOfBCnodes];
-      q_dirTSW = &QQ[TSW *numberOfBCnodes];
-      q_dirTSE = &QQ[TSE *numberOfBCnodes];
-      q_dirTNW = &QQ[TNW *numberOfBCnodes];
-      q_dirBNE = &QQ[BNE *numberOfBCnodes];
-      q_dirBSW = &QQ[BSW *numberOfBCnodes];
-      q_dirBSE = &QQ[BSE *numberOfBCnodes];
-      q_dirBNW = &QQ[BNW *numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00   *numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00   *numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0   *numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0   *numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P   *numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M   *numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0  *numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0  *numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0  *numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0  *numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P  *numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M  *numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M  *numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P  *numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP  *numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM  *numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM  *numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP  *numberOfBCnodes];
+      q_dirTNE = &QQ[DIR_PPP *numberOfBCnodes];
+      q_dirTSW = &QQ[DIR_MMP *numberOfBCnodes];
+      q_dirTSE = &QQ[DIR_PMP *numberOfBCnodes];
+      q_dirTNW = &QQ[DIR_MPP *numberOfBCnodes];
+      q_dirBNE = &QQ[DIR_PPM *numberOfBCnodes];
+      q_dirBSW = &QQ[DIR_MMM *numberOfBCnodes];
+      q_dirBSE = &QQ[DIR_PMM *numberOfBCnodes];
+      q_dirBNW = &QQ[DIR_MPM *numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -2053,33 +2056,33 @@ extern "C" __global__ void QPressDeviceAntiBB27(   real* rhoBC,
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
          f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW, f_ZERO;
 
-      f_W    = (D.f[E   ])[ke   ];
-      f_E    = (D.f[W   ])[kw   ];
-      f_S    = (D.f[N   ])[kn   ];
-      f_N    = (D.f[S   ])[ks   ];
-      f_B    = (D.f[T   ])[kt   ];
-      f_T    = (D.f[B   ])[kb   ];
-      f_SW   = (D.f[NE  ])[kne  ];
-      f_NE   = (D.f[SW  ])[ksw  ];
-      f_NW   = (D.f[SE  ])[kse  ];
-      f_SE   = (D.f[NW  ])[knw  ];
-      f_BW   = (D.f[TE  ])[kte  ];
-      f_TE   = (D.f[BW  ])[kbw  ];
-      f_TW   = (D.f[BE  ])[kbe  ];
-      f_BE   = (D.f[TW  ])[ktw  ];
-      f_BS   = (D.f[TN  ])[ktn  ];
-      f_TN   = (D.f[BS  ])[kbs  ];
-      f_TS   = (D.f[BN  ])[kbn  ];
-      f_BN   = (D.f[TS  ])[kts  ];
-      f_BSW  = (D.f[TNE ])[ktne ];
-      f_BNE  = (D.f[TSW ])[ktsw ];
-      f_BNW  = (D.f[TSE ])[ktse ];
-      f_BSE  = (D.f[TNW ])[ktnw ];
-      f_TSW  = (D.f[BNE ])[kbne ];
-      f_TNE  = (D.f[BSW ])[kbsw ];
-      f_TNW  = (D.f[BSE ])[kbse ];
-      f_TSE  = (D.f[BNW ])[kbnw ];
-      f_ZERO = (D.f[REST])[kzero];
+      f_W    = (D.f[DIR_P00   ])[ke   ];
+      f_E    = (D.f[DIR_M00   ])[kw   ];
+      f_S    = (D.f[DIR_0P0   ])[kn   ];
+      f_N    = (D.f[DIR_0M0   ])[ks   ];
+      f_B    = (D.f[DIR_00P   ])[kt   ];
+      f_T    = (D.f[DIR_00M   ])[kb   ];
+      f_SW   = (D.f[DIR_PP0  ])[kne  ];
+      f_NE   = (D.f[DIR_MM0  ])[ksw  ];
+      f_NW   = (D.f[DIR_PM0  ])[kse  ];
+      f_SE   = (D.f[DIR_MP0  ])[knw  ];
+      f_BW   = (D.f[DIR_P0P  ])[kte  ];
+      f_TE   = (D.f[DIR_M0M  ])[kbw  ];
+      f_TW   = (D.f[DIR_P0M  ])[kbe  ];
+      f_BE   = (D.f[DIR_M0P  ])[ktw  ];
+      f_BS   = (D.f[DIR_0PP  ])[ktn  ];
+      f_TN   = (D.f[DIR_0MM  ])[kbs  ];
+      f_TS   = (D.f[DIR_0PM  ])[kbn  ];
+      f_BN   = (D.f[DIR_0MP  ])[kts  ];
+      f_BSW  = (D.f[DIR_PPP ])[ktne ];
+      f_BNE  = (D.f[DIR_MMP ])[ktsw ];
+      f_BNW  = (D.f[DIR_PMP ])[ktse ];
+      f_BSE  = (D.f[DIR_MPP ])[ktnw ];
+      f_TSW  = (D.f[DIR_PPM ])[kbne ];
+      f_TNE  = (D.f[DIR_MMM ])[kbsw ];
+      f_TNW  = (D.f[DIR_PMM ])[kbse ];
+      f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+      f_ZERO = (D.f[DIR_000])[kzero];
       ////////////////////////////////////////////////////////////////////////////////
       //real vx1, vx2, vx3, drho;
       //vx1    =  ((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
@@ -2107,219 +2110,219 @@ extern "C" __global__ void QPressDeviceAntiBB27(   real* rhoBC,
       ////////////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D.f[E   ] = &DD[E   *size_Mat];
-         D.f[W   ] = &DD[W   *size_Mat];
-         D.f[N   ] = &DD[N   *size_Mat];
-         D.f[S   ] = &DD[S   *size_Mat];
-         D.f[T   ] = &DD[T   *size_Mat];
-         D.f[B   ] = &DD[B   *size_Mat];
-         D.f[NE  ] = &DD[NE  *size_Mat];
-         D.f[SW  ] = &DD[SW  *size_Mat];
-         D.f[SE  ] = &DD[SE  *size_Mat];
-         D.f[NW  ] = &DD[NW  *size_Mat];
-         D.f[TE  ] = &DD[TE  *size_Mat];
-         D.f[BW  ] = &DD[BW  *size_Mat];
-         D.f[BE  ] = &DD[BE  *size_Mat];
-         D.f[TW  ] = &DD[TW  *size_Mat];
-         D.f[TN  ] = &DD[TN  *size_Mat];
-         D.f[BS  ] = &DD[BS  *size_Mat];
-         D.f[BN  ] = &DD[BN  *size_Mat];
-         D.f[TS  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[TNE *size_Mat];
-         D.f[TSW ] = &DD[TSW *size_Mat];
-         D.f[TSE ] = &DD[TSE *size_Mat];
-         D.f[TNW ] = &DD[TNW *size_Mat];
-         D.f[BNE ] = &DD[BNE *size_Mat];
-         D.f[BSW ] = &DD[BSW *size_Mat];
-         D.f[BSE ] = &DD[BSE *size_Mat];
-         D.f[BNW ] = &DD[BNW *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
       } 
       else
       {
-         D.f[W   ] = &DD[E   *size_Mat];
-         D.f[E   ] = &DD[W   *size_Mat];
-         D.f[S   ] = &DD[N   *size_Mat];
-         D.f[N   ] = &DD[S   *size_Mat];
-         D.f[B   ] = &DD[T   *size_Mat];
-         D.f[T   ] = &DD[B   *size_Mat];
-         D.f[SW  ] = &DD[NE  *size_Mat];
-         D.f[NE  ] = &DD[SW  *size_Mat];
-         D.f[NW  ] = &DD[SE  *size_Mat];
-         D.f[SE  ] = &DD[NW  *size_Mat];
-         D.f[BW  ] = &DD[TE  *size_Mat];
-         D.f[TE  ] = &DD[BW  *size_Mat];
-         D.f[TW  ] = &DD[BE  *size_Mat];
-         D.f[BE  ] = &DD[TW  *size_Mat];
-         D.f[BS  ] = &DD[TN  *size_Mat];
-         D.f[TN  ] = &DD[BS  *size_Mat];
-         D.f[TS  ] = &DD[BN  *size_Mat];
-         D.f[BN  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[BSW *size_Mat];
-         D.f[TSW ] = &DD[BNE *size_Mat];
-         D.f[TSE ] = &DD[BNW *size_Mat];
-         D.f[TNW ] = &DD[BSE *size_Mat];
-         D.f[BNE ] = &DD[TSW *size_Mat];
-         D.f[BSW ] = &DD[TNE *size_Mat];
-         D.f[BSE ] = &DD[TNW *size_Mat];
-         D.f[BNW ] = &DD[TSE *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       q = q_dirE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[W])[kw]=f_W-c2o27*drho; 
+         (D.f[DIR_M00])[kw]=f_W-c2o27*drho; 
       }
 
       q = q_dirW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[E])[ke]=f_E-c2o27*drho;
+         (D.f[DIR_P00])[ke]=f_E-c2o27*drho;
       }
 
       q = q_dirN[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[S])[ks]=f_S-c2o27*drho; 
+         (D.f[DIR_0M0])[ks]=f_S-c2o27*drho; 
       }
 
       q = q_dirS[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[N])[kn]=f_N-c2o27*drho; 
+         (D.f[DIR_0P0])[kn]=f_N-c2o27*drho; 
       }
 
       q = q_dirT[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[B])[kb]=f_B-c2o27*drho; 
+         (D.f[DIR_00M])[kb]=f_B-c2o27*drho; 
       }
 
       q = q_dirB[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[T])[kt]=f_T-c2o27*drho;
+         (D.f[DIR_00P])[kt]=f_T-c2o27*drho;
       }
 
       q = q_dirNE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[SW])[ksw]=f_SW-c1o54*drho;
+         (D.f[DIR_MM0])[ksw]=f_SW-c1o54*drho;
       }
 
       q = q_dirSW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[NE])[kne]=f_NE-c1o54*drho;
+         (D.f[DIR_PP0])[kne]=f_NE-c1o54*drho;
       }
 
       q = q_dirSE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[NW])[knw]=f_NW-c1o54*drho;
+         (D.f[DIR_MP0])[knw]=f_NW-c1o54*drho;
       }
 
       q = q_dirNW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[SE])[kse]=f_SE-c1o54*drho;
+         (D.f[DIR_PM0])[kse]=f_SE-c1o54*drho;
       }
 
       q = q_dirTE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[BW])[kbw]=f_BW-c1o54*drho; 
+         (D.f[DIR_M0M])[kbw]=f_BW-c1o54*drho; 
       }
 
       q = q_dirBW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[TE])[kte]=f_TE-c1o54*drho; 
+         (D.f[DIR_P0P])[kte]=f_TE-c1o54*drho; 
       }
 
       q = q_dirBE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[TW])[ktw]=f_TW-c1o54*drho;
+         (D.f[DIR_M0P])[ktw]=f_TW-c1o54*drho;
       }
 
       q = q_dirTW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[BE])[kbe]=f_BE-c1o54*drho;
+         (D.f[DIR_P0M])[kbe]=f_BE-c1o54*drho;
       }
 
       q = q_dirTN[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[BS])[kbs]=f_BS-c1o54*drho;
+         (D.f[DIR_0MM])[kbs]=f_BS-c1o54*drho;
       }
 
       q = q_dirBS[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[TN])[ktn]=f_TN-c1o54*drho;
+         (D.f[DIR_0PP])[ktn]=f_TN-c1o54*drho;
       }
 
       q = q_dirBN[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[TS])[kts]=f_TS-c1o54*drho;
+         (D.f[DIR_0MP])[kts]=f_TS-c1o54*drho;
       }
 
       q = q_dirTS[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[BN])[kbn]=f_BN-c1o54*drho;
+         (D.f[DIR_0PM])[kbn]=f_BN-c1o54*drho;
       }
 
       q = q_dirTNE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[BSW])[kbsw]=f_BSW-c1o216*drho;
+         (D.f[DIR_MMM])[kbsw]=f_BSW-c1o216*drho;
       }
 
       q = q_dirBSW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[TNE])[ktne]=f_TNE-c1o216*drho;
+         (D.f[DIR_PPP])[ktne]=f_TNE-c1o216*drho;
       }
 
       q = q_dirBNE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[TSW])[ktsw]=f_TSW-c1o216*drho;
+         (D.f[DIR_MMP])[ktsw]=f_TSW-c1o216*drho;
       }
 
       q = q_dirTSW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[BNE])[kbne]=f_BNE-c1o216*drho;
+         (D.f[DIR_PPM])[kbne]=f_BNE-c1o216*drho;
       }
 
       q = q_dirTSE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[BNW])[kbnw]=f_BNW-c1o216*drho;
+         (D.f[DIR_MPM])[kbnw]=f_BNW-c1o216*drho;
       }
 
       q = q_dirBNW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[TSE])[ktse]=f_TSE-c1o216*drho;
+         (D.f[DIR_PMP])[ktse]=f_TSE-c1o216*drho;
       }
 
       q = q_dirBSE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[TNW])[ktnw]=f_TNW-c1o216*drho;
+         (D.f[DIR_MPP])[ktnw]=f_TNW-c1o216*drho;
       }
 
       q = q_dirTNW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[BSE])[kbse]=f_BSE-c1o216*drho;
+         (D.f[DIR_PMM])[kbse]=f_BSE-c1o216*drho;
       }
    }
 }
@@ -2364,7 +2367,7 @@ extern "C" __global__ void QPressDeviceAntiBB27(   real* rhoBC,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QPressDeviceFixBackflow27( real* rhoBC,
+__global__ void QPressDeviceFixBackflow27( real* rhoBC,
                                                       real* DD, 
                                                       int* k_Q, 
                                                       int numberOfBCnodes, 
@@ -2426,92 +2429,92 @@ extern "C" __global__ void QPressDeviceFixBackflow27( real* rhoBC,
       Distributions27 D;
       if (isEvenTimestep==false)
       {
-         D.f[E   ] = &DD[E   *size_Mat];
-         D.f[W   ] = &DD[W   *size_Mat];
-         D.f[N   ] = &DD[N   *size_Mat];
-         D.f[S   ] = &DD[S   *size_Mat];
-         D.f[T   ] = &DD[T   *size_Mat];
-         D.f[B   ] = &DD[B   *size_Mat];
-         D.f[NE  ] = &DD[NE  *size_Mat];
-         D.f[SW  ] = &DD[SW  *size_Mat];
-         D.f[SE  ] = &DD[SE  *size_Mat];
-         D.f[NW  ] = &DD[NW  *size_Mat];
-         D.f[TE  ] = &DD[TE  *size_Mat];
-         D.f[BW  ] = &DD[BW  *size_Mat];
-         D.f[BE  ] = &DD[BE  *size_Mat];
-         D.f[TW  ] = &DD[TW  *size_Mat];
-         D.f[TN  ] = &DD[TN  *size_Mat];
-         D.f[BS  ] = &DD[BS  *size_Mat];
-         D.f[BN  ] = &DD[BN  *size_Mat];
-         D.f[TS  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[TNE *size_Mat];
-         D.f[TSW ] = &DD[TSW *size_Mat];
-         D.f[TSE ] = &DD[TSE *size_Mat];
-         D.f[TNW ] = &DD[TNW *size_Mat];
-         D.f[BNE ] = &DD[BNE *size_Mat];
-         D.f[BSW ] = &DD[BSW *size_Mat];
-         D.f[BSE ] = &DD[BSE *size_Mat];
-         D.f[BNW ] = &DD[BNW *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
       } 
       else
       {
-         D.f[W   ] = &DD[E   *size_Mat];
-         D.f[E   ] = &DD[W   *size_Mat];
-         D.f[S   ] = &DD[N   *size_Mat];
-         D.f[N   ] = &DD[S   *size_Mat];
-         D.f[B   ] = &DD[T   *size_Mat];
-         D.f[T   ] = &DD[B   *size_Mat];
-         D.f[SW  ] = &DD[NE  *size_Mat];
-         D.f[NE  ] = &DD[SW  *size_Mat];
-         D.f[NW  ] = &DD[SE  *size_Mat];
-         D.f[SE  ] = &DD[NW  *size_Mat];
-         D.f[BW  ] = &DD[TE  *size_Mat];
-         D.f[TE  ] = &DD[BW  *size_Mat];
-         D.f[TW  ] = &DD[BE  *size_Mat];
-         D.f[BE  ] = &DD[TW  *size_Mat];
-         D.f[BS  ] = &DD[TN  *size_Mat];
-         D.f[TN  ] = &DD[BS  *size_Mat];
-         D.f[TS  ] = &DD[BN  *size_Mat];
-         D.f[BN  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[BSW *size_Mat];
-         D.f[TSW ] = &DD[BNE *size_Mat];
-         D.f[TSE ] = &DD[BNW *size_Mat];
-         D.f[TNW ] = &DD[BSE *size_Mat];
-         D.f[BNE ] = &DD[TSW *size_Mat];
-         D.f[BSW ] = &DD[TNE *size_Mat];
-         D.f[BSE ] = &DD[TNW *size_Mat];
-         D.f[BNW ] = &DD[TSE *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-         (D.f[W])[kw]       = c2o27  * deltaRho;
-         (D.f[E])[ke]       = c2o27  * deltaRho;
-         (D.f[S])[ks]       = c2o27  * deltaRho;
-         (D.f[N])[kn]       = c2o27  * deltaRho;
-         (D.f[B])[kb]       = c2o27  * deltaRho;
-         (D.f[T])[kt]       = c2o27  * deltaRho;
-         (D.f[SW])[ksw]     = c1o54  * deltaRho;
-         (D.f[NE])[kne]     = c1o54  * deltaRho;
-         (D.f[NW])[knw]     = c1o54  * deltaRho;
-         (D.f[SE])[kse]     = c1o54  * deltaRho;
-         (D.f[BW])[kbw]     = c1o54  * deltaRho;
-         (D.f[TE])[kte]     = c1o54  * deltaRho;
-         (D.f[TW])[ktw]     = c1o54  * deltaRho;
-         (D.f[BE])[kbe]     = c1o54  * deltaRho;
-         (D.f[BS])[kbs]     = c1o54  * deltaRho;
-         (D.f[TN])[ktn]     = c1o54  * deltaRho;
-         (D.f[TS])[kts]     = c1o54  * deltaRho;
-         (D.f[BN])[kbn]     = c1o54  * deltaRho;
-         (D.f[BSW])[kbsw]   = c1o216 * deltaRho;
-         (D.f[TNE])[ktne]   = c1o216 * deltaRho;
-         (D.f[TSW])[ktsw]   = c1o216 * deltaRho;
-         (D.f[BNE])[kbne]   = c1o216 * deltaRho;
-         (D.f[BNW])[kbnw]   = c1o216 * deltaRho;
-         (D.f[TSE])[ktse]   = c1o216 * deltaRho;
-         (D.f[TNW])[ktnw]   = c1o216 * deltaRho;
-         (D.f[BSE])[kbse]   = c1o216 * deltaRho;
-         (D.f[REST])[kzero] = c8o27  * deltaRho;
+         (D.f[DIR_M00])[kw]       = c2o27  * deltaRho;
+         (D.f[DIR_P00])[ke]       = c2o27  * deltaRho;
+         (D.f[DIR_0M0])[ks]       = c2o27  * deltaRho;
+         (D.f[DIR_0P0])[kn]       = c2o27  * deltaRho;
+         (D.f[DIR_00M])[kb]       = c2o27  * deltaRho;
+         (D.f[DIR_00P])[kt]       = c2o27  * deltaRho;
+         (D.f[DIR_MM0])[ksw]     = c1o54  * deltaRho;
+         (D.f[DIR_PP0])[kne]     = c1o54  * deltaRho;
+         (D.f[DIR_MP0])[knw]     = c1o54  * deltaRho;
+         (D.f[DIR_PM0])[kse]     = c1o54  * deltaRho;
+         (D.f[DIR_M0M])[kbw]     = c1o54  * deltaRho;
+         (D.f[DIR_P0P])[kte]     = c1o54  * deltaRho;
+         (D.f[DIR_M0P])[ktw]     = c1o54  * deltaRho;
+         (D.f[DIR_P0M])[kbe]     = c1o54  * deltaRho;
+         (D.f[DIR_0MM])[kbs]     = c1o54  * deltaRho;
+         (D.f[DIR_0PP])[ktn]     = c1o54  * deltaRho;
+         (D.f[DIR_0MP])[kts]     = c1o54  * deltaRho;
+         (D.f[DIR_0PM])[kbn]     = c1o54  * deltaRho;
+         (D.f[DIR_MMM])[kbsw]   = c1o216 * deltaRho;
+         (D.f[DIR_PPP])[ktne]   = c1o216 * deltaRho;
+         (D.f[DIR_MMP])[ktsw]   = c1o216 * deltaRho;
+         (D.f[DIR_PPM])[kbne]   = c1o216 * deltaRho;
+         (D.f[DIR_MPM])[kbnw]   = c1o216 * deltaRho;
+         (D.f[DIR_PMP])[ktse]   = c1o216 * deltaRho;
+         (D.f[DIR_MPP])[ktnw]   = c1o216 * deltaRho;
+         (D.f[DIR_PMM])[kbse]   = c1o216 * deltaRho;
+         (D.f[DIR_000])[kzero] = c8o27  * deltaRho;
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -2555,7 +2558,7 @@ extern "C" __global__ void QPressDeviceFixBackflow27( real* rhoBC,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QPressDeviceDirDepBot27(  real* rhoBC,
+__global__ void QPressDeviceDirDepBot27(  real* rhoBC,
                                                      real* DD, 
                                                      int* k_Q, 
                                                      int numberOfBCnodes, 
@@ -2617,86 +2620,86 @@ extern "C" __global__ void QPressDeviceDirDepBot27(  real* rhoBC,
       Distributions27 D;
       if (isEvenTimestep==false)
       {
-         D.f[E   ] = &DD[E   *size_Mat];
-         D.f[W   ] = &DD[W   *size_Mat];
-         D.f[N   ] = &DD[N   *size_Mat];
-         D.f[S   ] = &DD[S   *size_Mat];
-         D.f[T   ] = &DD[T   *size_Mat];
-         D.f[B   ] = &DD[B   *size_Mat];
-         D.f[NE  ] = &DD[NE  *size_Mat];
-         D.f[SW  ] = &DD[SW  *size_Mat];
-         D.f[SE  ] = &DD[SE  *size_Mat];
-         D.f[NW  ] = &DD[NW  *size_Mat];
-         D.f[TE  ] = &DD[TE  *size_Mat];
-         D.f[BW  ] = &DD[BW  *size_Mat];
-         D.f[BE  ] = &DD[BE  *size_Mat];
-         D.f[TW  ] = &DD[TW  *size_Mat];
-         D.f[TN  ] = &DD[TN  *size_Mat];
-         D.f[BS  ] = &DD[BS  *size_Mat];
-         D.f[BN  ] = &DD[BN  *size_Mat];
-         D.f[TS  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[TNE *size_Mat];
-         D.f[TSW ] = &DD[TSW *size_Mat];
-         D.f[TSE ] = &DD[TSE *size_Mat];
-         D.f[TNW ] = &DD[TNW *size_Mat];
-         D.f[BNE ] = &DD[BNE *size_Mat];
-         D.f[BSW ] = &DD[BSW *size_Mat];
-         D.f[BSE ] = &DD[BSE *size_Mat];
-         D.f[BNW ] = &DD[BNW *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
       } 
       else
       {
-         D.f[W   ] = &DD[E   *size_Mat];
-         D.f[E   ] = &DD[W   *size_Mat];
-         D.f[S   ] = &DD[N   *size_Mat];
-         D.f[N   ] = &DD[S   *size_Mat];
-         D.f[B   ] = &DD[T   *size_Mat];
-         D.f[T   ] = &DD[B   *size_Mat];
-         D.f[SW  ] = &DD[NE  *size_Mat];
-         D.f[NE  ] = &DD[SW  *size_Mat];
-         D.f[NW  ] = &DD[SE  *size_Mat];
-         D.f[SE  ] = &DD[NW  *size_Mat];
-         D.f[BW  ] = &DD[TE  *size_Mat];
-         D.f[TE  ] = &DD[BW  *size_Mat];
-         D.f[TW  ] = &DD[BE  *size_Mat];
-         D.f[BE  ] = &DD[TW  *size_Mat];
-         D.f[BS  ] = &DD[TN  *size_Mat];
-         D.f[TN  ] = &DD[BS  *size_Mat];
-         D.f[TS  ] = &DD[BN  *size_Mat];
-         D.f[BN  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[BSW *size_Mat];
-         D.f[TSW ] = &DD[BNE *size_Mat];
-         D.f[TSE ] = &DD[BNW *size_Mat];
-         D.f[TNW ] = &DD[BSE *size_Mat];
-         D.f[BNE ] = &DD[TSW *size_Mat];
-         D.f[BSW ] = &DD[TNE *size_Mat];
-         D.f[BSE ] = &DD[TNW *size_Mat];
-         D.f[BNW ] = &DD[TSE *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       real f_E,f_W,f_N,f_S,f_T,f_NE,f_SW,f_SE,f_NW,f_TE,f_TW,f_TN,f_TS,f_ZERO,f_TNE,f_TSW,f_TSE,f_TNW;//,
             //f_B,f_BW,f_BE,f_BS,f_BN,f_BSW,f_BNE,f_BNW,f_BSE;
 
-      f_E    = (D.f[E   ])[ke   ];
-      f_W    = (D.f[W   ])[kw   ];
-      f_N    = (D.f[N   ])[kn   ];
-      f_S    = (D.f[S   ])[ks   ];
-      f_T    = (D.f[T   ])[kt   ];
-      f_NE   = (D.f[NE  ])[kne  ];
-      f_SW   = (D.f[SW  ])[ksw  ];
-      f_SE   = (D.f[SE  ])[kse  ];
-      f_NW   = (D.f[NW  ])[knw  ];
-      f_TE   = (D.f[TE  ])[kte  ];
-      f_TW   = (D.f[TW  ])[ktw  ];
-      f_TN   = (D.f[TN  ])[ktn  ];
-      f_TS   = (D.f[TS  ])[kts  ];
-      f_ZERO = (D.f[REST])[kzero];
-      f_TNE  = (D.f[TNE ])[ktne ];
-      f_TSW  = (D.f[TSW ])[ktsw ];
-      f_TSE  = (D.f[TSE ])[ktse ];
-      f_TNW  = (D.f[TNW ])[ktnw ];
+      f_E    = (D.f[DIR_P00   ])[ke   ];
+      f_W    = (D.f[DIR_M00   ])[kw   ];
+      f_N    = (D.f[DIR_0P0   ])[kn   ];
+      f_S    = (D.f[DIR_0M0   ])[ks   ];
+      f_T    = (D.f[DIR_00P   ])[kt   ];
+      f_NE   = (D.f[DIR_PP0  ])[kne  ];
+      f_SW   = (D.f[DIR_MM0  ])[ksw  ];
+      f_SE   = (D.f[DIR_PM0  ])[kse  ];
+      f_NW   = (D.f[DIR_MP0  ])[knw  ];
+      f_TE   = (D.f[DIR_P0P  ])[kte  ];
+      f_TW   = (D.f[DIR_M0P  ])[ktw  ];
+      f_TN   = (D.f[DIR_0PP  ])[ktn  ];
+      f_TS   = (D.f[DIR_0MP  ])[kts  ];
+      f_ZERO = (D.f[DIR_000])[kzero];
+      f_TNE  = (D.f[DIR_PPP ])[ktne ];
+      f_TSW  = (D.f[DIR_MMP ])[ktsw ];
+      f_TSE  = (D.f[DIR_PMP ])[ktse ];
+      f_TNW  = (D.f[DIR_MPP ])[ktnw ];
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
       //f_B   = (four*rho- four*f_SW-     eight*f_TSW-four*f_W-   eight*f_TW- four*f_NW-     eight*f_TNW-four*f_S-   eight*f_TS-four*f_ZERO+     f_T-four*f_N-   eight*f_TN- four*f_SE-     eight*f_TSE-four*f_E-   eight*f_TE- four*f_NE-     eight*f_TNE)/nine;
@@ -2716,44 +2719,44 @@ extern "C" __global__ void QPressDeviceDirDepBot27(  real* rhoBC,
 
       //real cusq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
-      //(D.f[REST])[kzero] = c8over27*  (drho-cusq);
-      //(D.f[E])[ke]    = c2over27*  (drho+three*( vx1        )+c9over2*( vx1        )*( vx1        )-cusq);
-      //(D.f[W])[kw]    = c2over27*  (drho+three*(-vx1        )+c9over2*(-vx1        )*(-vx1        )-cusq);
-      //(D.f[N])[kn]     = c2over27*  (drho+three*(    vx2     )+c9over2*(     vx2    )*(     vx2    )-cusq);
-      //(D.f[S])[ks]    = c2over27*  (drho+three*(   -vx2     )+c9over2*(    -vx2    )*(    -vx2    )-cusq);
-      //(D.f[T])[kt]    = c2over27*  (drho+three*(         vx3)+c9over2*(         vx3)*(         vx3)-cusq);
-      //(D.f[B])[kb]    = c2over27*  (drho+three*(        -vx3)+c9over2*(        -vx3)*(        -vx3)-cusq);
-      //(D.f[NE])[kne]   = c1over54*  (drho+three*( vx1+vx2    )+c9over2*( vx1+vx2    )*( vx1+vx2    )-cusq);
-      //(D.f[SW])[ksw]   = c1over54*  (drho+three*(-vx1-vx2    )+c9over2*(-vx1-vx2    )*(-vx1-vx2    )-cusq);
-      //(D.f[SE])[kse]   =  c1over54* (drho+three*( vx1-vx2    )+c9over2*( vx1-vx2    )*( vx1-vx2    )-cusq);
-      //(D.f[NW])[knw]   =  c1over54* (drho+three*(-vx1+vx2    )+c9over2*(-vx1+vx2    )*(-vx1+vx2    )-cusq);
-      //(D.f[TE])[kte]   =  c1over54* (drho+three*( vx1    +vx3)+c9over2*( vx1    +vx3)*( vx1    +vx3)-cusq);
-      //(D.f[BW])[kbw]   =  c1over54* (drho+three*(-vx1    -vx3)+c9over2*(-vx1    -vx3)*(-vx1    -vx3)-cusq);
-      //(D.f[BE])[kbe]   =  c1over54* (drho+three*( vx1    -vx3)+c9over2*( vx1    -vx3)*( vx1    -vx3)-cusq);
-      //(D.f[TW])[ktw]   =  c1over54* (drho+three*(-vx1    +vx3)+c9over2*(-vx1    +vx3)*(-vx1    +vx3)-cusq);
-      //(D.f[TN])[ktn]   =  c1over54* (drho+three*(     vx2+vx3)+c9over2*(     vx2+vx3)*(     vx2+vx3)-cusq);
-      //(D.f[BS])[kbs]   =  c1over54* (drho+three*(    -vx2-vx3)+c9over2*(    -vx2-vx3)*(    -vx2-vx3)-cusq);
-      //(D.f[BN])[kbn]   =  c1over54* (drho+three*(     vx2-vx3)+c9over2*(     vx2-vx3)*(     vx2-vx3)-cusq);
-      //(D.f[TS])[kts]   =  c1over54* (drho+three*(    -vx2+vx3)+c9over2*(    -vx2+vx3)*(    -vx2+vx3)-cusq);
-      //(D.f[TNE])[ktne]  =  c1over216*(drho+three*( vx1+vx2+vx3)+c9over2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cusq);
-      //(D.f[BSW])[kbsw]  =  c1over216*(drho+three*(-vx1-vx2-vx3)+c9over2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cusq);
-      //(D.f[BNE])[kbne]  =  c1over216*(drho+three*( vx1+vx2-vx3)+c9over2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cusq);
-      //(D.f[TSW])[ktsw]  =  c1over216*(drho+three*(-vx1-vx2+vx3)+c9over2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cusq);
-      //(D.f[TSE])[ktse]  =  c1over216*(drho+three*( vx1-vx2+vx3)+c9over2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cusq);
-      //(D.f[BNW])[kbnw]  =  c1over216*(drho+three*(-vx1+vx2-vx3)+c9over2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cusq);
-      //(D.f[BSE])[kbse]  =  c1over216*(drho+three*( vx1-vx2-vx3)+c9over2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cusq);
-      //(D.f[TNW])[ktnw]  =  c1over216*(drho+three*(-vx1+vx2+vx3)+c9over2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cusq);
+      //(D.f[DIR_000])[kzero] = c8over27*  (drho-cusq);
+      //(D.f[DIR_P00])[ke]    = c2over27*  (drho+three*( vx1        )+c9over2*( vx1        )*( vx1        )-cusq);
+      //(D.f[DIR_M00])[kw]    = c2over27*  (drho+three*(-vx1        )+c9over2*(-vx1        )*(-vx1        )-cusq);
+      //(D.f[DIR_0P0])[kn]     = c2over27*  (drho+three*(    vx2     )+c9over2*(     vx2    )*(     vx2    )-cusq);
+      //(D.f[DIR_0M0])[ks]    = c2over27*  (drho+three*(   -vx2     )+c9over2*(    -vx2    )*(    -vx2    )-cusq);
+      //(D.f[DIR_00P])[kt]    = c2over27*  (drho+three*(         vx3)+c9over2*(         vx3)*(         vx3)-cusq);
+      //(D.f[DIR_00M])[kb]    = c2over27*  (drho+three*(        -vx3)+c9over2*(        -vx3)*(        -vx3)-cusq);
+      //(D.f[DIR_PP0])[kne]   = c1over54*  (drho+three*( vx1+vx2    )+c9over2*( vx1+vx2    )*( vx1+vx2    )-cusq);
+      //(D.f[DIR_MM0])[ksw]   = c1over54*  (drho+three*(-vx1-vx2    )+c9over2*(-vx1-vx2    )*(-vx1-vx2    )-cusq);
+      //(D.f[DIR_PM0])[kse]   =  c1over54* (drho+three*( vx1-vx2    )+c9over2*( vx1-vx2    )*( vx1-vx2    )-cusq);
+      //(D.f[DIR_MP0])[knw]   =  c1over54* (drho+three*(-vx1+vx2    )+c9over2*(-vx1+vx2    )*(-vx1+vx2    )-cusq);
+      //(D.f[DIR_P0P])[kte]   =  c1over54* (drho+three*( vx1    +vx3)+c9over2*( vx1    +vx3)*( vx1    +vx3)-cusq);
+      //(D.f[DIR_M0M])[kbw]   =  c1over54* (drho+three*(-vx1    -vx3)+c9over2*(-vx1    -vx3)*(-vx1    -vx3)-cusq);
+      //(D.f[DIR_P0M])[kbe]   =  c1over54* (drho+three*( vx1    -vx3)+c9over2*( vx1    -vx3)*( vx1    -vx3)-cusq);
+      //(D.f[DIR_M0P])[ktw]   =  c1over54* (drho+three*(-vx1    +vx3)+c9over2*(-vx1    +vx3)*(-vx1    +vx3)-cusq);
+      //(D.f[DIR_0PP])[ktn]   =  c1over54* (drho+three*(     vx2+vx3)+c9over2*(     vx2+vx3)*(     vx2+vx3)-cusq);
+      //(D.f[DIR_0MM])[kbs]   =  c1over54* (drho+three*(    -vx2-vx3)+c9over2*(    -vx2-vx3)*(    -vx2-vx3)-cusq);
+      //(D.f[DIR_0PM])[kbn]   =  c1over54* (drho+three*(     vx2-vx3)+c9over2*(     vx2-vx3)*(     vx2-vx3)-cusq);
+      //(D.f[DIR_0MP])[kts]   =  c1over54* (drho+three*(    -vx2+vx3)+c9over2*(    -vx2+vx3)*(    -vx2+vx3)-cusq);
+      //(D.f[DIR_PPP])[ktne]  =  c1over216*(drho+three*( vx1+vx2+vx3)+c9over2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cusq);
+      //(D.f[DIR_MMM])[kbsw]  =  c1over216*(drho+three*(-vx1-vx2-vx3)+c9over2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cusq);
+      //(D.f[DIR_PPM])[kbne]  =  c1over216*(drho+three*( vx1+vx2-vx3)+c9over2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cusq);
+      //(D.f[DIR_MMP])[ktsw]  =  c1over216*(drho+three*(-vx1-vx2+vx3)+c9over2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cusq);
+      //(D.f[DIR_PMP])[ktse]  =  c1over216*(drho+three*( vx1-vx2+vx3)+c9over2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cusq);
+      //(D.f[DIR_MPM])[kbnw]  =  c1over216*(drho+three*(-vx1+vx2-vx3)+c9over2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cusq);
+      //(D.f[DIR_PMM])[kbse]  =  c1over216*(drho+three*( vx1-vx2-vx3)+c9over2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cusq);
+      //(D.f[DIR_MPP])[ktnw]  =  c1over216*(drho+three*(-vx1+vx2+vx3)+c9over2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cusq);
       real drho   =    f_ZERO+f_E+f_W+f_N+f_S+f_T+f_NE+f_SW+f_SE+f_NW+f_TE+f_TW+f_TN+f_TS+f_TNE+f_TSW+f_TSE+f_TNW;
       real dTop   =    f_T+f_TE+f_TW+f_TN+f_TS+f_TNE+f_TSW+f_TSE+f_TNW;
-      (D.f[B])[kb]     = (f_T+c2o27)*(rho-drho+c1o1/c6o1)/(dTop+c1o1/c6o1)-c2o27;
-      (D.f[BW])[kbw]   = (f_TW+c1o54)*(rho-drho+c1o1/c6o1)/(dTop+c1o1/c6o1)-c1o54;
-      (D.f[BE])[kbe]   = (f_TE+c1o54)*(rho-drho+c1o1/c6o1)/(dTop+c1o1/c6o1)-c1o54;
-      (D.f[BS])[kbs]   = (f_TS+c1o54)*(rho-drho+c1o1/c6o1)/(dTop+c1o1/c6o1)-c1o54;
-      (D.f[BN])[kbn]   = (f_TN+c1o54)*(rho-drho+c1o1/c6o1)/(dTop+c1o1/c6o1)-c1o54;
-      (D.f[BSW])[kbsw] = (f_TSW+c1o216)*(rho-drho+c1o1/c6o1)/(dTop+c1o1/c6o1)-c1o216;
-      (D.f[BNE])[kbne] = (f_TNE+c1o216)*(rho-drho+c1o1/c6o1)/(dTop+c1o1/c6o1)-c1o216;
-      (D.f[BNW])[kbnw] = (f_TNW+c1o216)*(rho-drho+c1o1/c6o1)/(dTop+c1o1/c6o1)-c1o216;
-      (D.f[BSE])[kbse] = (f_TSE+c1o216)*(rho-drho+c1o1/c6o1)/(dTop+c1o1/c6o1)-c1o216;
+      (D.f[DIR_00M])[kb]     = (f_T+c2o27)*(rho-drho+c1o1/c6o1)/(dTop+c1o1/c6o1)-c2o27;
+      (D.f[DIR_M0M])[kbw]   = (f_TW+c1o54)*(rho-drho+c1o1/c6o1)/(dTop+c1o1/c6o1)-c1o54;
+      (D.f[DIR_P0M])[kbe]   = (f_TE+c1o54)*(rho-drho+c1o1/c6o1)/(dTop+c1o1/c6o1)-c1o54;
+      (D.f[DIR_0MM])[kbs]   = (f_TS+c1o54)*(rho-drho+c1o1/c6o1)/(dTop+c1o1/c6o1)-c1o54;
+      (D.f[DIR_0PM])[kbn]   = (f_TN+c1o54)*(rho-drho+c1o1/c6o1)/(dTop+c1o1/c6o1)-c1o54;
+      (D.f[DIR_MMM])[kbsw] = (f_TSW+c1o216)*(rho-drho+c1o1/c6o1)/(dTop+c1o1/c6o1)-c1o216;
+      (D.f[DIR_PPM])[kbne] = (f_TNE+c1o216)*(rho-drho+c1o1/c6o1)/(dTop+c1o1/c6o1)-c1o216;
+      (D.f[DIR_MPM])[kbnw] = (f_TNW+c1o216)*(rho-drho+c1o1/c6o1)/(dTop+c1o1/c6o1)-c1o216;
+      (D.f[DIR_PMM])[kbse] = (f_TSE+c1o216)*(rho-drho+c1o1/c6o1)/(dTop+c1o1/c6o1)-c1o216;
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -2793,12 +2796,14 @@ extern "C" __global__ void QPressDeviceDirDepBot27(  real* rhoBC,
 
 
 
-
-
+__host__ __device__ real computeOutflowDistribution(const real* const &f, const real* const &f1, const int dir, const real cs) // Returns, for the single index dir, the weighted blend f1[dir] * cs + (c1o1 - cs) * f[dir]; neither array is modified.
+{
+   return f1[dir] * cs + (c1o1 - cs) * f[dir]; // cs weights f1, (c1o1 - cs) weights f; c1o1 is defined elsewhere (presumably the constant 1 - TODO confirm). Operand order kept as-is on purpose.
+}
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QPressNoRhoDevice27(  real* rhoBC,
-												 real* DD, 
+__global__ void QPressNoRhoDevice27( real* rhoBC,
+												 real* distributions, 
 												 int* k_Q, 
 												 int* k_N, 
 												 int numberOfBCnodes, 
@@ -2806,238 +2811,176 @@ extern "C" __global__ void QPressNoRhoDevice27(  real* rhoBC,
 												 unsigned int* neighborX,
 												 unsigned int* neighborY,
 												 unsigned int* neighborZ,
-												 unsigned int size_Mat, 
-												 bool isEvenTimestep)
+												 unsigned int numberOfLBnodes, 
+												 bool isEvenTimestep,
+                                     int direction)
 {
    ////////////////////////////////////////////////////////////////////////////////
-   const unsigned  x = threadIdx.x;  // Globaler x-Index 
-   const unsigned  y = blockIdx.x;   // Globaler y-Index 
-   const unsigned  z = blockIdx.y;   // Globaler z-Index 
 
-   const unsigned nx = blockDim.x;
-   const unsigned ny = gridDim.x;
 
-   const unsigned k = nx*(ny*z + y) + x;
+   const unsigned k = vf::gpu::getNodeIndex();
    //////////////////////////////////////////////////////////////////////////
 
-   if(k<numberOfBCnodes)
-   {
-      ////////////////////////////////////////////////////////////////////////////////
-      //index
-      unsigned int KQK  = k_Q[k];
-      //unsigned int kzero= KQK;
-      unsigned int ke   = KQK;
-      unsigned int kw   = neighborX[KQK];
-      unsigned int kn   = KQK;
-      unsigned int ks   = neighborY[KQK];
-      unsigned int kt   = KQK;
-      unsigned int kb   = neighborZ[KQK];
-      unsigned int ksw  = neighborY[kw];
-      unsigned int kne  = KQK;
-      unsigned int kse  = ks;
-      unsigned int knw  = kw;
-      unsigned int kbw  = neighborZ[kw];
-      unsigned int kte  = KQK;
-      unsigned int kbe  = kb;
-      unsigned int ktw  = kw;
-      unsigned int kbs  = neighborZ[ks];
-      unsigned int ktn  = KQK;
-      unsigned int kbn  = kb;
-      unsigned int kts  = ks;
-      unsigned int ktse = ks;
-      unsigned int kbnw = kbw;
-      unsigned int ktnw = kw;
-      unsigned int kbse = kbs;
-      unsigned int ktsw = ksw;
-      unsigned int kbne = kb;
-      unsigned int ktne = KQK;
-      unsigned int kbsw = neighborZ[ksw];
-      ////////////////////////////////////////////////////////////////////////////////
-      //index1
-      unsigned int K1QK  = k_N[k];
-      //unsigned int k1zero= K1QK;
-      unsigned int k1e   = K1QK;
-      unsigned int k1w   = neighborX[K1QK];
-      unsigned int k1n   = K1QK;
-      unsigned int k1s   = neighborY[K1QK];
-      unsigned int k1t   = K1QK;
-      unsigned int k1b   = neighborZ[K1QK];
-      unsigned int k1sw  = neighborY[k1w];
-      unsigned int k1ne  = K1QK;
-      unsigned int k1se  = k1s;
-      unsigned int k1nw  = k1w;
-      unsigned int k1bw  = neighborZ[k1w];
-      unsigned int k1te  = K1QK;
-      unsigned int k1be  = k1b;
-      unsigned int k1tw  = k1w;
-      unsigned int k1bs  = neighborZ[k1s];
-      unsigned int k1tn  = K1QK;
-      unsigned int k1bn  = k1b;
-      unsigned int k1ts  = k1s;
-      unsigned int k1tse = k1s;
-      unsigned int k1bnw = k1bw;
-      unsigned int k1tnw = k1w;
-      unsigned int k1bse = k1bs;
-      unsigned int k1tsw = k1sw;
-      unsigned int k1bne = k1b;
-      unsigned int k1tne = K1QK;
-      unsigned int k1bsw = neighborZ[k1sw];
-      ////////////////////////////////////////////////////////////////////////////////
-      Distributions27 D;
-      if (isEvenTimestep==true)
-      {
-         D.f[E   ] = &DD[E   *size_Mat];
-         D.f[W   ] = &DD[W   *size_Mat];
-         D.f[N   ] = &DD[N   *size_Mat];
-         D.f[S   ] = &DD[S   *size_Mat];
-         D.f[T   ] = &DD[T   *size_Mat];
-         D.f[B   ] = &DD[B   *size_Mat];
-         D.f[NE  ] = &DD[NE  *size_Mat];
-         D.f[SW  ] = &DD[SW  *size_Mat];
-         D.f[SE  ] = &DD[SE  *size_Mat];
-         D.f[NW  ] = &DD[NW  *size_Mat];
-         D.f[TE  ] = &DD[TE  *size_Mat];
-         D.f[BW  ] = &DD[BW  *size_Mat];
-         D.f[BE  ] = &DD[BE  *size_Mat];
-         D.f[TW  ] = &DD[TW  *size_Mat];
-         D.f[TN  ] = &DD[TN  *size_Mat];
-         D.f[BS  ] = &DD[BS  *size_Mat];
-         D.f[BN  ] = &DD[BN  *size_Mat];
-         D.f[TS  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[TNE *size_Mat];
-         D.f[TSW ] = &DD[TSW *size_Mat];
-         D.f[TSE ] = &DD[TSE *size_Mat];
-         D.f[TNW ] = &DD[TNW *size_Mat];
-         D.f[BNE ] = &DD[BNE *size_Mat];
-         D.f[BSW ] = &DD[BSW *size_Mat];
-         D.f[BSE ] = &DD[BSE *size_Mat];
-         D.f[BNW ] = &DD[BNW *size_Mat];
-      } 
-      else
-      {
-         D.f[W   ] = &DD[E   *size_Mat];
-         D.f[E   ] = &DD[W   *size_Mat];
-         D.f[S   ] = &DD[N   *size_Mat];
-         D.f[N   ] = &DD[S   *size_Mat];
-         D.f[B   ] = &DD[T   *size_Mat];
-         D.f[T   ] = &DD[B   *size_Mat];
-         D.f[SW  ] = &DD[NE  *size_Mat];
-         D.f[NE  ] = &DD[SW  *size_Mat];
-         D.f[NW  ] = &DD[SE  *size_Mat];
-         D.f[SE  ] = &DD[NW  *size_Mat];
-         D.f[BW  ] = &DD[TE  *size_Mat];
-         D.f[TE  ] = &DD[BW  *size_Mat];
-         D.f[TW  ] = &DD[BE  *size_Mat];
-         D.f[BE  ] = &DD[TW  *size_Mat];
-         D.f[BS  ] = &DD[TN  *size_Mat];
-         D.f[TN  ] = &DD[BS  *size_Mat];
-         D.f[TS  ] = &DD[BN  *size_Mat];
-         D.f[BN  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[BSW *size_Mat];
-         D.f[TSW ] = &DD[BNE *size_Mat];
-         D.f[TSE ] = &DD[BNW *size_Mat];
-         D.f[TNW ] = &DD[BSE *size_Mat];
-         D.f[BNE ] = &DD[TSW *size_Mat];
-         D.f[BSW ] = &DD[TNE *size_Mat];
-         D.f[BSE ] = &DD[TNW *size_Mat];
-         D.f[BNW ] = &DD[TSE *size_Mat];
-      }
-      //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      real f1_E    = (D.f[E   ])[k1e   ];
-      real f1_W    = (D.f[W   ])[k1w   ];
-      real f1_N    = (D.f[N   ])[k1n   ];
-      real f1_S    = (D.f[S   ])[k1s   ];
-      real f1_T    = (D.f[T   ])[k1t   ];
-      real f1_B    = (D.f[B   ])[k1b   ];
-      real f1_NE   = (D.f[NE  ])[k1ne  ];
-      real f1_SW   = (D.f[SW  ])[k1sw  ];
-      real f1_SE   = (D.f[SE  ])[k1se  ];
-      real f1_NW   = (D.f[NW  ])[k1nw  ];
-      real f1_TE   = (D.f[TE  ])[k1te  ];
-      real f1_BW   = (D.f[BW  ])[k1bw  ];
-      real f1_BE   = (D.f[BE  ])[k1be  ];
-      real f1_TW   = (D.f[TW  ])[k1tw  ];
-      real f1_TN   = (D.f[TN  ])[k1tn  ];
-      real f1_BS   = (D.f[BS  ])[k1bs  ];
-      real f1_BN   = (D.f[BN  ])[k1bn  ];
-      real f1_TS   = (D.f[TS  ])[k1ts  ];
-      //real f1_ZERO = (D.f[REST])[k1zero];
-      real f1_TNE  = (D.f[TNE ])[k1tne ];
-      real f1_TSW  = (D.f[TSW ])[k1tsw ];
-      real f1_TSE  = (D.f[TSE ])[k1tse ];
-      real f1_TNW  = (D.f[TNW ])[k1tnw ];
-      real f1_BNE  = (D.f[BNE ])[k1bne ];
-      real f1_BSW  = (D.f[BSW ])[k1bsw ];
-      real f1_BSE  = (D.f[BSE ])[k1bse ];
-      real f1_BNW  = (D.f[BNW ])[k1bnw ];
-      //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      real f_E    = (D.f[E   ])[ke   ];
-      real f_W    = (D.f[W   ])[kw   ];
-      real f_N    = (D.f[N   ])[kn   ];
-      real f_S    = (D.f[S   ])[ks   ];
-      real f_T    = (D.f[T   ])[kt   ];
-      real f_B    = (D.f[B   ])[kb   ];
-      real f_NE   = (D.f[NE  ])[kne  ];
-      real f_SW   = (D.f[SW  ])[ksw  ];
-      real f_SE   = (D.f[SE  ])[kse  ];
-      real f_NW   = (D.f[NW  ])[knw  ];
-      real f_TE   = (D.f[TE  ])[kte  ];
-      real f_BW   = (D.f[BW  ])[kbw  ];
-      real f_BE   = (D.f[BE  ])[kbe  ];
-      real f_TW   = (D.f[TW  ])[ktw  ];
-      real f_TN   = (D.f[TN  ])[ktn  ];
-      real f_BS   = (D.f[BS  ])[kbs  ];
-      real f_BN   = (D.f[BN  ])[kbn  ];
-      real f_TS   = (D.f[TS  ])[kts  ];
-      //real f_ZERO = (D.f[REST])[kzero];
-      real f_TNE  = (D.f[TNE ])[ktne ];
-      real f_TSW  = (D.f[TSW ])[ktsw ];
-      real f_TSE  = (D.f[TSE ])[ktse ];
-      real f_TNW  = (D.f[TNW ])[ktnw ];
-      real f_BNE  = (D.f[BNE ])[kbne ];
-      real f_BSW  = (D.f[BSW ])[kbsw ];
-      real f_BSE  = (D.f[BSE ])[kbse ];
-      real f_BNW  = (D.f[BNW ])[kbnw ];
-      //////////////////////////////////////////////////////////////////////////
+   if(k>=numberOfBCnodes) return;
 
-      //real vx1, vx2, vx3, drho;
-      //real vx1, vx2, vx3, drho, drho1;
-      //////////////////////////////////////////////////////////////////////////
-	  //Dichte
-    //   drho1  =  f1_TSE + f1_TNW + f1_TNE + f1_TSW + f1_BSE + f1_BNW + f1_BNE + f1_BSW +
-    //             f1_BN + f1_TS + f1_TN + f1_BS + f1_BE + f1_TW + f1_TE + f1_BW + f1_SE + f1_NW + f1_NE + f1_SW + 
-    //             f1_T + f1_B + f1_N + f1_S + f1_E + f1_W + ((D.f[REST])[k1zero]); 
-    //   drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
-    //             f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + 
-    //             f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[REST])[kzero]); 
-      
-      //////////////////////////////////////////////////////////////////////////
-	  //Ux
+   ////////////////////////////////////////////////////////////////////////////////
+   //index
+   unsigned int KQK  = k_Q[k];
+   // unsigned int kzero= KQK;
+   unsigned int ke   = KQK;
+   unsigned int kw   = neighborX[KQK];
+   unsigned int kn   = KQK;
+   unsigned int ks   = neighborY[KQK];
+   unsigned int kt   = KQK;
+   unsigned int kb   = neighborZ[KQK];
+   unsigned int ksw  = neighborY[kw];
+   unsigned int kne  = KQK;
+   unsigned int kse  = ks;
+   unsigned int knw  = kw;
+   unsigned int kbw  = neighborZ[kw];
+   unsigned int kte  = KQK;
+   unsigned int kbe  = kb;
+   unsigned int ktw  = kw;
+   unsigned int kbs  = neighborZ[ks];
+   unsigned int ktn  = KQK;
+   unsigned int kbn  = kb;
+   unsigned int kts  = ks;
+   unsigned int ktse = ks;
+   unsigned int kbnw = kbw;
+   unsigned int ktnw = kw;
+   unsigned int kbse = kbs;
+   unsigned int ktsw = ksw;
+   unsigned int kbne = kb;
+   unsigned int ktne = KQK;
+   unsigned int kbsw = neighborZ[ksw];
+   ////////////////////////////////////////////////////////////////////////////////
+   //index1
+   unsigned int K1QK  = k_N[k];
+   //unsigned int k1zero= K1QK;
+   unsigned int k1e   = K1QK;
+   unsigned int k1w   = neighborX[K1QK];
+   unsigned int k1n   = K1QK;
+   unsigned int k1s   = neighborY[K1QK];
+   unsigned int k1t   = K1QK;
+   unsigned int k1b   = neighborZ[K1QK];
+   unsigned int k1sw  = neighborY[k1w];
+   unsigned int k1ne  = K1QK;
+   unsigned int k1se  = k1s;
+   unsigned int k1nw  = k1w;
+   unsigned int k1bw  = neighborZ[k1w];
+   unsigned int k1te  = K1QK;
+   unsigned int k1be  = k1b;
+   unsigned int k1tw  = k1w;
+   unsigned int k1bs  = neighborZ[k1s];
+   unsigned int k1tn  = K1QK;
+   unsigned int k1bn  = k1b;
+   unsigned int k1ts  = k1s;
+   unsigned int k1tse = k1s;
+   unsigned int k1bnw = k1bw;
+   unsigned int k1tnw = k1w;
+   unsigned int k1bse = k1bs;
+   unsigned int k1tsw = k1sw;
+   unsigned int k1bne = k1b;
+   unsigned int k1tne = K1QK;
+   unsigned int k1bsw = neighborZ[k1sw];
+   ////////////////////////////////////////////////////////////////////////////////
+   Distributions27 dist;
+   getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep);      
+   real f[27], f1[27]; 
+   //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+   f1[DIR_P00] = (dist.f[DIR_P00])[k1e   ];
+   f1[DIR_M00] = (dist.f[DIR_M00])[k1w   ];
+   f1[DIR_0P0] = (dist.f[DIR_0P0])[k1n   ];
+   f1[DIR_0M0] = (dist.f[DIR_0M0])[k1s   ];
+   f1[DIR_00P] = (dist.f[DIR_00P])[k1t   ];
+   f1[DIR_00M] = (dist.f[DIR_00M])[k1b   ];
+   f1[DIR_PP0] = (dist.f[DIR_PP0])[k1ne  ];
+   f1[DIR_MM0] = (dist.f[DIR_MM0])[k1sw  ];
+   f1[DIR_PM0] = (dist.f[DIR_PM0])[k1se  ];
+   f1[DIR_MP0] = (dist.f[DIR_MP0])[k1nw  ];
+   f1[DIR_P0P] = (dist.f[DIR_P0P])[k1te  ];
+   f1[DIR_M0M] = (dist.f[DIR_M0M])[k1bw  ];
+   f1[DIR_P0M] = (dist.f[DIR_P0M])[k1be  ];
+   f1[DIR_M0P] = (dist.f[DIR_M0P])[k1tw  ];
+   f1[DIR_0PP] = (dist.f[DIR_0PP])[k1tn  ];
+   f1[DIR_0MM] = (dist.f[DIR_0MM])[k1bs  ];
+   f1[DIR_0PM] = (dist.f[DIR_0PM])[k1bn  ];
+   f1[DIR_0MP] = (dist.f[DIR_0MP])[k1ts  ];
+   // f1[DIR_000] = (dist.f[DIR_000])[k1zero];
+   f1[DIR_PPP] = (dist.f[DIR_PPP])[k1tne ];
+   f1[DIR_MMP] = (dist.f[DIR_MMP])[k1tsw ];
+   f1[DIR_PMP] = (dist.f[DIR_PMP])[k1tse ];
+   f1[DIR_MPP] = (dist.f[DIR_MPP])[k1tnw ];
+   f1[DIR_PPM] = (dist.f[DIR_PPM])[k1bne ];
+   f1[DIR_MMM] = (dist.f[DIR_MMM])[k1bsw ];
+   f1[DIR_PMM] = (dist.f[DIR_PMM])[k1bse ];
+   f1[DIR_MPM] = (dist.f[DIR_MPM])[k1bnw ];
+   //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+   f[DIR_P00] = (dist.f[DIR_P00])[ke   ];
+   f[DIR_M00] = (dist.f[DIR_M00])[kw   ];
+   f[DIR_0P0] = (dist.f[DIR_0P0])[kn   ];
+   f[DIR_0M0] = (dist.f[DIR_0M0])[ks   ];
+   f[DIR_00P] = (dist.f[DIR_00P])[kt   ];
+   f[DIR_00M] = (dist.f[DIR_00M])[kb   ];
+   f[DIR_PP0] = (dist.f[DIR_PP0])[kne  ];
+   f[DIR_MM0] = (dist.f[DIR_MM0])[ksw  ];
+   f[DIR_PM0] = (dist.f[DIR_PM0])[kse  ];
+   f[DIR_MP0] = (dist.f[DIR_MP0])[knw  ];
+   f[DIR_P0P] = (dist.f[DIR_P0P])[kte  ];
+   f[DIR_M0M] = (dist.f[DIR_M0M])[kbw  ];
+   f[DIR_P0M] = (dist.f[DIR_P0M])[kbe  ];
+   f[DIR_M0P] = (dist.f[DIR_M0P])[ktw  ];
+   f[DIR_0PP] = (dist.f[DIR_0PP])[ktn  ];
+   f[DIR_0MM] = (dist.f[DIR_0MM])[kbs  ];
+   f[DIR_0PM] = (dist.f[DIR_0PM])[kbn  ];
+   f[DIR_0MP] = (dist.f[DIR_0MP])[kts  ];
+   // f[DIR_000] = (dist.f[DIR_000])[kzero];
+   f[DIR_PPP] = (dist.f[DIR_PPP])[ktne ];
+   f[DIR_MMP] = (dist.f[DIR_MMP])[ktsw ];
+   f[DIR_PMP] = (dist.f[DIR_PMP])[ktse ];
+   f[DIR_MPP] = (dist.f[DIR_MPP])[ktnw ];
+   f[DIR_PPM] = (dist.f[DIR_PPM])[kbne ];
+   f[DIR_MMM] = (dist.f[DIR_MMM])[kbsw ];
+   f[DIR_PMM] = (dist.f[DIR_PMM])[kbse ];
+   f[DIR_MPM] = (dist.f[DIR_MPM])[kbnw ];
+   //////////////////////////////////////////////////////////////////////////
+
+   //real vx1, vx2, vx3, drho;
+   //real vx1, vx2, vx3, drho, drho1;
+   //////////////////////////////////////////////////////////////////////////
+   ////Dichte
+   //   drho1  =  f1_TSE + f1_TNW + f1_TNE + f1_TSW + f1_BSE + f1_BNW + f1_BNE + f1_BSW +
+   //             f1_BN + f1_TS + f1_TN + f1_BS + f1_BE + f1_TW + f1_TE + f1_BW + f1_SE + f1_NW + f1_NE + f1_SW + 
+   //             f1_T + f1_B + f1_N + f1_S + f1_E + f1_W + ((D.f[DIR_000])[k1zero]); 
+   //   drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
+   //             f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + 
+   //             f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[DIR_000])[kzero]); 
+
+   //////////////////////////////////////////////////////////////////////////
+   ////Ux
 
-	  //vx1    =  (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
+   //vx1    =  (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
    //               ((f_BE - f_TW)   + (f_TE - f_BW))   + ((f_SE - f_NW)   + (f_NE - f_SW)) +
    //               (f_E - f_W)) /(one + drho); 
 
 
-   //   vx2    =   ((-(f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
+   //vx2    =   ((-(f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
    //               ((f_BN - f_TS)   + (f_TN - f_BS))    + (-(f_SE - f_NW)  + (f_NE - f_SW)) +
    //               (f_N - f_S)) /(one + drho); 
 
-   //   vx3    =   (((f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) + (f_TSW - f_BNE)) +
+   //vx3    =   (((f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) + (f_TSW - f_BNE)) +
    //               (-(f_BN - f_TS)  + (f_TN - f_BS))   + ((f_TE - f_BW)   - (f_BE - f_TW)) +
    //               (f_T - f_B)) /(one + drho); 
 
 
-      //real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
+   //real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
-   //   //////////////////////////////////////////////////////////////////////////
-	  ////real omega = om1;
+   //////////////////////////////////////////////////////////////////////////
+	////real omega = om1;
    //   real cusq  = c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
    //   //////////////////////////////////////////////////////////////////////////
-	  ////T�st MK
-	  ////if(vx1 < zero) vx1 = zero;
+   ////Test MK
+   ////if(vx1 < zero) vx1 = zero;
    //   //////////////////////////////////////////////////////////////////////////
    //   real fZERO = c8over27*  (drho1-(one + drho1)*(cusq))                                                           ;
    //   real fE    = c2over27*  (drho1+(one + drho1)*(three*( vx1        )+c9over2*( vx1        )*( vx1        )-cusq));
@@ -3050,10 +2993,75 @@ extern "C" __global__ void QPressNoRhoDevice27(  real* rhoBC,
    //   real fSW   = c1over54*  (drho1+(one + drho1)*(three*(-vx1-vx2    )+c9over2*(-vx1-vx2    )*(-vx1-vx2    )-cusq));
    //   real fSE   = c1over54*  (drho1+(one + drho1)*(three*( vx1-vx2    )+c9over2*( vx1-vx2    )*( vx1-vx2    )-cusq));
    //   real fNW   = c1over54*  (drho1+(one + drho1)*(three*(-vx1+vx2    )+c9over2*(-vx1+vx2    )*(-vx1+vx2    )-cusq));
-   //   real fTE   = c1over54*  (drho1+(one + drho1)*(three*( vx1    +vx3)+c9over2*( vx1    +vx3)*( vx1    +vx3)-cusq));
-   //   real fBW   = c1over54*  (drho1+(one + drho1)*(three*(-vx1    -vx3)+c9over2*(-vx1    -vx3)*(-vx1    -vx3)-cusq));
-   //   real fBE   = c1over54*  (drho1+(one + drho1)*(three*( vx1    -vx3)+c9over2*( vx1    -vx3)*( vx1    -vx3)-cusq));
-   //   real fTW   = c1over54*  (drho1+(one + drho1)*(three*(-vx1    +vx3)+c9over2*(-vx1    +vx3)*(-vx1    +vx3)-cusq));
+   //   real fTE	  /////////////////////////////////////////////////////////////
+   //with velocity
+   //if(true){//vx1 >= zero){
+      // real csMvx = one / sqrtf(three) - vx1;
+      // //real csMvy = one / sqrtf(three) - vx2;
+      // ///////////////////////////////////////////
+      // // X
+      // f_W   = f1_W   * csMvx + (one - csMvx) * f_W   ;//- c2over27  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
+      // f_NW  = f1_NW  * csMvx + (one - csMvx) * f_NW  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
+      // f_SW  = f1_SW  * csMvx + (one - csMvx) * f_SW  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
+      // f_TW  = f1_TW  * csMvx + (one - csMvx) * f_TW  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
+      // f_BW  = f1_BW  * csMvx + (one - csMvx) * f_BW  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
+      // f_TNW = f1_TNW * csMvx + (one - csMvx) * f_TNW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
+      // f_TSW = f1_TSW * csMvx + (one - csMvx) * f_TSW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
+      // f_BNW = f1_BNW * csMvx + (one - csMvx) * f_BNW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
+      // f_BSW = f1_BSW * csMvx + (one - csMvx) * f_BSW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
+      // ///////////////////////////////////////////
+      // // Y
+      // //f_S   = f1_S   * csMvy + (one - csMvy) * f_S   ;//- c2over27  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
+      // //f_SE  = f1_SE  * csMvy + (one - csMvy) * f_SE  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
+      // //f_SW  = f1_SW  * csMvy + (one - csMvy) * f_SW  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
+      // //f_TS  = f1_TS  * csMvy + (one - csMvy) * f_TS  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
+      // //f_BS  = f1_BS  * csMvy + (one - csMvy) * f_BS  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
+      // //f_TSE = f1_TSE * csMvy + (one - csMvy) * f_TSE ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
+      // //f_TSW = f1_TSW * csMvy + (one - csMvy) * f_TSW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
+      // //f_BSE = f1_BSE * csMvy + (one - csMvy) * f_BSE ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
+      // //f_BSW = f1_BSW * csMvy + (one - csMvy) * f_BSW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
+      // //f_S   = f1_S   * csMvy + (one - csMvy) * f_S;
+      // //f_SE  = f1_SE  * csMvy + (one - csMvy) * f_SE;
+      // //f_SW  = f1_SW  * csMvy + (one - csMvy) * f_SW;
+      // //f_TS  = f1_TS  * csMvy + (one - csMvy) * f_TS;
+      // //f_BS  = f1_BS  * csMvy + (one - csMvy) * f_BS;
+      // //f_TSE = f1_TSE * csMvy + (one - csMvy) * f_TSE;
+      // //f_TSW = f1_TSW * csMvy + (one - csMvy) * f_TSW;
+      // //f_BSE = f1_BSE * csMvy + (one - csMvy) * f_BSE;
+      // //f_BSW = f1_BSW * csMvy + (one - csMvy) * f_BSW;
+      // //////////////////////////////////////////////////////////////////////////
+   //}
+   //else
+   //{
+      // ///////////////////////////////////////////
+      // // X
+      // vx1   = vx1 * 0.9;
+      // f_W   = f_E   - six * c2over27  * ( vx1        );
+      // f_NW  = f_SE  - six * c1over54  * ( vx1-vx2    );
+      // f_SW  = f_NE  - six * c1over54  * ( vx1+vx2    );
+      // f_TW  = f_BE  - six * c1over54  * ( vx1    -vx3);
+      // f_BW  = f_TE  - six * c1over54  * ( vx1    +vx3);
+      // f_TNW = f_BSE - six * c1over216 * ( vx1-vx2-vx3);
+      // f_TSW = f_BNE - six * c1over216 * ( vx1+vx2-vx3);
+      // f_BNW = f_TSE - six * c1over216 * ( vx1-vx2+vx3);
+      // f_BSW = f_TNE - six * c1over216 * ( vx1+vx2+vx3);
+      // ///////////////////////////////////////////
+      // // Y
+      // //vx2   = vx2 * 0.9;
+      // //f_S   = f_N   - six * c2over27  * (     vx2    );
+      // //f_SE  = f_NW  - six * c1over54  * (-vx1+vx2    );
+      // //f_SW  = f_NE  - six * c1over54  * ( vx1+vx2    );
+      // //f_TS  = f_BN  - six * c1over54  * (     vx2-vx3);
+      // //f_BS  = f_TN  - six * c1over54  * (     vx2+vx3);
+      // //f_TSE = f_BNW - six * c1over216 * (-vx1+vx2-vx3);
+      // //f_TSW = f_BNE - six * c1over216 * ( vx1+vx2-vx3);
+      // //f_BSE = f_TNW - six * c1over216 * (-vx1+vx2+vx3);
+      // //f_BSW = f_TNE - six * c1over216 * ( vx1+vx2+vx3);
+      // ///////////////////////////////////////////
+   //}
+   /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+   //   = c1over54*  (drho1+(one + drho1)*(three*(-vx1    +vx3)+c9over2*(-vx1    +vx3)*(-vx1    +vx3)-cusq));
    //   real fTN   = c1over54*  (drho1+(one + drho1)*(three*(     vx2+vx3)+c9over2*(     vx2+vx3)*(     vx2+vx3)-cusq));
    //   real fBS   = c1over54*  (drho1+(one + drho1)*(three*(    -vx2-vx3)+c9over2*(    -vx2-vx3)*(    -vx2-vx3)-cusq));
    //   real fBN   = c1over54*  (drho1+(one + drho1)*(three*(     vx2-vx3)+c9over2*(     vx2-vx3)*(     vx2-vx3)-cusq));
@@ -3067,222 +3075,322 @@ extern "C" __global__ void QPressNoRhoDevice27(  real* rhoBC,
    //   real fBSE  = c1over216* (drho1+(one + drho1)*(three*( vx1-vx2-vx3)+c9over2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cusq));
    //   real fTNW  = c1over216* (drho1+(one + drho1)*(three*(-vx1+vx2+vx3)+c9over2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cusq));
 
-	  real cs = c1o1 / sqrtf(c3o1);
-	  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-	  //no velocity
-	  //////////////////////////////////////////
-      f_E    = f1_E   * cs + (c1o1 - cs) * f_E   ;
-      f_W    = f1_W   * cs + (c1o1 - cs) * f_W   ;
-      f_N    = f1_N   * cs + (c1o1 - cs) * f_N   ;
-      f_S    = f1_S   * cs + (c1o1 - cs) * f_S   ;
-      f_T    = f1_T   * cs + (c1o1 - cs) * f_T   ;
-      f_B    = f1_B   * cs + (c1o1 - cs) * f_B   ;
-      f_NE   = f1_NE  * cs + (c1o1 - cs) * f_NE  ;
-      f_SW   = f1_SW  * cs + (c1o1 - cs) * f_SW  ;
-      f_SE   = f1_SE  * cs + (c1o1 - cs) * f_SE  ;
-      f_NW   = f1_NW  * cs + (c1o1 - cs) * f_NW  ;
-      f_TE   = f1_TE  * cs + (c1o1 - cs) * f_TE  ;
-      f_BW   = f1_BW  * cs + (c1o1 - cs) * f_BW  ;
-      f_BE   = f1_BE  * cs + (c1o1 - cs) * f_BE  ;
-      f_TW   = f1_TW  * cs + (c1o1 - cs) * f_TW  ;
-      f_TN   = f1_TN  * cs + (c1o1 - cs) * f_TN  ;
-      f_BS   = f1_BS  * cs + (c1o1 - cs) * f_BS  ;
-      f_BN   = f1_BN  * cs + (c1o1 - cs) * f_BN  ;
-      f_TS   = f1_TS  * cs + (c1o1 - cs) * f_TS  ;
-      f_TNE  = f1_TNE * cs + (c1o1 - cs) * f_TNE ;
-      f_TSW  = f1_TSW * cs + (c1o1 - cs) * f_TSW ;
-      f_TSE  = f1_TSE * cs + (c1o1 - cs) * f_TSE ;
-      f_TNW  = f1_TNW * cs + (c1o1 - cs) * f_TNW ;
-      f_BNE  = f1_BNE * cs + (c1o1 - cs) * f_BNE ;
-      f_BSW  = f1_BSW * cs + (c1o1 - cs) * f_BSW ;
-      f_BSE  = f1_BSE * cs + (c1o1 - cs) * f_BSE ;
-      f_BNW  = f1_BNW * cs + (c1o1 - cs) * f_BNW ;
-	  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+   real cs = c1o1 / sqrtf(c3o1);
 
-	  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-	  //with velocity
-	  //if(true){//vx1 >= zero){
-		 // real csMvx = one / sqrtf(three) - vx1;
-		 // //real csMvy = one / sqrtf(three) - vx2;
-		 // ///////////////////////////////////////////
-		 // // X
-		 // f_W   = f1_W   * csMvx + (one - csMvx) * f_W   ;//- c2over27  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
-		 // f_NW  = f1_NW  * csMvx + (one - csMvx) * f_NW  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
-		 // f_SW  = f1_SW  * csMvx + (one - csMvx) * f_SW  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
-		 // f_TW  = f1_TW  * csMvx + (one - csMvx) * f_TW  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
-		 // f_BW  = f1_BW  * csMvx + (one - csMvx) * f_BW  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
-		 // f_TNW = f1_TNW * csMvx + (one - csMvx) * f_TNW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
-		 // f_TSW = f1_TSW * csMvx + (one - csMvx) * f_TSW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
-		 // f_BNW = f1_BNW * csMvx + (one - csMvx) * f_BNW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
-		 // f_BSW = f1_BSW * csMvx + (one - csMvx) * f_BSW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx1);
-		 // ///////////////////////////////////////////
-		 // // Y
-		 // //f_S   = f1_S   * csMvy + (one - csMvy) * f_S   ;//- c2over27  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
-		 // //f_SE  = f1_SE  * csMvy + (one - csMvy) * f_SE  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
-		 // //f_SW  = f1_SW  * csMvy + (one - csMvy) * f_SW  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
-		 // //f_TS  = f1_TS  * csMvy + (one - csMvy) * f_TS  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
-		 // //f_BS  = f1_BS  * csMvy + (one - csMvy) * f_BS  ;//- c1over54  * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
-		 // //f_TSE = f1_TSE * csMvy + (one - csMvy) * f_TSE ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
-		 // //f_TSW = f1_TSW * csMvy + (one - csMvy) * f_TSW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
-		 // //f_BSE = f1_BSE * csMvy + (one - csMvy) * f_BSE ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
-		 // //f_BSW = f1_BSW * csMvy + (one - csMvy) * f_BSW ;//- c1over216 * ((drho + drho1)*c1o2-((drho + drho1)*c1o2 )*three*vx2);
-		 // //f_S   = f1_S   * csMvy + (one - csMvy) * f_S;
-		 // //f_SE  = f1_SE  * csMvy + (one - csMvy) * f_SE;
-		 // //f_SW  = f1_SW  * csMvy + (one - csMvy) * f_SW;
-		 // //f_TS  = f1_TS  * csMvy + (one - csMvy) * f_TS;
-		 // //f_BS  = f1_BS  * csMvy + (one - csMvy) * f_BS;
-		 // //f_TSE = f1_TSE * csMvy + (one - csMvy) * f_TSE;
-		 // //f_TSW = f1_TSW * csMvy + (one - csMvy) * f_TSW;
-		 // //f_BSE = f1_BSE * csMvy + (one - csMvy) * f_BSE;
-		 // //f_BSW = f1_BSW * csMvy + (one - csMvy) * f_BSW;
-		 // //////////////////////////////////////////////////////////////////////////
-	  //}
-	  //else
-	  //{
-		 // ///////////////////////////////////////////
-		 // // X
-		 // vx1   = vx1 * 0.9;
-		 // f_W   = f_E   - six * c2over27  * ( vx1        );
-		 // f_NW  = f_SE  - six * c1over54  * ( vx1-vx2    );
-		 // f_SW  = f_NE  - six * c1over54  * ( vx1+vx2    );
-		 // f_TW  = f_BE  - six * c1over54  * ( vx1    -vx3);
-		 // f_BW  = f_TE  - six * c1over54  * ( vx1    +vx3);
-		 // f_TNW = f_BSE - six * c1over216 * ( vx1-vx2-vx3);
-		 // f_TSW = f_BNE - six * c1over216 * ( vx1+vx2-vx3);
-		 // f_BNW = f_TSE - six * c1over216 * ( vx1-vx2+vx3);
-		 // f_BSW = f_TNE - six * c1over216 * ( vx1+vx2+vx3);
-		 // ///////////////////////////////////////////
-		 // // Y
-		 // //vx2   = vx2 * 0.9;
-		 // //f_S   = f_N   - six * c2over27  * (     vx2    );
-		 // //f_SE  = f_NW  - six * c1over54  * (-vx1+vx2    );
-		 // //f_SW  = f_NE  - six * c1over54  * ( vx1+vx2    );
-		 // //f_TS  = f_BN  - six * c1over54  * (     vx2-vx3);
-		 // //f_BS  = f_TN  - six * c1over54  * (     vx2+vx3);
-		 // //f_TSE = f_BNW - six * c1over216 * (-vx1+vx2-vx3);
-		 // //f_TSW = f_BNE - six * c1over216 * ( vx1+vx2-vx3);
-		 // //f_BSE = f_TNW - six * c1over216 * (-vx1+vx2+vx3);
-		 // //f_BSW = f_TNE - six * c1over216 * ( vx1+vx2+vx3);
-		 // ///////////////////////////////////////////
-	  //}
-	  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-	  //////////////////////////////////////////////////////////////////////////
-      if (isEvenTimestep==false)
-      {
-         D.f[E   ] = &DD[E   *size_Mat];
-         D.f[W   ] = &DD[W   *size_Mat];
-         D.f[N   ] = &DD[N   *size_Mat];
-         D.f[S   ] = &DD[S   *size_Mat];
-         D.f[T   ] = &DD[T   *size_Mat];
-         D.f[B   ] = &DD[B   *size_Mat];
-         D.f[NE  ] = &DD[NE  *size_Mat];
-         D.f[SW  ] = &DD[SW  *size_Mat];
-         D.f[SE  ] = &DD[SE  *size_Mat];
-         D.f[NW  ] = &DD[NW  *size_Mat];
-         D.f[TE  ] = &DD[TE  *size_Mat];
-         D.f[BW  ] = &DD[BW  *size_Mat];
-         D.f[BE  ] = &DD[BE  *size_Mat];
-         D.f[TW  ] = &DD[TW  *size_Mat];
-         D.f[TN  ] = &DD[TN  *size_Mat];
-         D.f[BS  ] = &DD[BS  *size_Mat];
-         D.f[BN  ] = &DD[BN  *size_Mat];
-         D.f[TS  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[TNE *size_Mat];
-         D.f[TSW ] = &DD[TSW *size_Mat];
-         D.f[TSE ] = &DD[TSE *size_Mat];
-         D.f[TNW ] = &DD[TNW *size_Mat];
-         D.f[BNE ] = &DD[BNE *size_Mat];
-         D.f[BSW ] = &DD[BSW *size_Mat];
-         D.f[BSE ] = &DD[BSE *size_Mat];
-         D.f[BNW ] = &DD[BNW *size_Mat];
-      } 
-      else
-      {
-         D.f[W   ] = &DD[E   *size_Mat];
-         D.f[E   ] = &DD[W   *size_Mat];
-         D.f[S   ] = &DD[N   *size_Mat];
-         D.f[N   ] = &DD[S   *size_Mat];
-         D.f[B   ] = &DD[T   *size_Mat];
-         D.f[T   ] = &DD[B   *size_Mat];
-         D.f[SW  ] = &DD[NE  *size_Mat];
-         D.f[NE  ] = &DD[SW  *size_Mat];
-         D.f[NW  ] = &DD[SE  *size_Mat];
-         D.f[SE  ] = &DD[NW  *size_Mat];
-         D.f[BW  ] = &DD[TE  *size_Mat];
-         D.f[TE  ] = &DD[BW  *size_Mat];
-         D.f[TW  ] = &DD[BE  *size_Mat];
-         D.f[BE  ] = &DD[TW  *size_Mat];
-         D.f[BS  ] = &DD[TN  *size_Mat];
-         D.f[TN  ] = &DD[BS  *size_Mat];
-         D.f[TS  ] = &DD[BN  *size_Mat];
-         D.f[BN  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[BSW *size_Mat];
-         D.f[TSW ] = &DD[BNE *size_Mat];
-         D.f[TSE ] = &DD[BNW *size_Mat];
-         D.f[TNW ] = &DD[BSE *size_Mat];
-         D.f[BNE ] = &DD[TSW *size_Mat];
-         D.f[BSW ] = &DD[TNE *size_Mat];
-         D.f[BSE ] = &DD[TNW *size_Mat];
-         D.f[BNW ] = &DD[TSE *size_Mat];
-      }
-      //////////////////////////////////////////////////////////////////////////
-      //__syncthreads();
-	  // -X
-	  //(D.f[E   ])[ke   ] = f_E   ;
-	  //(D.f[SE  ])[kse  ] = f_SE  ;
-	  //(D.f[NE  ])[kne  ] = f_NE  ;
-	  //(D.f[BE  ])[kbe  ] = f_BE  ;
-	  //(D.f[TE  ])[kte  ] = f_TE  ;
-	  //(D.f[TSE ])[ktse ] = f_TSE ;
-	  //(D.f[TNE ])[ktne ] = f_TNE ;
-	  //(D.f[BSE ])[kbse ] = f_BSE ;
-	  //(D.f[BNE ])[kbne ] = f_BNE ;     
-	  // X
-	  (D.f[W   ])[kw   ] = f_W   ;
-	  (D.f[SW  ])[ksw  ] = f_SW  ;
-	  (D.f[NW  ])[knw  ] = f_NW  ;
-	  (D.f[BW  ])[kbw  ] = f_BW  ;
-	  (D.f[TW  ])[ktw  ] = f_TW  ;
-	  (D.f[TSW ])[ktsw ] = f_TSW ;
-	  (D.f[TNW ])[ktnw ] = f_TNW ;
-	  (D.f[BSW ])[kbsw ] = f_BSW ;
-	  (D.f[BNW ])[kbnw ] = f_BNW ;     
-	  // Y
-	  //(D.f[S   ])[ks   ] = f_S   ;
-	  //(D.f[SE  ])[kse  ] = f_SE  ;
-	  //(D.f[SW  ])[ksw  ] = f_SW  ;
-	  //(D.f[TS  ])[kts  ] = f_TS  ;
-	  //(D.f[BS  ])[kbs  ] = f_BS  ;
-	  //(D.f[TSE ])[ktse ] = f_TSE ;
-	  //(D.f[TSW ])[ktsw ] = f_TSW ;
-	  //(D.f[BSE ])[kbse ] = f_BSE ;
-	  //(D.f[BSW ])[kbsw ] = f_BSW ;     
-	  // Z
-	  //(D.f[B   ])[kb   ] = f_B   ;
-	  //(D.f[BE  ])[kbe  ] = f_BE  ;
-	  //(D.f[BW  ])[kbw  ] = f_BW  ;
-	  //(D.f[BN  ])[kbn  ] = f_BN  ;
-	  //(D.f[BS  ])[kbs  ] = f_BS  ;
-	  //(D.f[BNE ])[kbne ] = f_BNE ;
-	  //(D.f[BNW ])[kbnw ] = f_BNW ;
-	  //(D.f[BSE ])[kbse ] = f_BSE ;
-	  //(D.f[BSW ])[kbsw ] = f_BSW ;     
-      //////////////////////////////////////////////////////////////////////////
+   //////////////////////////////////////////////////////////////////////////
+   getPointersToDistributions(dist, distributions, numberOfLBnodes, !isEvenTimestep);
+   switch(direction)
+   {
+      case MZZ:
+         (dist.f[DIR_P00])[ke   ] = computeOutflowDistribution(f, f1, DIR_P00, cs);
+         (dist.f[DIR_PM0])[kse  ] = computeOutflowDistribution(f, f1, DIR_PM0, cs);
+         (dist.f[DIR_PP0])[kne  ] = computeOutflowDistribution(f, f1, DIR_PP0, cs);
+         (dist.f[DIR_P0M])[kbe  ] = computeOutflowDistribution(f, f1, DIR_P0M, cs);
+         (dist.f[DIR_P0P])[kte  ] = computeOutflowDistribution(f, f1, DIR_P0P, cs);
+         (dist.f[DIR_PMP])[ktse ] = computeOutflowDistribution(f, f1, DIR_PMP, cs);
+         (dist.f[DIR_PPP])[ktne ] = computeOutflowDistribution(f, f1, DIR_PPP, cs);
+         (dist.f[DIR_PMM])[kbse ] = computeOutflowDistribution(f, f1, DIR_PMM, cs);
+         (dist.f[DIR_PPM])[kbne ] = computeOutflowDistribution(f, f1, DIR_PPM, cs);
+         break;
+
+      case PZZ:
+         (dist.f[DIR_M00])[kw   ] = computeOutflowDistribution(f, f1, DIR_M00, cs);
+         (dist.f[DIR_MM0])[ksw  ] = computeOutflowDistribution(f, f1, DIR_MM0, cs);
+         (dist.f[DIR_MP0])[knw  ] = computeOutflowDistribution(f, f1, DIR_MP0, cs);
+         (dist.f[DIR_M0M])[kbw  ] = computeOutflowDistribution(f, f1, DIR_M0M, cs);
+         (dist.f[DIR_M0P])[ktw  ] = computeOutflowDistribution(f, f1, DIR_M0P, cs);
+         (dist.f[DIR_MMP])[ktsw ] = computeOutflowDistribution(f, f1, DIR_MMP, cs);
+         (dist.f[DIR_MPP])[ktnw ] = computeOutflowDistribution(f, f1, DIR_MPP, cs);
+         (dist.f[DIR_MMM])[kbsw ] = computeOutflowDistribution(f, f1, DIR_MMM, cs);
+         (dist.f[DIR_MPM])[kbnw ] = computeOutflowDistribution(f, f1, DIR_MPM, cs);
+         break;
+
+      case ZMZ:
+         (dist.f[DIR_0P0])[kn   ] = computeOutflowDistribution(f, f1, DIR_0P0, cs);
+         (dist.f[DIR_PP0])[kne  ] = computeOutflowDistribution(f, f1, DIR_PP0, cs);
+         (dist.f[DIR_MP0])[knw  ] = computeOutflowDistribution(f, f1, DIR_MP0, cs);
+         (dist.f[DIR_0PP])[ktn  ] = computeOutflowDistribution(f, f1, DIR_0PP, cs);
+         (dist.f[DIR_0PM])[kbn  ] = computeOutflowDistribution(f, f1, DIR_0PM, cs);
+         (dist.f[DIR_PPP])[ktne ] = computeOutflowDistribution(f, f1, DIR_PPP, cs);
+         (dist.f[DIR_MPP])[ktnw ] = computeOutflowDistribution(f, f1, DIR_MPP, cs);
+         (dist.f[DIR_PPM])[kbne ] = computeOutflowDistribution(f, f1, DIR_PPM, cs);
+         (dist.f[DIR_MPM])[kbnw ] = computeOutflowDistribution(f, f1, DIR_MPM, cs);
+         break;  
+
+      case ZPZ:   
+         (dist.f[DIR_0M0])[ks   ] = computeOutflowDistribution(f, f1, DIR_0M0, cs);
+         (dist.f[DIR_PM0])[kse  ] = computeOutflowDistribution(f, f1, DIR_PM0, cs);
+         (dist.f[DIR_MM0])[ksw  ] = computeOutflowDistribution(f, f1, DIR_MM0, cs);
+         (dist.f[DIR_0MP])[kts  ] = computeOutflowDistribution(f, f1, DIR_0MP, cs);
+         (dist.f[DIR_0MM])[kbs  ] = computeOutflowDistribution(f, f1, DIR_0MM, cs);
+         (dist.f[DIR_PMP])[ktse ] = computeOutflowDistribution(f, f1, DIR_PMP, cs);
+         (dist.f[DIR_MMP])[ktsw ] = computeOutflowDistribution(f, f1, DIR_MMP, cs);
+         (dist.f[DIR_PMM])[kbse ] = computeOutflowDistribution(f, f1, DIR_PMM, cs);
+         (dist.f[DIR_MMM])[kbsw ] = computeOutflowDistribution(f, f1, DIR_MMM, cs);
+         break;
+
+      case ZZM:
+         (dist.f[DIR_00P])[kt   ] = computeOutflowDistribution(f, f1, DIR_00P, cs);
+         (dist.f[DIR_P0P])[kte  ] = computeOutflowDistribution(f, f1, DIR_P0P, cs);
+         (dist.f[DIR_M0P])[ktw  ] = computeOutflowDistribution(f, f1, DIR_M0P, cs);
+         (dist.f[DIR_0PP])[ktn  ] = computeOutflowDistribution(f, f1, DIR_0PP, cs);
+         (dist.f[DIR_0MP])[kts  ] = computeOutflowDistribution(f, f1, DIR_0MP, cs);
+         (dist.f[DIR_PPP])[ktne ] = computeOutflowDistribution(f, f1, DIR_PPP, cs);
+         (dist.f[DIR_MPP])[ktnw ] = computeOutflowDistribution(f, f1, DIR_MPP, cs);
+         (dist.f[DIR_PMP])[ktse ] = computeOutflowDistribution(f, f1, DIR_PMP, cs);
+         (dist.f[DIR_MMP])[ktsw ] = computeOutflowDistribution(f, f1, DIR_MMP, cs); 
+         break;
+
+      case ZZP:
+         (dist.f[DIR_00M])[kb   ] = computeOutflowDistribution(f, f1, DIR_00M, cs);
+         (dist.f[DIR_P0M])[kbe  ] = computeOutflowDistribution(f, f1, DIR_P0M, cs);
+         (dist.f[DIR_M0M])[kbw  ] = computeOutflowDistribution(f, f1, DIR_M0M, cs);
+         (dist.f[DIR_0PM])[kbn  ] = computeOutflowDistribution(f, f1, DIR_0PM, cs);
+         (dist.f[DIR_0MM])[kbs  ] = computeOutflowDistribution(f, f1, DIR_0MM, cs);
+         (dist.f[DIR_PPM])[kbne ] = computeOutflowDistribution(f, f1, DIR_PPM, cs);
+         (dist.f[DIR_MPM])[kbnw ] = computeOutflowDistribution(f, f1, DIR_MPM, cs);
+         (dist.f[DIR_PMM])[kbse ] = computeOutflowDistribution(f, f1, DIR_PMM, cs);
+         (dist.f[DIR_MMM])[kbsw ] = computeOutflowDistribution(f, f1, DIR_MMM, cs);     
+         break;
+      default:
+         break;
    }
 }
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-
-
 
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
+__host__ __device__ real computeOutflowDistribution(const real* const &f, const real* const &f1, const int dir, const real rhoCorrection, const real cs, const real weight)
+{  // Returns a weighted mix of f1[dir] (factor cs) and f[dir] (factor c1o1 - cs), reduced by weight * rhoCorrection.
+   return f1[dir  ] * cs + (c1o1 - cs) * f[dir  ] - weight *rhoCorrection; // only entry dir of each array is read
+}
 
+__global__ void QPressZeroRhoOutflowDevice27(  real* rhoBC, // not referenced in the body
+												 real* distributions, // base pointer handed to getPointersToDistributions
+												 int* k_Q, // k_Q[k]: node whose entries are read into f and later overwritten
+												 int* k_N, // k_N[k]: second node, read into f1 only
+												 int numberOfBCnodes, // entries k >= numberOfBCnodes are skipped
+												 real om1, // not referenced in the body
+												 unsigned int* neighborX, // neighbor lookup tables used to build the k* / k1* indices
+												 unsigned int* neighborY,
+												 unsigned int* neighborZ,
+												 unsigned int numberOfLBnodes, // forwarded to getPointersToDistributions
+												 bool isEvenTimestep, // used as given for reading, negated for writing
+                                     int direction, // selects which case of the switch below runs
+                                     real densityCorrectionFactor) // multiplied with drho to form rhoCorrection
+{
+   // Outflow boundary condition: for boundary entry k, mixes the distributions read via k_Q[k] with those read via k_N[k], subtracts a weighted density correction and stores the result at positions derived from k_Q[k].
+   const unsigned k = vf::gpu::getNodeIndex();
+   // k indexes k_Q and k_N; values at or beyond numberOfBCnodes have nothing to process.
+   //////////////////////////////////////////////////////////////////////////
 
+   if(k>=numberOfBCnodes) return;
+   ////////////////////////////////////////////////////////////////////////////////
+   // index: array positions for each direction, derived from KQK = k_Q[k] through the neighborX/Y/Z tables
+   unsigned int KQK  = k_Q[k];
+   unsigned int kzero= KQK;
+   unsigned int ke   = KQK;
+   unsigned int kw   = neighborX[KQK];
+   unsigned int kn   = KQK;
+   unsigned int ks   = neighborY[KQK];
+   unsigned int kt   = KQK;
+   unsigned int kb   = neighborZ[KQK];
+   unsigned int ksw  = neighborY[kw];
+   unsigned int kne  = KQK;
+   unsigned int kse  = ks;
+   unsigned int knw  = kw;
+   unsigned int kbw  = neighborZ[kw];
+   unsigned int kte  = KQK;
+   unsigned int kbe  = kb;
+   unsigned int ktw  = kw;
+   unsigned int kbs  = neighborZ[ks];
+   unsigned int ktn  = KQK;
+   unsigned int kbn  = kb;
+   unsigned int kts  = ks;
+   unsigned int ktse = ks;
+   unsigned int kbnw = kbw;
+   unsigned int ktnw = kw;
+   unsigned int kbse = kbs;
+   unsigned int ktsw = ksw;
+   unsigned int kbne = kb;
+   unsigned int ktne = KQK;
+   unsigned int kbsw = neighborZ[ksw];
+   ////////////////////////////////////////////////////////////////////////////////
+   // index1: the same set of positions, derived from K1QK = k_N[k]
+   unsigned int K1QK  = k_N[k];
+   // unsigned int k1zero= K1QK;
+   unsigned int k1e   = K1QK;
+   unsigned int k1w   = neighborX[K1QK];
+   unsigned int k1n   = K1QK;
+   unsigned int k1s   = neighborY[K1QK];
+   unsigned int k1t   = K1QK;
+   unsigned int k1b   = neighborZ[K1QK];
+   unsigned int k1sw  = neighborY[k1w];
+   unsigned int k1ne  = K1QK;
+   unsigned int k1se  = k1s;
+   unsigned int k1nw  = k1w;
+   unsigned int k1bw  = neighborZ[k1w];
+   unsigned int k1te  = K1QK;
+   unsigned int k1be  = k1b;
+   unsigned int k1tw  = k1w;
+   unsigned int k1bs  = neighborZ[k1s];
+   unsigned int k1tn  = K1QK;
+   unsigned int k1bn  = k1b;
+   unsigned int k1ts  = k1s;
+   unsigned int k1tse = k1s;
+   unsigned int k1bnw = k1bw;
+   unsigned int k1tnw = k1w;
+   unsigned int k1bse = k1bs;
+   unsigned int k1tsw = k1sw;
+   unsigned int k1bne = k1b;
+   unsigned int k1tne = K1QK;
+   unsigned int k1bsw = neighborZ[k1sw];
+   ////////////////////////////////////////////////////////////////////////////////
+   Distributions27 dist;
+   getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep);   // pointer set used for all reads below
+   real f1[27], f[27];   // f: values read with the k* indices, f1: values read with the k1* indices
+   // Read the values belonging to K1QK. DIR_000 is skipped, so f1[DIR_000] stays uninitialized and must not be read.
+   f1[DIR_P00] = (dist.f[DIR_P00])[k1e   ];
+   f1[DIR_M00] = (dist.f[DIR_M00])[k1w   ];
+   f1[DIR_0P0] = (dist.f[DIR_0P0])[k1n   ];
+   f1[DIR_0M0] = (dist.f[DIR_0M0])[k1s   ];
+   f1[DIR_00P] = (dist.f[DIR_00P])[k1t   ];
+   f1[DIR_00M] = (dist.f[DIR_00M])[k1b   ];
+   f1[DIR_PP0] = (dist.f[DIR_PP0])[k1ne  ];
+   f1[DIR_MM0] = (dist.f[DIR_MM0])[k1sw  ];
+   f1[DIR_PM0] = (dist.f[DIR_PM0])[k1se  ];
+   f1[DIR_MP0] = (dist.f[DIR_MP0])[k1nw  ];
+   f1[DIR_P0P] = (dist.f[DIR_P0P])[k1te  ];
+   f1[DIR_M0M] = (dist.f[DIR_M0M])[k1bw  ];
+   f1[DIR_P0M] = (dist.f[DIR_P0M])[k1be  ];
+   f1[DIR_M0P] = (dist.f[DIR_M0P])[k1tw  ];
+   f1[DIR_0PP] = (dist.f[DIR_0PP])[k1tn  ];
+   f1[DIR_0MM] = (dist.f[DIR_0MM])[k1bs  ];
+   f1[DIR_0PM] = (dist.f[DIR_0PM])[k1bn  ];
+   f1[DIR_0MP] = (dist.f[DIR_0MP])[k1ts  ];
+   // f1[DIR_000] = (dist.f[DIR_000])[k1zero];
+   f1[DIR_PPP] = (dist.f[DIR_PPP])[k1tne ];
+   f1[DIR_MMP] = (dist.f[DIR_MMP])[k1tsw ];
+   f1[DIR_PMP] = (dist.f[DIR_PMP])[k1tse ];
+   f1[DIR_MPP] = (dist.f[DIR_MPP])[k1tnw ];
+   f1[DIR_PPM] = (dist.f[DIR_PPM])[k1bne ];
+   f1[DIR_MMM] = (dist.f[DIR_MMM])[k1bsw ];
+   f1[DIR_PMM] = (dist.f[DIR_PMM])[k1bse ];
+   f1[DIR_MPM] = (dist.f[DIR_MPM])[k1bnw ];
+   // Read all 27 values belonging to KQK, including DIR_000; the full array is passed to getDensity below.
+   f[DIR_P00] = (dist.f[DIR_P00])[ke   ];
+   f[DIR_M00] = (dist.f[DIR_M00])[kw   ];
+   f[DIR_0P0] = (dist.f[DIR_0P0])[kn   ];
+   f[DIR_0M0] = (dist.f[DIR_0M0])[ks   ];
+   f[DIR_00P] = (dist.f[DIR_00P])[kt   ];
+   f[DIR_00M] = (dist.f[DIR_00M])[kb   ];
+   f[DIR_PP0] = (dist.f[DIR_PP0])[kne  ];
+   f[DIR_MM0] = (dist.f[DIR_MM0])[ksw  ];
+   f[DIR_PM0] = (dist.f[DIR_PM0])[kse  ];
+   f[DIR_MP0] = (dist.f[DIR_MP0])[knw  ];
+   f[DIR_P0P] = (dist.f[DIR_P0P])[kte  ];
+   f[DIR_M0M] = (dist.f[DIR_M0M])[kbw  ];
+   f[DIR_P0M] = (dist.f[DIR_P0M])[kbe  ];
+   f[DIR_M0P] = (dist.f[DIR_M0P])[ktw  ];
+   f[DIR_0PP] = (dist.f[DIR_0PP])[ktn  ];
+   f[DIR_0MM] = (dist.f[DIR_0MM])[kbs  ];
+   f[DIR_0PM] = (dist.f[DIR_0PM])[kbn  ];
+   f[DIR_0MP] = (dist.f[DIR_0MP])[kts  ];
+   f[DIR_000] = (dist.f[DIR_000])[kzero];
+   f[DIR_PPP] = (dist.f[DIR_PPP])[ktne ];
+   f[DIR_MMP] = (dist.f[DIR_MMP])[ktsw ];
+   f[DIR_PMP] = (dist.f[DIR_PMP])[ktse ];
+   f[DIR_MPP] = (dist.f[DIR_MPP])[ktnw ];
+   f[DIR_PPM] = (dist.f[DIR_PPM])[kbne ];
+   f[DIR_MMM] = (dist.f[DIR_MMM])[kbsw ];
+   f[DIR_PMM] = (dist.f[DIR_PMM])[kbse ];
+   f[DIR_MPM] = (dist.f[DIR_MPM])[kbnw ];
+   //////////////////////////////////////////////////////////////////////////
+   real drho = vf::lbm::getDensity(f); // NOTE(review): presumably the density (deviation) computed from all 27 entries of f — confirm against vf::lbm::getDensity
+   
+   real rhoCorrection = densityCorrectionFactor*drho;
+   
+   real cs = c1o1 / sqrtf(c3o1); // blend factor; NOTE(review): sqrtf is the single-precision routine — confirm this is intended if real is double
 
+   getPointersToDistributions(dist, distributions, numberOfLBnodes, !isEvenTimestep); // re-point dist with the negated timestep flag before writing
 
+   switch(direction) // each case overwrites nine entries at positions derived from KQK
+   { // weight argument: c2o27 for DIR names with two zeros, c1o54 with one zero, c1o216 with none
+      case MZZ: // writes the nine DIR_Pxx entries
+         (dist.f[DIR_P00])[ke   ] = computeOutflowDistribution(f, f1, DIR_P00  , rhoCorrection, cs, c2o27);
+         (dist.f[DIR_PM0])[kse  ] = computeOutflowDistribution(f, f1, DIR_PM0, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_PP0])[kne  ] = computeOutflowDistribution(f, f1, DIR_PP0, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_P0M])[kbe  ] = computeOutflowDistribution(f, f1, DIR_P0M, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_P0P])[kte  ] = computeOutflowDistribution(f, f1, DIR_P0P, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_PMP])[ktse ] = computeOutflowDistribution(f, f1, DIR_PMP, rhoCorrection, cs, c1o216);
+         (dist.f[DIR_PPP])[ktne ] = computeOutflowDistribution(f, f1, DIR_PPP, rhoCorrection, cs, c1o216);
+         (dist.f[DIR_PMM])[kbse ] = computeOutflowDistribution(f, f1, DIR_PMM, rhoCorrection, cs, c1o216);
+         (dist.f[DIR_PPM])[kbne ] = computeOutflowDistribution(f, f1, DIR_PPM, rhoCorrection, cs, c1o216);
+         break;
+
+      case PZZ: // writes the nine DIR_Mxx entries
+         (dist.f[DIR_M00])[kw   ] = computeOutflowDistribution(f, f1, DIR_M00, rhoCorrection, cs, c2o27);
+         (dist.f[DIR_MM0])[ksw  ] = computeOutflowDistribution(f, f1, DIR_MM0, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_MP0])[knw  ] = computeOutflowDistribution(f, f1, DIR_MP0, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_M0M])[kbw  ] = computeOutflowDistribution(f, f1, DIR_M0M, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_M0P])[ktw  ] = computeOutflowDistribution(f, f1, DIR_M0P, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_MMP])[ktsw ] = computeOutflowDistribution(f, f1, DIR_MMP, rhoCorrection, cs, c1o216);
+         (dist.f[DIR_MPP])[ktnw ] = computeOutflowDistribution(f, f1, DIR_MPP, rhoCorrection, cs, c1o216);
+         (dist.f[DIR_MMM])[kbsw ] = computeOutflowDistribution(f, f1, DIR_MMM, rhoCorrection, cs, c1o216);
+         (dist.f[DIR_MPM])[kbnw ] = computeOutflowDistribution(f, f1, DIR_MPM, rhoCorrection, cs, c1o216);
+         break;
+
+      case ZMZ: // writes the nine DIR_xPx entries
+         (dist.f[DIR_0P0])[kn   ] = computeOutflowDistribution(f, f1, DIR_0P0, rhoCorrection, cs, c2o27);
+         (dist.f[DIR_PP0])[kne  ] = computeOutflowDistribution(f, f1, DIR_PP0, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_MP0])[knw  ] = computeOutflowDistribution(f, f1, DIR_MP0, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_0PP])[ktn  ] = computeOutflowDistribution(f, f1, DIR_0PP, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_0PM])[kbn  ] = computeOutflowDistribution(f, f1, DIR_0PM, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_PPP])[ktne ] = computeOutflowDistribution(f, f1, DIR_PPP, rhoCorrection, cs, c1o216);
+         (dist.f[DIR_MPP])[ktnw ] = computeOutflowDistribution(f, f1, DIR_MPP, rhoCorrection, cs, c1o216);
+         (dist.f[DIR_PPM])[kbne ] = computeOutflowDistribution(f, f1, DIR_PPM, rhoCorrection, cs, c1o216);
+         (dist.f[DIR_MPM])[kbnw ] = computeOutflowDistribution(f, f1, DIR_MPM, rhoCorrection, cs, c1o216);
+         break;  
+
+      case ZPZ:   // writes the nine DIR_xMx entries
+         (dist.f[DIR_0M0])[ks   ] =computeOutflowDistribution(f, f1, DIR_0M0, rhoCorrection, cs, c2o27);
+         (dist.f[DIR_PM0])[kse  ] =computeOutflowDistribution(f, f1, DIR_PM0, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_MM0])[ksw  ] =computeOutflowDistribution(f, f1, DIR_MM0, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_0MP])[kts  ] =computeOutflowDistribution(f, f1, DIR_0MP, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_0MM])[kbs  ] =computeOutflowDistribution(f, f1, DIR_0MM, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_PMP])[ktse ] =computeOutflowDistribution(f, f1, DIR_PMP, rhoCorrection, cs, c1o216);
+         (dist.f[DIR_MMP])[ktsw ] =computeOutflowDistribution(f, f1, DIR_MMP, rhoCorrection, cs, c1o216);
+         (dist.f[DIR_PMM])[kbse ] =computeOutflowDistribution(f, f1, DIR_PMM, rhoCorrection, cs, c1o216);
+         (dist.f[DIR_MMM])[kbsw ] =computeOutflowDistribution(f, f1, DIR_MMM, rhoCorrection, cs, c1o216);
+         break;
+
+      case ZZM: // writes the nine DIR_xxP entries
+         (dist.f[DIR_00P])[kt   ] = computeOutflowDistribution(f, f1, DIR_00P, rhoCorrection, cs, c2o27);
+         (dist.f[DIR_P0P])[kte  ] = computeOutflowDistribution(f, f1, DIR_P0P, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_M0P])[ktw  ] = computeOutflowDistribution(f, f1, DIR_M0P, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_0PP])[ktn  ] = computeOutflowDistribution(f, f1, DIR_0PP, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_0MP])[kts  ] = computeOutflowDistribution(f, f1, DIR_0MP, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_PPP])[ktne ] = computeOutflowDistribution(f, f1, DIR_PPP, rhoCorrection, cs, c1o216);
+         (dist.f[DIR_MPP])[ktnw ] = computeOutflowDistribution(f, f1, DIR_MPP, rhoCorrection, cs, c1o216);
+         (dist.f[DIR_PMP])[ktse ] = computeOutflowDistribution(f, f1, DIR_PMP, rhoCorrection, cs, c1o216);
+         (dist.f[DIR_MMP])[ktsw ] = computeOutflowDistribution(f, f1, DIR_MMP, rhoCorrection, cs, c1o216); 
+         break;
+
+      case ZZP: // writes the nine DIR_xxM entries
+         (dist.f[DIR_00M])[kb   ] = computeOutflowDistribution(f, f1, DIR_00M, rhoCorrection, cs, c2o27);
+         (dist.f[DIR_P0M])[kbe  ] = computeOutflowDistribution(f, f1, DIR_P0M, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_M0M])[kbw  ] = computeOutflowDistribution(f, f1, DIR_M0M, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_0PM])[kbn  ] = computeOutflowDistribution(f, f1, DIR_0PM, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_0MM])[kbs  ] = computeOutflowDistribution(f, f1, DIR_0MM, rhoCorrection, cs, c1o54);
+         (dist.f[DIR_PPM])[kbne ] = computeOutflowDistribution(f, f1, DIR_PPM, rhoCorrection, cs, c1o216);
+         (dist.f[DIR_MPM])[kbnw ] = computeOutflowDistribution(f, f1, DIR_MPM, rhoCorrection, cs, c1o216);
+         (dist.f[DIR_PMM])[kbse ] = computeOutflowDistribution(f, f1, DIR_PMM, rhoCorrection, cs, c1o216);
+         (dist.f[DIR_MMM])[kbsw ] = computeOutflowDistribution(f, f1, DIR_MMM, rhoCorrection, cs, c1o216);     
+         break;
+      default: // any other direction value: nothing is written
+         break;
+   }
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
 
 
@@ -3314,7 +3422,7 @@ extern "C" __global__ void QPressNoRhoDevice27(  real* rhoBC,
 
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QPressDeviceOld27(real* rhoBC,
+__global__ void QPressDeviceOld27(real* rhoBC,
                                              real* DD, 
                                              int* k_Q, 
                                              int* k_N, 
@@ -3403,95 +3511,95 @@ extern "C" __global__ void QPressDeviceOld27(real* rhoBC,
       Distributions27 D;
       if (isEvenTimestep==false)
       {
-         D.f[E   ] = &DD[E   *size_Mat];
-         D.f[W   ] = &DD[W   *size_Mat];
-         D.f[N   ] = &DD[N   *size_Mat];
-         D.f[S   ] = &DD[S   *size_Mat];
-         D.f[T   ] = &DD[T   *size_Mat];
-         D.f[B   ] = &DD[B   *size_Mat];
-         D.f[NE  ] = &DD[NE  *size_Mat];
-         D.f[SW  ] = &DD[SW  *size_Mat];
-         D.f[SE  ] = &DD[SE  *size_Mat];
-         D.f[NW  ] = &DD[NW  *size_Mat];
-         D.f[TE  ] = &DD[TE  *size_Mat];
-         D.f[BW  ] = &DD[BW  *size_Mat];
-         D.f[BE  ] = &DD[BE  *size_Mat];
-         D.f[TW  ] = &DD[TW  *size_Mat];
-         D.f[TN  ] = &DD[TN  *size_Mat];
-         D.f[BS  ] = &DD[BS  *size_Mat];
-         D.f[BN  ] = &DD[BN  *size_Mat];
-         D.f[TS  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[TNE *size_Mat];
-         D.f[TSW ] = &DD[TSW *size_Mat];
-         D.f[TSE ] = &DD[TSE *size_Mat];
-         D.f[TNW ] = &DD[TNW *size_Mat];
-         D.f[BNE ] = &DD[BNE *size_Mat];
-         D.f[BSW ] = &DD[BSW *size_Mat];
-         D.f[BSE ] = &DD[BSE *size_Mat];
-         D.f[BNW ] = &DD[BNW *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
       } 
       else
       {
-         D.f[W   ] = &DD[E   *size_Mat];
-         D.f[E   ] = &DD[W   *size_Mat];
-         D.f[S   ] = &DD[N   *size_Mat];
-         D.f[N   ] = &DD[S   *size_Mat];
-         D.f[B   ] = &DD[T   *size_Mat];
-         D.f[T   ] = &DD[B   *size_Mat];
-         D.f[SW  ] = &DD[NE  *size_Mat];
-         D.f[NE  ] = &DD[SW  *size_Mat];
-         D.f[NW  ] = &DD[SE  *size_Mat];
-         D.f[SE  ] = &DD[NW  *size_Mat];
-         D.f[BW  ] = &DD[TE  *size_Mat];
-         D.f[TE  ] = &DD[BW  *size_Mat];
-         D.f[TW  ] = &DD[BE  *size_Mat];
-         D.f[BE  ] = &DD[TW  *size_Mat];
-         D.f[BS  ] = &DD[TN  *size_Mat];
-         D.f[TN  ] = &DD[BS  *size_Mat];
-         D.f[TS  ] = &DD[BN  *size_Mat];
-         D.f[BN  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[BSW *size_Mat];
-         D.f[TSW ] = &DD[BNE *size_Mat];
-         D.f[TSE ] = &DD[BNW *size_Mat];
-         D.f[TNW ] = &DD[BSE *size_Mat];
-         D.f[BNE ] = &DD[TSW *size_Mat];
-         D.f[BSW ] = &DD[TNE *size_Mat];
-         D.f[BSE ] = &DD[TNW *size_Mat];
-         D.f[BNW ] = &DD[TSE *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
       }
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       real        f1_E,f1_W,f1_N,f1_S,f1_T,f1_B,f1_NE,f1_SW,f1_SE,f1_NW,f1_TE,f1_BW,f1_BE,f1_TW,f1_TN,f1_BS,f1_BN,f1_TS,f1_ZERO,
                      f1_TNE,f1_TSW,f1_TSE,f1_TNW,f1_BNE,f1_BSW,f1_BSE,f1_BNW;
 
-      f1_W    = (D.f[E   ])[k1e   ];
-      f1_E    = (D.f[W   ])[k1w   ];
-      f1_S    = (D.f[N   ])[k1n   ];
-      f1_N    = (D.f[S   ])[k1s   ];
-      f1_B    = (D.f[T   ])[k1t   ];
-      f1_T    = (D.f[B   ])[k1b   ];
-      f1_SW   = (D.f[NE  ])[k1ne  ];
-      f1_NE   = (D.f[SW  ])[k1sw  ];
-      f1_NW   = (D.f[SE  ])[k1se  ];
-      f1_SE   = (D.f[NW  ])[k1nw  ];
-      f1_BW   = (D.f[TE  ])[k1te  ];
-      f1_TE   = (D.f[BW  ])[k1bw  ];
-      f1_TW   = (D.f[BE  ])[k1be  ];
-      f1_BE   = (D.f[TW  ])[k1tw  ];
-      f1_BS   = (D.f[TN  ])[k1tn  ];
-      f1_TN   = (D.f[BS  ])[k1bs  ];
-      f1_TS   = (D.f[BN  ])[k1bn  ];
-      f1_BN   = (D.f[TS  ])[k1ts  ];
-      f1_ZERO = (D.f[REST])[k1zero];
-      f1_BSW  = (D.f[TNE ])[k1tne ];
-      f1_BNE  = (D.f[TSW ])[k1tsw ];
-      f1_BNW  = (D.f[TSE ])[k1tse ];
-      f1_BSE  = (D.f[TNW ])[k1tnw ];
-      f1_TSW  = (D.f[BNE ])[k1bne ];
-      f1_TNE  = (D.f[BSW ])[k1bsw ];
-      f1_TNW  = (D.f[BSE ])[k1bse ];
-      f1_TSE  = (D.f[BNW ])[k1bnw ];
+      f1_W    = (D.f[DIR_P00   ])[k1e   ];
+      f1_E    = (D.f[DIR_M00   ])[k1w   ];
+      f1_S    = (D.f[DIR_0P0   ])[k1n   ];
+      f1_N    = (D.f[DIR_0M0   ])[k1s   ];
+      f1_B    = (D.f[DIR_00P   ])[k1t   ];
+      f1_T    = (D.f[DIR_00M   ])[k1b   ];
+      f1_SW   = (D.f[DIR_PP0  ])[k1ne  ];
+      f1_NE   = (D.f[DIR_MM0  ])[k1sw  ];
+      f1_NW   = (D.f[DIR_PM0  ])[k1se  ];
+      f1_SE   = (D.f[DIR_MP0  ])[k1nw  ];
+      f1_BW   = (D.f[DIR_P0P  ])[k1te  ];
+      f1_TE   = (D.f[DIR_M0M  ])[k1bw  ];
+      f1_TW   = (D.f[DIR_P0M  ])[k1be  ];
+      f1_BE   = (D.f[DIR_M0P  ])[k1tw  ];
+      f1_BS   = (D.f[DIR_0PP  ])[k1tn  ];
+      f1_TN   = (D.f[DIR_0MM  ])[k1bs  ];
+      f1_TS   = (D.f[DIR_0PM  ])[k1bn  ];
+      f1_BN   = (D.f[DIR_0MP  ])[k1ts  ];
+      f1_ZERO = (D.f[DIR_000])[k1zero];
+      f1_BSW  = (D.f[DIR_PPP ])[k1tne ];
+      f1_BNE  = (D.f[DIR_MMP ])[k1tsw ];
+      f1_BNW  = (D.f[DIR_PMP ])[k1tse ];
+      f1_BSE  = (D.f[DIR_MPP ])[k1tnw ];
+      f1_TSW  = (D.f[DIR_PPM ])[k1bne ];
+      f1_TNE  = (D.f[DIR_MMM ])[k1bsw ];
+      f1_TNW  = (D.f[DIR_PMM ])[k1bse ];
+      f1_TSE  = (D.f[DIR_MPM ])[k1bnw ];
 
       //////////////////////////////////////////////////////////////////////////
       real drho1    =  f1_ZERO+f1_E+f1_W+f1_N+f1_S+f1_T+f1_B+f1_NE+f1_SW+f1_SE+f1_NW+f1_TE+f1_BW+f1_BE+f1_TW+f1_TN+f1_BS+f1_BN+f1_TS+
@@ -3503,33 +3611,33 @@ extern "C" __global__ void QPressDeviceOld27(real* rhoBC,
 
       __syncthreads();
 
-      (D.f[E   ])[ke   ] = f1_W   -c2o27*drho1;   //  c1o100;  // zero;  //
-      (D.f[W   ])[kw   ] = f1_E   -c2o27*drho1;	//  c1o100;  // zero;  //
-      (D.f[N   ])[kn   ] = f1_S   -c2o27*drho1;	//  c1o100;  // zero;  //
-      (D.f[S   ])[ks   ] = f1_N   -c2o27*drho1;	//  c1o100;  // zero;  //
-      (D.f[T   ])[kt   ] = f1_B   -c2o27*drho1;	//  c1o100;  // zero;  //
-      (D.f[B   ])[kb   ] = f1_T   -c2o27*drho1;	//  c1o100;  // zero;  //
-      (D.f[NE  ])[kne  ] = f1_SW  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[SW  ])[ksw  ] = f1_NE  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[SE  ])[kse  ] = f1_NW  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[NW  ])[knw  ] = f1_SE  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[TE  ])[kte  ] = f1_BW  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[BW  ])[kbw  ] = f1_TE  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[BE  ])[kbe  ] = f1_TW  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[TW  ])[ktw  ] = f1_BE  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[TN  ])[ktn  ] = f1_BS  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[BS  ])[kbs  ] = f1_TN  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[BN  ])[kbn  ] = f1_TS  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[TS  ])[kts  ] = f1_BN  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[REST])[kzero] = f1_ZERO-c8o27*drho1;	//  c1o100;  // zero;  //
-      (D.f[TNE ])[ktne ] = f1_BSW -c1o216*drho1;	//  c1o100;  // zero;  //
-      (D.f[TSW ])[ktsw ] = f1_BNE -c1o216*drho1;	//  c1o100;  // zero;  //
-      (D.f[TSE ])[ktse ] = f1_BNW -c1o216*drho1;	//  c1o100;  // zero;  //
-      (D.f[TNW ])[ktnw ] = f1_BSE -c1o216*drho1;	//  c1o100;  // zero;  //
-      (D.f[BNE ])[kbne ] = f1_TSW -c1o216*drho1;	//  c1o100;  // zero;  //
-      (D.f[BSW ])[kbsw ] = f1_TNE -c1o216*drho1;	//  c1o100;  // zero;  //
-      (D.f[BSE ])[kbse ] = f1_TNW -c1o216*drho1;	//  c1o100;  // zero;  //
-      (D.f[BNW ])[kbnw ] = f1_TSE -c1o216*drho1;  //  c1o100;  // zero;  //      
+      (D.f[DIR_P00   ])[ke   ] = f1_W   -c2o27*drho1;   //  c1o100;  // zero;  //
+      (D.f[DIR_M00   ])[kw   ] = f1_E   -c2o27*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_0P0   ])[kn   ] = f1_S   -c2o27*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_0M0   ])[ks   ] = f1_N   -c2o27*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_00P   ])[kt   ] = f1_B   -c2o27*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_00M   ])[kb   ] = f1_T   -c2o27*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_PP0  ])[kne  ] = f1_SW  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_MM0  ])[ksw  ] = f1_NE  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_PM0  ])[kse  ] = f1_NW  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_MP0  ])[knw  ] = f1_SE  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_P0P  ])[kte  ] = f1_BW  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_M0M  ])[kbw  ] = f1_TE  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_P0M  ])[kbe  ] = f1_TW  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_M0P  ])[ktw  ] = f1_BE  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_0PP  ])[ktn  ] = f1_BS  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_0MM  ])[kbs  ] = f1_TN  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_0PM  ])[kbn  ] = f1_TS  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_0MP  ])[kts  ] = f1_BN  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_000])[kzero] = f1_ZERO-c8o27*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_PPP ])[ktne ] = f1_BSW -c1o216*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_MMP ])[ktsw ] = f1_BNE -c1o216*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_PMP ])[ktse ] = f1_BNW -c1o216*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_MPP ])[ktnw ] = f1_BSE -c1o216*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_PPM ])[kbne ] = f1_TSW -c1o216*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_MMM ])[kbsw ] = f1_TNE -c1o216*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_PMM ])[kbse ] = f1_TNW -c1o216*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_MPM ])[kbnw ] = f1_TSE -c1o216*drho1;  //  c1o100;  // zero;  //      
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -3573,7 +3681,7 @@ extern "C" __global__ void QPressDeviceOld27(real* rhoBC,
 
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QPressDeviceEQZ27(real* rhoBC,
+__global__ void QPressDeviceEQZ27(real* rhoBC,
                                              real* DD, 
                                              int* k_Q, 
                                              int* k_N,
@@ -3663,153 +3771,153 @@ extern "C" __global__ void QPressDeviceEQZ27(real* rhoBC,
       Distributions27 D;
       if (isEvenTimestep==true)
       {
-         D.f[E   ] = &DD[E   *size_Mat];
-         D.f[W   ] = &DD[W   *size_Mat];
-         D.f[N   ] = &DD[N   *size_Mat];
-         D.f[S   ] = &DD[S   *size_Mat];
-         D.f[T   ] = &DD[T   *size_Mat];
-         D.f[B   ] = &DD[B   *size_Mat];
-         D.f[NE  ] = &DD[NE  *size_Mat];
-         D.f[SW  ] = &DD[SW  *size_Mat];
-         D.f[SE  ] = &DD[SE  *size_Mat];
-         D.f[NW  ] = &DD[NW  *size_Mat];
-         D.f[TE  ] = &DD[TE  *size_Mat];
-         D.f[BW  ] = &DD[BW  *size_Mat];
-         D.f[BE  ] = &DD[BE  *size_Mat];
-         D.f[TW  ] = &DD[TW  *size_Mat];
-         D.f[TN  ] = &DD[TN  *size_Mat];
-         D.f[BS  ] = &DD[BS  *size_Mat];
-         D.f[BN  ] = &DD[BN  *size_Mat];
-         D.f[TS  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[TNE *size_Mat];
-         D.f[TSW ] = &DD[TSW *size_Mat];
-         D.f[TSE ] = &DD[TSE *size_Mat];
-         D.f[TNW ] = &DD[TNW *size_Mat];
-         D.f[BNE ] = &DD[BNE *size_Mat];
-         D.f[BSW ] = &DD[BSW *size_Mat];
-         D.f[BSE ] = &DD[BSE *size_Mat];
-         D.f[BNW ] = &DD[BNW *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
       } 
       else
       {
-         D.f[W   ] = &DD[E   *size_Mat];
-         D.f[E   ] = &DD[W   *size_Mat];
-         D.f[S   ] = &DD[N   *size_Mat];
-         D.f[N   ] = &DD[S   *size_Mat];
-         D.f[B   ] = &DD[T   *size_Mat];
-         D.f[T   ] = &DD[B   *size_Mat];
-         D.f[SW  ] = &DD[NE  *size_Mat];
-         D.f[NE  ] = &DD[SW  *size_Mat];
-         D.f[NW  ] = &DD[SE  *size_Mat];
-         D.f[SE  ] = &DD[NW  *size_Mat];
-         D.f[BW  ] = &DD[TE  *size_Mat];
-         D.f[TE  ] = &DD[BW  *size_Mat];
-         D.f[TW  ] = &DD[BE  *size_Mat];
-         D.f[BE  ] = &DD[TW  *size_Mat];
-         D.f[BS  ] = &DD[TN  *size_Mat];
-         D.f[TN  ] = &DD[BS  *size_Mat];
-         D.f[TS  ] = &DD[BN  *size_Mat];
-         D.f[BN  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[BSW *size_Mat];
-         D.f[TSW ] = &DD[BNE *size_Mat];
-         D.f[TSE ] = &DD[BNW *size_Mat];
-         D.f[TNW ] = &DD[BSE *size_Mat];
-         D.f[BNE ] = &DD[TSW *size_Mat];
-         D.f[BSW ] = &DD[TNE *size_Mat];
-         D.f[BSE ] = &DD[TNW *size_Mat];
-         D.f[BNW ] = &DD[TSE *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
       }
       ////////////////////////////////////////////////////////////////////////////////
     //   Distributions27 kDistTest;
-    //      kDistTest.f[E   ] = &kTestRE[E   *numberOfBCnodes];
-    //      kDistTest.f[W   ] = &kTestRE[W   *numberOfBCnodes];
-    //      kDistTest.f[N   ] = &kTestRE[N   *numberOfBCnodes];
-    //      kDistTest.f[S   ] = &kTestRE[S   *numberOfBCnodes];
-    //      kDistTest.f[T   ] = &kTestRE[T   *numberOfBCnodes];
-    //      kDistTest.f[B   ] = &kTestRE[B   *numberOfBCnodes];
-    //      kDistTest.f[NE  ] = &kTestRE[NE  *numberOfBCnodes];
-    //      kDistTest.f[SW  ] = &kTestRE[SW  *numberOfBCnodes];
-    //      kDistTest.f[SE  ] = &kTestRE[SE  *numberOfBCnodes];
-    //      kDistTest.f[NW  ] = &kTestRE[NW  *numberOfBCnodes];
-    //      kDistTest.f[TE  ] = &kTestRE[TE  *numberOfBCnodes];
-    //      kDistTest.f[BW  ] = &kTestRE[BW  *numberOfBCnodes];
-    //      kDistTest.f[BE  ] = &kTestRE[BE  *numberOfBCnodes];
-    //      kDistTest.f[TW  ] = &kTestRE[TW  *numberOfBCnodes];
-    //      kDistTest.f[TN  ] = &kTestRE[TN  *numberOfBCnodes];
-    //      kDistTest.f[BS  ] = &kTestRE[BS  *numberOfBCnodes];
-    //      kDistTest.f[BN  ] = &kTestRE[BN  *numberOfBCnodes];
-    //      kDistTest.f[TS  ] = &kTestRE[TS  *numberOfBCnodes];
-    //      kDistTest.f[REST] = &kTestRE[REST*numberOfBCnodes];
-    //      kDistTest.f[TNE ] = &kTestRE[TNE *numberOfBCnodes];
-    //      kDistTest.f[TSW ] = &kTestRE[TSW *numberOfBCnodes];
-    //      kDistTest.f[TSE ] = &kTestRE[TSE *numberOfBCnodes];
-    //      kDistTest.f[TNW ] = &kTestRE[TNW *numberOfBCnodes];
-    //      kDistTest.f[BNE ] = &kTestRE[BNE *numberOfBCnodes];
-    //      kDistTest.f[BSW ] = &kTestRE[BSW *numberOfBCnodes];
-    //      kDistTest.f[BSE ] = &kTestRE[BSE *numberOfBCnodes];
-    //      kDistTest.f[BNW ] = &kTestRE[BNW *numberOfBCnodes];
+    //      kDistTest.f[DIR_P00   ] = &kTestRE[DIR_P00   *numberOfBCnodes];
+    //      kDistTest.f[DIR_M00   ] = &kTestRE[DIR_M00   *numberOfBCnodes];
+    //      kDistTest.f[DIR_0P0   ] = &kTestRE[DIR_0P0   *numberOfBCnodes];
+    //      kDistTest.f[DIR_0M0   ] = &kTestRE[DIR_0M0   *numberOfBCnodes];
+    //      kDistTest.f[DIR_00P   ] = &kTestRE[DIR_00P   *numberOfBCnodes];
+    //      kDistTest.f[DIR_00M   ] = &kTestRE[DIR_00M   *numberOfBCnodes];
+    //      kDistTest.f[DIR_PP0  ] = &kTestRE[DIR_PP0  *numberOfBCnodes];
+    //      kDistTest.f[DIR_MM0  ] = &kTestRE[DIR_MM0  *numberOfBCnodes];
+    //      kDistTest.f[DIR_PM0  ] = &kTestRE[DIR_PM0  *numberOfBCnodes];
+    //      kDistTest.f[DIR_MP0  ] = &kTestRE[DIR_MP0  *numberOfBCnodes];
+    //      kDistTest.f[DIR_P0P  ] = &kTestRE[DIR_P0P  *numberOfBCnodes];
+    //      kDistTest.f[DIR_M0M  ] = &kTestRE[DIR_M0M  *numberOfBCnodes];
+    //      kDistTest.f[DIR_P0M  ] = &kTestRE[DIR_P0M  *numberOfBCnodes];
+    //      kDistTest.f[DIR_M0P  ] = &kTestRE[DIR_M0P  *numberOfBCnodes];
+    //      kDistTest.f[DIR_0PP  ] = &kTestRE[DIR_0PP  *numberOfBCnodes];
+    //      kDistTest.f[DIR_0MM  ] = &kTestRE[DIR_0MM  *numberOfBCnodes];
+    //      kDistTest.f[DIR_0PM  ] = &kTestRE[DIR_0PM  *numberOfBCnodes];
+    //      kDistTest.f[DIR_0MP  ] = &kTestRE[DIR_0MP  *numberOfBCnodes];
+    //      kDistTest.f[DIR_000] = &kTestRE[DIR_000*numberOfBCnodes];
+    //      kDistTest.f[DIR_PPP ] = &kTestRE[DIR_PPP *numberOfBCnodes];
+    //      kDistTest.f[DIR_MMP ] = &kTestRE[DIR_MMP *numberOfBCnodes];
+    //      kDistTest.f[DIR_PMP ] = &kTestRE[DIR_PMP *numberOfBCnodes];
+    //      kDistTest.f[DIR_MPP ] = &kTestRE[DIR_MPP *numberOfBCnodes];
+    //      kDistTest.f[DIR_PPM ] = &kTestRE[DIR_PPM *numberOfBCnodes];
+    //      kDistTest.f[DIR_MMM ] = &kTestRE[DIR_MMM *numberOfBCnodes];
+    //      kDistTest.f[DIR_PMM ] = &kTestRE[DIR_PMM *numberOfBCnodes];
+    //      kDistTest.f[DIR_MPM ] = &kTestRE[DIR_MPM *numberOfBCnodes];
    //   ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    //   //real f1_E,f1_W,f1_N,f1_S,f1_T,f1_B,f1_NE,f1_SW,f1_SE,f1_NW,f1_TE,f1_BW,f1_BE,f1_TW,f1_TN,f1_BS,f1_BN,f1_TS,f1_ZERO,f1_TNE,f1_TSW,f1_TSE,f1_TNW,f1_BNE,f1_BSW,f1_BSE,f1_BNW;
-   //   //f1_W    = (D.f[E   ])[k1e   ];
-   //   //f1_E    = (D.f[W   ])[k1w   ];
-   //   //f1_S    = (D.f[N   ])[k1n   ];
-   //   //f1_N    = (D.f[S   ])[k1s   ];
-   //   //f1_B    = (D.f[T   ])[k1t   ];
-   //   //f1_T    = (D.f[B   ])[k1b   ];
-   //   //f1_SW   = (D.f[NE  ])[k1ne  ];
-   //   //f1_NE   = (D.f[SW  ])[k1sw  ];
-   //   //f1_NW   = (D.f[SE  ])[k1se  ];
-   //   //f1_SE   = (D.f[NW  ])[k1nw  ];
-   //   //f1_BW   = (D.f[TE  ])[k1te  ];
-   //   //f1_TE   = (D.f[BW  ])[k1bw  ];
-   //   //f1_TW   = (D.f[BE  ])[k1be  ];
-   //   //f1_BE   = (D.f[TW  ])[k1tw  ];
-   //   //f1_BS   = (D.f[TN  ])[k1tn  ];
-   //   //f1_TN   = (D.f[BS  ])[k1bs  ];
-   //   //f1_TS   = (D.f[BN  ])[k1bn  ];
-   //   //f1_BN   = (D.f[TS  ])[k1ts  ];
-   //   //f1_ZERO = (D.f[REST])[k1zero];
-   //   //f1_BSW  = (D.f[TNE ])[k1tne ];
-   //   //f1_BNE  = (D.f[TSW ])[k1tsw ];
-   //   //f1_BNW  = (D.f[TSE ])[k1tse ];
-   //   //f1_BSE  = (D.f[TNW ])[k1tnw ];
-   //   //f1_TSW  = (D.f[BNE ])[k1bne ];
-   //   //f1_TNE  = (D.f[BSW ])[k1bsw ];
-   //   //f1_TNW  = (D.f[BSE ])[k1bse ];
-   //   //f1_TSE  = (D.f[BNW ])[k1bnw ];
+   //   //f1_W    = (D.f[DIR_P00   ])[k1e   ];
+   //   //f1_E    = (D.f[DIR_M00   ])[k1w   ];
+   //   //f1_S    = (D.f[DIR_0P0   ])[k1n   ];
+   //   //f1_N    = (D.f[DIR_0M0   ])[k1s   ];
+   //   //f1_B    = (D.f[DIR_00P   ])[k1t   ];
+   //   //f1_T    = (D.f[DIR_00M   ])[k1b   ];
+   //   //f1_SW   = (D.f[DIR_PP0  ])[k1ne  ];
+   //   //f1_NE   = (D.f[DIR_MM0  ])[k1sw  ];
+   //   //f1_NW   = (D.f[DIR_PM0  ])[k1se  ];
+   //   //f1_SE   = (D.f[DIR_MP0  ])[k1nw  ];
+   //   //f1_BW   = (D.f[DIR_P0P  ])[k1te  ];
+   //   //f1_TE   = (D.f[DIR_M0M  ])[k1bw  ];
+   //   //f1_TW   = (D.f[DIR_P0M  ])[k1be  ];
+   //   //f1_BE   = (D.f[DIR_M0P  ])[k1tw  ];
+   //   //f1_BS   = (D.f[DIR_0PP  ])[k1tn  ];
+   //   //f1_TN   = (D.f[DIR_0MM  ])[k1bs  ];
+   //   //f1_TS   = (D.f[DIR_0PM  ])[k1bn  ];
+   //   //f1_BN   = (D.f[DIR_0MP  ])[k1ts  ];
+   //   //f1_ZERO = (D.f[DIR_000])[k1zero];
+   //   //f1_BSW  = (D.f[DIR_PPP ])[k1tne ];
+   //   //f1_BNE  = (D.f[DIR_MMP ])[k1tsw ];
+   //   //f1_BNW  = (D.f[DIR_PMP ])[k1tse ];
+   //   //f1_BSE  = (D.f[DIR_MPP ])[k1tnw ];
+   //   //f1_TSW  = (D.f[DIR_PPM ])[k1bne ];
+   //   //f1_TNE  = (D.f[DIR_MMM ])[k1bsw ];
+   //   //f1_TNW  = (D.f[DIR_PMM ])[k1bse ];
+   //   //f1_TSE  = (D.f[DIR_MPM ])[k1bnw ];
    //   ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
    //   //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    //   real f1_E,f1_W,f1_N,f1_S,f1_T,f1_B,f1_NE,f1_SW,f1_SE,f1_NW,f1_TE,f1_BW,f1_BE,f1_TW,f1_TN,f1_BS,f1_BN,f1_TS,f1_ZERO,f1_TNE,f1_TSW,f1_TSE,f1_TNW,f1_BNE,f1_BSW,f1_BSE,f1_BNW;
-   //   f1_E    = (D.f[E   ])[k1e   ];
-   //   f1_W    = (D.f[W   ])[k1w   ];
-   //   f1_N    = (D.f[N   ])[k1n   ];
-   //   f1_S    = (D.f[S   ])[k1s   ];
-   //   f1_T    = (D.f[T   ])[k1t   ];
-   //   f1_B    = (D.f[B   ])[k1b   ];
-   //   f1_NE   = (D.f[NE  ])[k1ne  ];
-   //   f1_SW   = (D.f[SW  ])[k1sw  ];
-   //   f1_SE   = (D.f[SE  ])[k1se  ];
-   //   f1_NW   = (D.f[NW  ])[k1nw  ];
-   //   f1_TE   = (D.f[TE  ])[k1te  ];
-   //   f1_BW   = (D.f[BW  ])[k1bw  ];
-   //   f1_BE   = (D.f[BE  ])[k1be  ];
-   //   f1_TW   = (D.f[TW  ])[k1tw  ];
-   //   f1_TN   = (D.f[TN  ])[k1tn  ];
-   //   f1_BS   = (D.f[BS  ])[k1bs  ];
-   //   f1_BN   = (D.f[BN  ])[k1bn  ];
-   //   f1_TS   = (D.f[TS  ])[k1ts  ];
-   //   f1_ZERO = (D.f[REST])[k1zero];
-   //   f1_TNE  = (D.f[TNE ])[k1tne ];
-   //   f1_TSW  = (D.f[TSW ])[k1tsw ];
-   //   f1_TSE  = (D.f[TSE ])[k1tse ];
-   //   f1_TNW  = (D.f[TNW ])[k1tnw ];
-   //   f1_BNE  = (D.f[BNE ])[k1bne ];
-   //   f1_BSW  = (D.f[BSW ])[k1bsw ];
-   //   f1_BSE  = (D.f[BSE ])[k1bse ];
-   //   f1_BNW  = (D.f[BNW ])[k1bnw ];
+   //   f1_E    = (D.f[DIR_P00   ])[k1e   ];
+   //   f1_W    = (D.f[DIR_M00   ])[k1w   ];
+   //   f1_N    = (D.f[DIR_0P0   ])[k1n   ];
+   //   f1_S    = (D.f[DIR_0M0   ])[k1s   ];
+   //   f1_T    = (D.f[DIR_00P   ])[k1t   ];
+   //   f1_B    = (D.f[DIR_00M   ])[k1b   ];
+   //   f1_NE   = (D.f[DIR_PP0  ])[k1ne  ];
+   //   f1_SW   = (D.f[DIR_MM0  ])[k1sw  ];
+   //   f1_SE   = (D.f[DIR_PM0  ])[k1se  ];
+   //   f1_NW   = (D.f[DIR_MP0  ])[k1nw  ];
+   //   f1_TE   = (D.f[DIR_P0P  ])[k1te  ];
+   //   f1_BW   = (D.f[DIR_M0M  ])[k1bw  ];
+   //   f1_BE   = (D.f[DIR_P0M  ])[k1be  ];
+   //   f1_TW   = (D.f[DIR_M0P  ])[k1tw  ];
+   //   f1_TN   = (D.f[DIR_0PP  ])[k1tn  ];
+   //   f1_BS   = (D.f[DIR_0MM  ])[k1bs  ];
+   //   f1_BN   = (D.f[DIR_0PM  ])[k1bn  ];
+   //   f1_TS   = (D.f[DIR_0MP  ])[k1ts  ];
+   //   f1_ZERO = (D.f[DIR_000])[k1zero];
+   //   f1_TNE  = (D.f[DIR_PPP ])[k1tne ];
+   //   f1_TSW  = (D.f[DIR_MMP ])[k1tsw ];
+   //   f1_TSE  = (D.f[DIR_PMP ])[k1tse ];
+   //   f1_TNW  = (D.f[DIR_MPP ])[k1tnw ];
+   //   f1_BNE  = (D.f[DIR_PPM ])[k1bne ];
+   //   f1_BSW  = (D.f[DIR_MMM ])[k1bsw ];
+   //   f1_BSE  = (D.f[DIR_PMM ])[k1bse ];
+   //   f1_BNW  = (D.f[DIR_MPM ])[k1bnw ];
    //   //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
    //   //////////////////////////////////////////////////////////////////////////
@@ -3914,88 +4022,88 @@ extern "C" __global__ void QPressDeviceEQZ27(real* rhoBC,
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
             // based on BGK Plus Comp
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			//double mfabb = (D.f[E   ])[k1e   ];
-			//double mfcbb = (D.f[W   ])[k1w   ];
-			//double mfbab = (D.f[N   ])[k1n   ];
-			//double mfbcb = (D.f[S   ])[k1s   ];
-			//double mfbba = (D.f[T   ])[k1t   ];
-			//double mfbbc = (D.f[B   ])[k1b   ];
-			//double mfaab = (D.f[NE  ])[k1ne  ];
-			//double mfccb = (D.f[SW  ])[k1sw  ];
-			//double mfacb = (D.f[SE  ])[k1se  ];
-			//double mfcab = (D.f[NW  ])[k1nw  ];
-			//double mfaba = (D.f[TE  ])[k1te  ];
-			//double mfcbc = (D.f[BW  ])[k1bw  ];
-			//double mfabc = (D.f[BE  ])[k1be  ];
-			//double mfcba = (D.f[TW  ])[k1tw  ];
-			//double mfbaa = (D.f[TN  ])[k1tn  ];
-			//double mfbcc = (D.f[BS  ])[k1bs  ];
-			//double mfbac = (D.f[BN  ])[k1bn  ];
-			//double mfbca = (D.f[TS  ])[k1ts  ];
-			//double mfbbb = (D.f[REST])[k1zero];
-			//double mfaaa = (D.f[TNE ])[k1tne ];
-			//double mfcca = (D.f[TSW ])[k1tsw ];
-			//double mfaca = (D.f[TSE ])[k1tse ];
-			//double mfcaa = (D.f[TNW ])[k1tnw ];
-			//double mfaac = (D.f[BNE ])[k1bne ];
-			//double mfccc = (D.f[BSW ])[k1bsw ];
-			//double mfacc = (D.f[BSE ])[k1bse ];
-			//double mfcac = (D.f[BNW ])[k1bnw ];
-			real mfabb = (D.f[E   ])[k1e   ];
-			real mfcbb = (D.f[W   ])[k1w   ];
-			real mfbab = (D.f[N   ])[k1n   ];
-			real mfbcb = (D.f[S   ])[k1s   ];
-			real mfbba = (D.f[T   ])[k1t   ];
-			real mfbbc = (D.f[B   ])[k1b   ];
-			real mfaab = (D.f[NE  ])[k1ne  ];
-			real mfccb = (D.f[SW  ])[k1sw  ];
-			real mfacb = (D.f[SE  ])[k1se  ];
-			real mfcab = (D.f[NW  ])[k1nw  ];
-			real mfaba = (D.f[TE  ])[k1te  ];
-			real mfcbc = (D.f[BW  ])[k1bw  ];
-			real mfabc = (D.f[BE  ])[k1be  ];
-			real mfcba = (D.f[TW  ])[k1tw  ];
-			real mfbaa = (D.f[TN  ])[k1tn  ];
-			real mfbcc = (D.f[BS  ])[k1bs  ];
-			real mfbac = (D.f[BN  ])[k1bn  ];
-			real mfbca = (D.f[TS  ])[k1ts  ];
-			real mfbbb = (D.f[REST])[k1zero];
-			real mfaaa = (D.f[TNE ])[k1tne ];
-			real mfcca = (D.f[TSW ])[k1tsw ];
-			real mfaca = (D.f[TSE ])[k1tse ];
-			real mfcaa = (D.f[TNW ])[k1tnw ];
-			real mfaac = (D.f[BNE ])[k1bne ];
-			real mfccc = (D.f[BSW ])[k1bsw ];
-			real mfacc = (D.f[BSE ])[k1bse ];
-			real mfcac = (D.f[BNW ])[k1bnw ];
-
-			//real mfcbb = (D.f[E   ])[ke   ];
-			//real mfabb = (D.f[W   ])[kw   ];
-			//real mfbcb = (D.f[N   ])[kn   ];
-			//real mfbab = (D.f[S   ])[ks   ];
-			//real mfbbc = (D.f[T   ])[kt   ];
-			//real mfbba = (D.f[B   ])[kb   ];
-			//real mfccb = (D.f[NE  ])[kne  ];
-			//real mfaab = (D.f[SW  ])[ksw  ];
-			//real mfcab = (D.f[SE  ])[kse  ];
-			//real mfacb = (D.f[NW  ])[knw  ];
-			//real mfcbc = (D.f[TE  ])[kte  ];
-			//real mfaba = (D.f[BW  ])[kbw  ];
-			//real mfcba = (D.f[BE  ])[kbe  ];
-			//real mfabc = (D.f[TW  ])[ktw  ];
-			//real mfbcc = (D.f[TN  ])[ktn  ];
-			//real mfbaa = (D.f[BS  ])[kbs  ];
-			//real mfbca = (D.f[BN  ])[kbn  ];
-			//real mfbac = (D.f[TS  ])[kts  ];
-			//real mfbbb = (D.f[REST])[kzero];
-			//real mfccc = (D.f[TNE ])[ktne ];
-			//real mfaac = (D.f[TSW ])[ktsw ];
-			//real mfcac = (D.f[TSE ])[ktse ];
-			//real mfacc = (D.f[TNW ])[ktnw ];
-			//real mfcca = (D.f[BNE ])[kbne ];
-			//real mfaaa = (D.f[BSW ])[kbsw ];
-			//real mfcaa = (D.f[BSE ])[kbse ];
-			//real mfaca = (D.f[BNW ])[kbnw ];
+			//double mfabb = (D.f[DIR_P00   ])[k1e   ];
+			//double mfcbb = (D.f[DIR_M00   ])[k1w   ];
+			//double mfbab = (D.f[DIR_0P0   ])[k1n   ];
+			//double mfbcb = (D.f[DIR_0M0   ])[k1s   ];
+			//double mfbba = (D.f[DIR_00P   ])[k1t   ];
+			//double mfbbc = (D.f[DIR_00M   ])[k1b   ];
+			//double mfaab = (D.f[DIR_PP0  ])[k1ne  ];
+			//double mfccb = (D.f[DIR_MM0  ])[k1sw  ];
+			//double mfacb = (D.f[DIR_PM0  ])[k1se  ];
+			//double mfcab = (D.f[DIR_MP0  ])[k1nw  ];
+			//double mfaba = (D.f[DIR_P0P  ])[k1te  ];
+			//double mfcbc = (D.f[DIR_M0M  ])[k1bw  ];
+			//double mfabc = (D.f[DIR_P0M  ])[k1be  ];
+			//double mfcba = (D.f[DIR_M0P  ])[k1tw  ];
+			//double mfbaa = (D.f[DIR_0PP  ])[k1tn  ];
+			//double mfbcc = (D.f[DIR_0MM  ])[k1bs  ];
+			//double mfbac = (D.f[DIR_0PM  ])[k1bn  ];
+			//double mfbca = (D.f[DIR_0MP  ])[k1ts  ];
+			//double mfbbb = (D.f[DIR_000])[k1zero];
+			//double mfaaa = (D.f[DIR_PPP ])[k1tne ];
+			//double mfcca = (D.f[DIR_MMP ])[k1tsw ];
+			//double mfaca = (D.f[DIR_PMP ])[k1tse ];
+			//double mfcaa = (D.f[DIR_MPP ])[k1tnw ];
+			//double mfaac = (D.f[DIR_PPM ])[k1bne ];
+			//double mfccc = (D.f[DIR_MMM ])[k1bsw ];
+			//double mfacc = (D.f[DIR_PMM ])[k1bse ];
+			//double mfcac = (D.f[DIR_MPM ])[k1bnw ];
+			real mfabb = (D.f[DIR_P00   ])[k1e   ];
+			real mfcbb = (D.f[DIR_M00   ])[k1w   ];
+			real mfbab = (D.f[DIR_0P0   ])[k1n   ];
+			real mfbcb = (D.f[DIR_0M0   ])[k1s   ];
+			real mfbba = (D.f[DIR_00P   ])[k1t   ];
+			real mfbbc = (D.f[DIR_00M   ])[k1b   ];
+			real mfaab = (D.f[DIR_PP0  ])[k1ne  ];
+			real mfccb = (D.f[DIR_MM0  ])[k1sw  ];
+			real mfacb = (D.f[DIR_PM0  ])[k1se  ];
+			real mfcab = (D.f[DIR_MP0  ])[k1nw  ];
+			real mfaba = (D.f[DIR_P0P  ])[k1te  ];
+			real mfcbc = (D.f[DIR_M0M  ])[k1bw  ];
+			real mfabc = (D.f[DIR_P0M  ])[k1be  ];
+			real mfcba = (D.f[DIR_M0P  ])[k1tw  ];
+			real mfbaa = (D.f[DIR_0PP  ])[k1tn  ];
+			real mfbcc = (D.f[DIR_0MM  ])[k1bs  ];
+			real mfbac = (D.f[DIR_0PM  ])[k1bn  ];
+			real mfbca = (D.f[DIR_0MP  ])[k1ts  ];
+			real mfbbb = (D.f[DIR_000])[k1zero];
+			real mfaaa = (D.f[DIR_PPP ])[k1tne ];
+			real mfcca = (D.f[DIR_MMP ])[k1tsw ];
+			real mfaca = (D.f[DIR_PMP ])[k1tse ];
+			real mfcaa = (D.f[DIR_MPP ])[k1tnw ];
+			real mfaac = (D.f[DIR_PPM ])[k1bne ];
+			real mfccc = (D.f[DIR_MMM ])[k1bsw ];
+			real mfacc = (D.f[DIR_PMM ])[k1bse ];
+			real mfcac = (D.f[DIR_MPM ])[k1bnw ];
+
+			//real mfcbb = (D.f[DIR_P00   ])[ke   ];
+			//real mfabb = (D.f[DIR_M00   ])[kw   ];
+			//real mfbcb = (D.f[DIR_0P0   ])[kn   ];
+			//real mfbab = (D.f[DIR_0M0   ])[ks   ];
+			//real mfbbc = (D.f[DIR_00P   ])[kt   ];
+			//real mfbba = (D.f[DIR_00M   ])[kb   ];
+			//real mfccb = (D.f[DIR_PP0  ])[kne  ];
+			//real mfaab = (D.f[DIR_MM0  ])[ksw  ];
+			//real mfcab = (D.f[DIR_PM0  ])[kse  ];
+			//real mfacb = (D.f[DIR_MP0  ])[knw  ];
+			//real mfcbc = (D.f[DIR_P0P  ])[kte  ];
+			//real mfaba = (D.f[DIR_M0M  ])[kbw  ];
+			//real mfcba = (D.f[DIR_P0M  ])[kbe  ];
+			//real mfabc = (D.f[DIR_M0P  ])[ktw  ];
+			//real mfbcc = (D.f[DIR_0PP  ])[ktn  ];
+			//real mfbaa = (D.f[DIR_0MM  ])[kbs  ];
+			//real mfbca = (D.f[DIR_0PM  ])[kbn  ];
+			//real mfbac = (D.f[DIR_0MP  ])[kts  ];
+			//real mfbbb = (D.f[DIR_000])[kzero];
+			//real mfccc = (D.f[DIR_PPP ])[ktne ];
+			//real mfaac = (D.f[DIR_MMP ])[ktsw ];
+			//real mfcac = (D.f[DIR_PMP ])[ktse ];
+			//real mfacc = (D.f[DIR_MPP ])[ktnw ];
+			//real mfcca = (D.f[DIR_PPM ])[kbne ];
+			//real mfaaa = (D.f[DIR_MMM ])[kbsw ];
+			//real mfcaa = (D.f[DIR_PMM ])[kbse ];
+			//real mfaca = (D.f[DIR_MPM ])[kbnw ];
 			////////////////////////////////////////////////////////////////////////////////////
 			//real rho   = (((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
 			//				(((mfbac+mfbca) + (mfbaa+mfbcc)) + ((mfabc+mfcba) + (mfaba+mfcbc)) + ((mfacb+mfcab) + (mfaab+mfccb))) +
@@ -4022,61 +4130,61 @@ extern "C" __global__ void QPressDeviceEQZ27(real* rhoBC,
 			////////////////////////////////////////////////////////////////////////////////////////
 			////round off error test
 			//if(vvx!=zero){
-			//	(kDistTest.f[E   ])[k] = mfabb;
-			//	(kDistTest.f[W   ])[k] = mfcbb;
-			//	(kDistTest.f[N   ])[k] = mfbab;
-			//	(kDistTest.f[S   ])[k] = mfbcb;
-			//	(kDistTest.f[T   ])[k] = mfbba;
-			//	(kDistTest.f[B   ])[k] = mfbbc;
-			//	(kDistTest.f[NE  ])[k] = mfaab;
-			//	(kDistTest.f[SW  ])[k] = mfccb;
-			//	(kDistTest.f[SE  ])[k] = mfacb;
-			//	(kDistTest.f[NW  ])[k] = mfcab;
-			//	(kDistTest.f[TE  ])[k] = mfaba;
-			//	(kDistTest.f[BW  ])[k] = mfcbc;
-			//	(kDistTest.f[BE  ])[k] = mfabc;
-			//	(kDistTest.f[TW  ])[k] = mfcba;
-			//	(kDistTest.f[TN  ])[k] = mfbaa;
-			//	(kDistTest.f[BS  ])[k] = mfbcc;
-			//	(kDistTest.f[BN  ])[k] = mfbac;
-			//	(kDistTest.f[TS  ])[k] = mfbca;
-			//	(kDistTest.f[REST])[k] = KQK;
-			//	(kDistTest.f[TNE ])[k] = mfaaa;
-			//	(kDistTest.f[TSW ])[k] = mfcca;
-			//	(kDistTest.f[TSE ])[k] = mfaca;
-			//	(kDistTest.f[TNW ])[k] = mfcaa;
-			//	(kDistTest.f[BNE ])[k] = mfaac;
-			//	(kDistTest.f[BSW ])[k] = mfccc;
-			//	(kDistTest.f[BSE ])[k] = mfacc;
-			//	(kDistTest.f[BNW ])[k] = mfcac;
+			//	(kDistTest.f[DIR_P00   ])[k] = mfabb;
+			//	(kDistTest.f[DIR_M00   ])[k] = mfcbb;
+			//	(kDistTest.f[DIR_0P0   ])[k] = mfbab;
+			//	(kDistTest.f[DIR_0M0   ])[k] = mfbcb;
+			//	(kDistTest.f[DIR_00P   ])[k] = mfbba;
+			//	(kDistTest.f[DIR_00M   ])[k] = mfbbc;
+			//	(kDistTest.f[DIR_PP0  ])[k] = mfaab;
+			//	(kDistTest.f[DIR_MM0  ])[k] = mfccb;
+			//	(kDistTest.f[DIR_PM0  ])[k] = mfacb;
+			//	(kDistTest.f[DIR_MP0  ])[k] = mfcab;
+			//	(kDistTest.f[DIR_P0P  ])[k] = mfaba;
+			//	(kDistTest.f[DIR_M0M  ])[k] = mfcbc;
+			//	(kDistTest.f[DIR_P0M  ])[k] = mfabc;
+			//	(kDistTest.f[DIR_M0P  ])[k] = mfcba;
+			//	(kDistTest.f[DIR_0PP  ])[k] = mfbaa;
+			//	(kDistTest.f[DIR_0MM  ])[k] = mfbcc;
+			//	(kDistTest.f[DIR_0PM  ])[k] = mfbac;
+			//	(kDistTest.f[DIR_0MP  ])[k] = mfbca;
+			//	(kDistTest.f[DIR_000])[k] = KQK;
+			//	(kDistTest.f[DIR_PPP ])[k] = mfaaa;
+			//	(kDistTest.f[DIR_MMP ])[k] = mfcca;
+			//	(kDistTest.f[DIR_PMP ])[k] = mfaca;
+			//	(kDistTest.f[DIR_MPP ])[k] = mfcaa;
+			//	(kDistTest.f[DIR_PPM ])[k] = mfaac;
+			//	(kDistTest.f[DIR_MMM ])[k] = mfccc;
+			//	(kDistTest.f[DIR_PMM ])[k] = mfacc;
+			//	(kDistTest.f[DIR_MPM ])[k] = mfcac;
 			//}else{
-			//	(kDistTest.f[E   ])[k] = zero;
-			//	(kDistTest.f[W   ])[k] = zero;
-			//	(kDistTest.f[N   ])[k] = zero;
-			//	(kDistTest.f[S   ])[k] = zero;
-			//	(kDistTest.f[T   ])[k] = zero;
-			//	(kDistTest.f[B   ])[k] = zero;
-			//	(kDistTest.f[NE  ])[k] = zero;
-			//	(kDistTest.f[SW  ])[k] = zero;
-			//	(kDistTest.f[SE  ])[k] = zero;
-			//	(kDistTest.f[NW  ])[k] = zero;
-			//	(kDistTest.f[TE  ])[k] = zero;
-			//	(kDistTest.f[BW  ])[k] = zero;
-			//	(kDistTest.f[BE  ])[k] = zero;
-			//	(kDistTest.f[TW  ])[k] = zero;
-			//	(kDistTest.f[TN  ])[k] = zero;
-			//	(kDistTest.f[BS  ])[k] = zero;
-			//	(kDistTest.f[BN  ])[k] = zero;
-			//	(kDistTest.f[TS  ])[k] = zero;
-			//	(kDistTest.f[REST])[k] = zero;
-			//	(kDistTest.f[TNE ])[k] = zero;
-			//	(kDistTest.f[TSW ])[k] = zero;
-			//	(kDistTest.f[TSE ])[k] = zero;
-			//	(kDistTest.f[TNW ])[k] = zero;
-			//	(kDistTest.f[BNE ])[k] = zero;
-			//	(kDistTest.f[BSW ])[k] = zero;
-			//	(kDistTest.f[BSE ])[k] = zero;
-			//	(kDistTest.f[BNW ])[k] = zero;
+			//	(kDistTest.f[DIR_P00   ])[k] = zero;
+			//	(kDistTest.f[DIR_M00   ])[k] = zero;
+			//	(kDistTest.f[DIR_0P0   ])[k] = zero;
+			//	(kDistTest.f[DIR_0M0   ])[k] = zero;
+			//	(kDistTest.f[DIR_00P   ])[k] = zero;
+			//	(kDistTest.f[DIR_00M   ])[k] = zero;
+			//	(kDistTest.f[DIR_PP0  ])[k] = zero;
+			//	(kDistTest.f[DIR_MM0  ])[k] = zero;
+			//	(kDistTest.f[DIR_PM0  ])[k] = zero;
+			//	(kDistTest.f[DIR_MP0  ])[k] = zero;
+			//	(kDistTest.f[DIR_P0P  ])[k] = zero;
+			//	(kDistTest.f[DIR_M0M  ])[k] = zero;
+			//	(kDistTest.f[DIR_P0M  ])[k] = zero;
+			//	(kDistTest.f[DIR_M0P  ])[k] = zero;
+			//	(kDistTest.f[DIR_0PP  ])[k] = zero;
+			//	(kDistTest.f[DIR_0MM  ])[k] = zero;
+			//	(kDistTest.f[DIR_0PM  ])[k] = zero;
+			//	(kDistTest.f[DIR_0MP  ])[k] = zero;
+			//	(kDistTest.f[DIR_000])[k] = zero;
+			//	(kDistTest.f[DIR_PPP ])[k] = zero;
+			//	(kDistTest.f[DIR_MMP ])[k] = zero;
+			//	(kDistTest.f[DIR_PMP ])[k] = zero;
+			//	(kDistTest.f[DIR_MPP ])[k] = zero;
+			//	(kDistTest.f[DIR_PPM ])[k] = zero;
+			//	(kDistTest.f[DIR_MMM ])[k] = zero;
+			//	(kDistTest.f[DIR_PMM ])[k] = zero;
+			//	(kDistTest.f[DIR_MPM ])[k] = zero;
 			//}
 
 			//////////////////////////////////////////////////////////////////////////////////////
@@ -4168,149 +4276,149 @@ extern "C" __global__ void QPressDeviceEQZ27(real* rhoBC,
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //if (isEvenTimestep==true)
       //{
-      //   D.f[E   ] = &DD[E   *size_Mat];
-      //   D.f[W   ] = &DD[W   *size_Mat];
-      //   D.f[N   ] = &DD[N   *size_Mat];
-      //   D.f[S   ] = &DD[S   *size_Mat];
-      //   D.f[T   ] = &DD[T   *size_Mat];
-      //   D.f[B   ] = &DD[B   *size_Mat];
-      //   D.f[NE  ] = &DD[NE  *size_Mat];
-      //   D.f[SW  ] = &DD[SW  *size_Mat];
-      //   D.f[SE  ] = &DD[SE  *size_Mat];
-      //   D.f[NW  ] = &DD[NW  *size_Mat];
-      //   D.f[TE  ] = &DD[TE  *size_Mat];
-      //   D.f[BW  ] = &DD[BW  *size_Mat];
-      //   D.f[BE  ] = &DD[BE  *size_Mat];
-      //   D.f[TW  ] = &DD[TW  *size_Mat];
-      //   D.f[TN  ] = &DD[TN  *size_Mat];
-      //   D.f[BS  ] = &DD[BS  *size_Mat];
-      //   D.f[BN  ] = &DD[BN  *size_Mat];
-      //   D.f[TS  ] = &DD[TS  *size_Mat];
-      //   D.f[REST] = &DD[REST*size_Mat];
-      //   D.f[TNE ] = &DD[TNE *size_Mat];
-      //   D.f[TSW ] = &DD[TSW *size_Mat];
-      //   D.f[TSE ] = &DD[TSE *size_Mat];
-      //   D.f[TNW ] = &DD[TNW *size_Mat];
-      //   D.f[BNE ] = &DD[BNE *size_Mat];
-      //   D.f[BSW ] = &DD[BSW *size_Mat];
-      //   D.f[BSE ] = &DD[BSE *size_Mat];
-      //   D.f[BNW ] = &DD[BNW *size_Mat];
+      //   D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+      //   D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+      //   D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+      //   D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+      //   D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+      //   D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+      //   D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+      //   D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+      //   D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+      //   D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+      //   D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+      //   D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+      //   D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+      //   D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+      //   D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+      //   D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+      //   D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+      //   D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+      //   D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      //   D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+      //   D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+      //   D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+      //   D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+      //   D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+      //   D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+      //   D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+      //   D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
       //} 
       //else
       //{
-      //   D.f[W   ] = &DD[E   *size_Mat];
-      //   D.f[E   ] = &DD[W   *size_Mat];
-      //   D.f[S   ] = &DD[N   *size_Mat];
-      //   D.f[N   ] = &DD[S   *size_Mat];
-      //   D.f[B   ] = &DD[T   *size_Mat];
-      //   D.f[T   ] = &DD[B   *size_Mat];
-      //   D.f[SW  ] = &DD[NE  *size_Mat];
-      //   D.f[NE  ] = &DD[SW  *size_Mat];
-      //   D.f[NW  ] = &DD[SE  *size_Mat];
-      //   D.f[SE  ] = &DD[NW  *size_Mat];
-      //   D.f[BW  ] = &DD[TE  *size_Mat];
-      //   D.f[TE  ] = &DD[BW  *size_Mat];
-      //   D.f[TW  ] = &DD[BE  *size_Mat];
-      //   D.f[BE  ] = &DD[TW  *size_Mat];
-      //   D.f[BS  ] = &DD[TN  *size_Mat];
-      //   D.f[TN  ] = &DD[BS  *size_Mat];
-      //   D.f[TS  ] = &DD[BN  *size_Mat];
-      //   D.f[BN  ] = &DD[TS  *size_Mat];
-      //   D.f[REST] = &DD[REST*size_Mat];
-      //   D.f[TNE ] = &DD[BSW *size_Mat];
-      //   D.f[TSW ] = &DD[BNE *size_Mat];
-      //   D.f[TSE ] = &DD[BNW *size_Mat];
-      //   D.f[TNW ] = &DD[BSE *size_Mat];
-      //   D.f[BNE ] = &DD[TSW *size_Mat];
-      //   D.f[BSW ] = &DD[TNE *size_Mat];
-      //   D.f[BSE ] = &DD[TNW *size_Mat];
-      //   D.f[BNW ] = &DD[TSE *size_Mat];
+      //   D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+      //   D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+      //   D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+      //   D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+      //   D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+      //   D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+      //   D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+      //   D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+      //   D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+      //   D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+      //   D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+      //   D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+      //   D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+      //   D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+      //   D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+      //   D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+      //   D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+      //   D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+      //   D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      //   D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+      //   D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+      //   D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+      //   D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+      //   D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+      //   D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+      //   D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+      //   D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
       //}
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //__syncthreads();
 
-			(D.f[E   ])[ke   ] = mfabb;//mfcbb;
-			(D.f[W   ])[kw   ] = mfcbb;//mfabb;
-			(D.f[N   ])[kn   ] = mfbab;//mfbcb;
-			(D.f[S   ])[ks   ] = mfbcb;//mfbab;
-			(D.f[T   ])[kt   ] = mfbba;//mfbbc;
-			(D.f[B   ])[kb   ] = mfbbc;//mfbba;
-			(D.f[NE  ])[kne  ] = mfaab;//mfccb;
-			(D.f[SW  ])[ksw  ] = mfccb;//mfaab;
-			(D.f[SE  ])[kse  ] = mfacb;//mfcab;
-			(D.f[NW  ])[knw  ] = mfcab;//mfacb;
-			(D.f[TE  ])[kte  ] = mfaba;//mfcbc;
-			(D.f[BW  ])[kbw  ] = mfcbc;//mfaba;
-			(D.f[BE  ])[kbe  ] = mfabc;//mfcba;
-			(D.f[TW  ])[ktw  ] = mfcba;//mfabc;
-			(D.f[TN  ])[ktn  ] = mfbaa;//mfbcc;
-			(D.f[BS  ])[kbs  ] = mfbcc;//mfbaa;
-			(D.f[BN  ])[kbn  ] = mfbac;//mfbca;
-			(D.f[TS  ])[kts  ] = mfbca;//mfbac;
-			(D.f[REST])[kzero] = mfbbb;//mfbbb;
-			(D.f[TNE ])[ktne ] = mfaaa;//mfccc;
-			(D.f[TSW ])[ktsw ] = mfcca;//mfaac;
-			(D.f[TSE ])[ktse ] = mfaca;//mfcac;
-			(D.f[TNW ])[ktnw ] = mfcaa;//mfacc;
-			(D.f[BNE ])[kbne ] = mfaac;//mfcca;
-			(D.f[BSW ])[kbsw ] = mfccc;//mfaaa;
-			(D.f[BSE ])[kbse ] = mfacc;//mfcaa;
-			(D.f[BNW ])[kbnw ] = mfcac;//mfaca;
-			//(D.f[E   ])[ke   ] = mfcbb;
-			//(D.f[W   ])[kw   ] = mfabb;
-			//(D.f[N   ])[kn   ] = mfbcb;
-			//(D.f[S   ])[ks   ] = mfbab;
-			//(D.f[T   ])[kt   ] = mfbbc;
-			//(D.f[B   ])[kb   ] = mfbba;
-			//(D.f[NE  ])[kne  ] = mfccb;
-			//(D.f[SW  ])[ksw  ] = mfaab;
-			//(D.f[SE  ])[kse  ] = mfcab;
-			//(D.f[NW  ])[knw  ] = mfacb;
-			//(D.f[TE  ])[kte  ] = mfcbc;
-			//(D.f[BW  ])[kbw  ] = mfaba;
-			//(D.f[BE  ])[kbe  ] = mfcba;
-			//(D.f[TW  ])[ktw  ] = mfabc;
-			//(D.f[TN  ])[ktn  ] = mfbcc;
-			//(D.f[BS  ])[kbs  ] = mfbaa;
-			//(D.f[BN  ])[kbn  ] = mfbca;
-			//(D.f[TS  ])[kts  ] = mfbac;
-			//(D.f[REST])[kzero] = mfbbb;
-			//(D.f[TNE ])[ktne ] = mfccc;
-			//(D.f[TSW ])[ktsw ] = mfaac;
-			//(D.f[TSE ])[ktse ] = mfcac;
-			//(D.f[TNW ])[ktnw ] = mfacc;
-			//(D.f[BNE ])[kbne ] = mfcca;
-			//(D.f[BSW ])[kbsw ] = mfaaa;
-			//(D.f[BSE ])[kbse ] = mfcaa;
-			//(D.f[BNW ])[kbnw ] = mfaca;
-
-      //(D.f[E   ])[ke   ] = fE ;  //f1_E ;   //fW;    //fE ;  
-      //(D.f[W   ])[kw   ] = fW ;  //f1_W ;   //fE;    //fW ;  
-      //(D.f[N   ])[kn   ] = fN ;  //f1_N ;   //fS;    //fN ;  
-      //(D.f[S   ])[ks   ] = fS ;  //f1_S ;   //fN;    //fS ;  
-      //(D.f[T   ])[kt   ] = fT ;  //f1_T ;   //fB;    //fT ;  
-      //(D.f[B   ])[kb   ] = fB ;  //f1_B ;   //fT;    //fB ;  
-      //(D.f[NE  ])[kne  ] = fNE;  //f1_NE;   //fSW;   //fNE;  
-      //(D.f[SW  ])[ksw  ] = fSW;  //f1_SW;   //fNE;   //fSW;  
-      //(D.f[SE  ])[kse  ] = fSE;  //f1_SE;   //fNW;   //fSE;  
-      //(D.f[NW  ])[knw  ] = fNW;  //f1_NW;   //fSE;   //fNW;  
-      //(D.f[TE  ])[kte  ] = fTE;  //f1_TE;   //fBW;   //fTE;  
-      //(D.f[BW  ])[kbw  ] = fBW;  //f1_BW;   //fTE;   //fBW;  
-      //(D.f[BE  ])[kbe  ] = fBE;  //f1_BE;   //fTW;   //fBE;  
-      //(D.f[TW  ])[ktw  ] = fTW;  //f1_TW;   //fBE;   //fTW;  
-      //(D.f[TN  ])[ktn  ] = fTN;  //f1_TN;   //fBS;   //fTN;  
-      //(D.f[BS  ])[kbs  ] = fBS;  //f1_BS;   //fTN;   //fBS;  
-      //(D.f[BN  ])[kbn  ] = fBN;  //f1_BN;   //fTS;   //fBN;  
-      //(D.f[TS  ])[kts  ] = fTS;  //f1_TS;   //fBN;   //fTS;  
-      //(D.f[REST])[kzero] = fZERO;//f1_ZERO; //fZERO; //fZERO;
-      //(D.f[TNE ])[ktne ] = fTNE; //f1_TNE;  //fBSW;  //fTNE; 
-      //(D.f[BSW ])[kbsw ] = fBSW; //f1_BSW;  //fTNE;  //fBSW; 
-      //(D.f[BNE ])[kbne ] = fBNE; //f1_BNE;  //fTSW;  //fBNE; 
-      //(D.f[TSW ])[ktsw ] = fTSW; //f1_TSW;  //fBNE;  //fTSW; 
-      //(D.f[TSE ])[ktse ] = fTSE; //f1_TSE;  //fBNW;  //fTSE; 
-      //(D.f[BNW ])[kbnw ] = fBNW; //f1_BNW;  //fTSE;  //fBNW; 
-      //(D.f[BSE ])[kbse ] = fBSE; //f1_BSE;  //fTNW;  //fBSE; 
-      //(D.f[TNW ])[ktnw ] = fTNW; //f1_TNW;  //fBSE;  //fTNW; 
+			(D.f[DIR_P00   ])[ke   ] = mfabb;//mfcbb;
+			(D.f[DIR_M00   ])[kw   ] = mfcbb;//mfabb;
+			(D.f[DIR_0P0   ])[kn   ] = mfbab;//mfbcb;
+			(D.f[DIR_0M0   ])[ks   ] = mfbcb;//mfbab;
+			(D.f[DIR_00P   ])[kt   ] = mfbba;//mfbbc;
+			(D.f[DIR_00M   ])[kb   ] = mfbbc;//mfbba;
+			(D.f[DIR_PP0  ])[kne  ] = mfaab;//mfccb;
+			(D.f[DIR_MM0  ])[ksw  ] = mfccb;//mfaab;
+			(D.f[DIR_PM0  ])[kse  ] = mfacb;//mfcab;
+			(D.f[DIR_MP0  ])[knw  ] = mfcab;//mfacb;
+			(D.f[DIR_P0P  ])[kte  ] = mfaba;//mfcbc;
+			(D.f[DIR_M0M  ])[kbw  ] = mfcbc;//mfaba;
+			(D.f[DIR_P0M  ])[kbe  ] = mfabc;//mfcba;
+			(D.f[DIR_M0P  ])[ktw  ] = mfcba;//mfabc;
+			(D.f[DIR_0PP  ])[ktn  ] = mfbaa;//mfbcc;
+			(D.f[DIR_0MM  ])[kbs  ] = mfbcc;//mfbaa;
+			(D.f[DIR_0PM  ])[kbn  ] = mfbac;//mfbca;
+			(D.f[DIR_0MP  ])[kts  ] = mfbca;//mfbac;
+			(D.f[DIR_000])[kzero] = mfbbb;//mfbbb;
+			(D.f[DIR_PPP ])[ktne ] = mfaaa;//mfccc;
+			(D.f[DIR_MMP ])[ktsw ] = mfcca;//mfaac;
+			(D.f[DIR_PMP ])[ktse ] = mfaca;//mfcac;
+			(D.f[DIR_MPP ])[ktnw ] = mfcaa;//mfacc;
+			(D.f[DIR_PPM ])[kbne ] = mfaac;//mfcca;
+			(D.f[DIR_MMM ])[kbsw ] = mfccc;//mfaaa;
+			(D.f[DIR_PMM ])[kbse ] = mfacc;//mfcaa;
+			(D.f[DIR_MPM ])[kbnw ] = mfcac;//mfaca;
+			//(D.f[DIR_P00   ])[ke   ] = mfcbb;
+			//(D.f[DIR_M00   ])[kw   ] = mfabb;
+			//(D.f[DIR_0P0   ])[kn   ] = mfbcb;
+			//(D.f[DIR_0M0   ])[ks   ] = mfbab;
+			//(D.f[DIR_00P   ])[kt   ] = mfbbc;
+			//(D.f[DIR_00M   ])[kb   ] = mfbba;
+			//(D.f[DIR_PP0  ])[kne  ] = mfccb;
+			//(D.f[DIR_MM0  ])[ksw  ] = mfaab;
+			//(D.f[DIR_PM0  ])[kse  ] = mfcab;
+			//(D.f[DIR_MP0  ])[knw  ] = mfacb;
+			//(D.f[DIR_P0P  ])[kte  ] = mfcbc;
+			//(D.f[DIR_M0M  ])[kbw  ] = mfaba;
+			//(D.f[DIR_P0M  ])[kbe  ] = mfcba;
+			//(D.f[DIR_M0P  ])[ktw  ] = mfabc;
+			//(D.f[DIR_0PP  ])[ktn  ] = mfbcc;
+			//(D.f[DIR_0MM  ])[kbs  ] = mfbaa;
+			//(D.f[DIR_0PM  ])[kbn  ] = mfbca;
+			//(D.f[DIR_0MP  ])[kts  ] = mfbac;
+			//(D.f[DIR_000])[kzero] = mfbbb;
+			//(D.f[DIR_PPP ])[ktne ] = mfccc;
+			//(D.f[DIR_MMP ])[ktsw ] = mfaac;
+			//(D.f[DIR_PMP ])[ktse ] = mfcac;
+			//(D.f[DIR_MPP ])[ktnw ] = mfacc;
+			//(D.f[DIR_PPM ])[kbne ] = mfcca;
+			//(D.f[DIR_MMM ])[kbsw ] = mfaaa;
+			//(D.f[DIR_PMM ])[kbse ] = mfcaa;
+			//(D.f[DIR_MPM ])[kbnw ] = mfaca;
+
+      //(D.f[DIR_P00   ])[ke   ] = fE ;  //f1_E ;   //fW;    //fE ;  
+      //(D.f[DIR_M00   ])[kw   ] = fW ;  //f1_W ;   //fE;    //fW ;  
+      //(D.f[DIR_0P0   ])[kn   ] = fN ;  //f1_N ;   //fS;    //fN ;  
+      //(D.f[DIR_0M0   ])[ks   ] = fS ;  //f1_S ;   //fN;    //fS ;  
+      //(D.f[DIR_00P   ])[kt   ] = fT ;  //f1_T ;   //fB;    //fT ;  
+      //(D.f[DIR_00M   ])[kb   ] = fB ;  //f1_B ;   //fT;    //fB ;  
+      //(D.f[DIR_PP0  ])[kne  ] = fNE;  //f1_NE;   //fSW;   //fNE;  
+      //(D.f[DIR_MM0  ])[ksw  ] = fSW;  //f1_SW;   //fNE;   //fSW;  
+      //(D.f[DIR_PM0  ])[kse  ] = fSE;  //f1_SE;   //fNW;   //fSE;  
+      //(D.f[DIR_MP0  ])[knw  ] = fNW;  //f1_NW;   //fSE;   //fNW;  
+      //(D.f[DIR_P0P  ])[kte  ] = fTE;  //f1_TE;   //fBW;   //fTE;  
+      //(D.f[DIR_M0M  ])[kbw  ] = fBW;  //f1_BW;   //fTE;   //fBW;  
+      //(D.f[DIR_P0M  ])[kbe  ] = fBE;  //f1_BE;   //fTW;   //fBE;  
+      //(D.f[DIR_M0P  ])[ktw  ] = fTW;  //f1_TW;   //fBE;   //fTW;  
+      //(D.f[DIR_0PP  ])[ktn  ] = fTN;  //f1_TN;   //fBS;   //fTN;  
+      //(D.f[DIR_0MM  ])[kbs  ] = fBS;  //f1_BS;   //fTN;   //fBS;  
+      //(D.f[DIR_0PM  ])[kbn  ] = fBN;  //f1_BN;   //fTS;   //fBN;  
+      //(D.f[DIR_0MP  ])[kts  ] = fTS;  //f1_TS;   //fBN;   //fTS;  
+      //(D.f[DIR_000])[kzero] = fZERO;//f1_ZERO; //fZERO; //fZERO;
+      //(D.f[DIR_PPP ])[ktne ] = fTNE; //f1_TNE;  //fBSW;  //fTNE; 
+      //(D.f[DIR_MMM ])[kbsw ] = fBSW; //f1_BSW;  //fTNE;  //fBSW; 
+      //(D.f[DIR_PPM ])[kbne ] = fBNE; //f1_BNE;  //fTSW;  //fBNE; 
+      //(D.f[DIR_MMP ])[ktsw ] = fTSW; //f1_TSW;  //fBNE;  //fTSW; 
+      //(D.f[DIR_PMP ])[ktse ] = fTSE; //f1_TSE;  //fBNW;  //fTSE; 
+      //(D.f[DIR_MPM ])[kbnw ] = fBNW; //f1_BNW;  //fTSE;  //fBNW; 
+      //(D.f[DIR_PMM ])[kbse ] = fBSE; //f1_BSE;  //fTNW;  //fBSE; 
+      //(D.f[DIR_MPP ])[ktnw ] = fTNW; //f1_TNW;  //fBSE;  //fTNW; 
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -4354,9 +4462,9 @@ extern "C" __global__ void QPressDeviceEQZ27(real* rhoBC,
 
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QPressDeviceZero27(	 real* DD, 
+__global__ void QPressDeviceZero27(	 real* DD, 
 												 int* k_Q, 
-												 int numberOfBCnodes, 
+												 unsigned int numberOfBCnodes, 
 												 unsigned int* neighborX,
 												 unsigned int* neighborY,
 												 unsigned int* neighborZ,
@@ -4410,94 +4518,94 @@ extern "C" __global__ void QPressDeviceZero27(	 real* DD,
       Distributions27 D;
       if (isEvenTimestep==false)
       {
-         D.f[E   ] = &DD[E   *size_Mat];
-         D.f[W   ] = &DD[W   *size_Mat];
-         D.f[N   ] = &DD[N   *size_Mat];
-         D.f[S   ] = &DD[S   *size_Mat];
-         D.f[T   ] = &DD[T   *size_Mat];
-         D.f[B   ] = &DD[B   *size_Mat];
-         D.f[NE  ] = &DD[NE  *size_Mat];
-         D.f[SW  ] = &DD[SW  *size_Mat];
-         D.f[SE  ] = &DD[SE  *size_Mat];
-         D.f[NW  ] = &DD[NW  *size_Mat];
-         D.f[TE  ] = &DD[TE  *size_Mat];
-         D.f[BW  ] = &DD[BW  *size_Mat];
-         D.f[BE  ] = &DD[BE  *size_Mat];
-         D.f[TW  ] = &DD[TW  *size_Mat];
-         D.f[TN  ] = &DD[TN  *size_Mat];
-         D.f[BS  ] = &DD[BS  *size_Mat];
-         D.f[BN  ] = &DD[BN  *size_Mat];
-         D.f[TS  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[TNE *size_Mat];
-         D.f[TSW ] = &DD[TSW *size_Mat];
-         D.f[TSE ] = &DD[TSE *size_Mat];
-         D.f[TNW ] = &DD[TNW *size_Mat];
-         D.f[BNE ] = &DD[BNE *size_Mat];
-         D.f[BSW ] = &DD[BSW *size_Mat];
-         D.f[BSE ] = &DD[BSE *size_Mat];
-         D.f[BNW ] = &DD[BNW *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
       } 
       else
       {
-         D.f[W   ] = &DD[E   *size_Mat];
-         D.f[E   ] = &DD[W   *size_Mat];
-         D.f[S   ] = &DD[N   *size_Mat];
-         D.f[N   ] = &DD[S   *size_Mat];
-         D.f[B   ] = &DD[T   *size_Mat];
-         D.f[T   ] = &DD[B   *size_Mat];
-         D.f[SW  ] = &DD[NE  *size_Mat];
-         D.f[NE  ] = &DD[SW  *size_Mat];
-         D.f[NW  ] = &DD[SE  *size_Mat];
-         D.f[SE  ] = &DD[NW  *size_Mat];
-         D.f[BW  ] = &DD[TE  *size_Mat];
-         D.f[TE  ] = &DD[BW  *size_Mat];
-         D.f[TW  ] = &DD[BE  *size_Mat];
-         D.f[BE  ] = &DD[TW  *size_Mat];
-         D.f[BS  ] = &DD[TN  *size_Mat];
-         D.f[TN  ] = &DD[BS  *size_Mat];
-         D.f[TS  ] = &DD[BN  *size_Mat];
-         D.f[BN  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[BSW *size_Mat];
-         D.f[TSW ] = &DD[BNE *size_Mat];
-         D.f[TSE ] = &DD[BNW *size_Mat];
-         D.f[TNW ] = &DD[BSE *size_Mat];
-         D.f[BNE ] = &DD[TSW *size_Mat];
-         D.f[BSW ] = &DD[TNE *size_Mat];
-         D.f[BSE ] = &DD[TNW *size_Mat];
-         D.f[BNW ] = &DD[TSE *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
       }
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //__syncthreads();
 	  //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      (D.f[E   ])[ke   ] =c0o1;
-      (D.f[W   ])[kw   ] =c0o1;
-      (D.f[N   ])[kn   ] =c0o1;
-      (D.f[S   ])[ks   ] =c0o1;
-      (D.f[T   ])[kt   ] =c0o1;
-      (D.f[B   ])[kb   ] =c0o1;
-      (D.f[NE  ])[kne  ] =c0o1;
-      (D.f[SW  ])[ksw  ] =c0o1;
-      (D.f[SE  ])[kse  ] =c0o1;
-      (D.f[NW  ])[knw  ] =c0o1;
-      (D.f[TE  ])[kte  ] =c0o1;
-      (D.f[BW  ])[kbw  ] =c0o1;
-      (D.f[BE  ])[kbe  ] =c0o1;
-      (D.f[TW  ])[ktw  ] =c0o1;
-      (D.f[TN  ])[ktn  ] =c0o1;
-      (D.f[BS  ])[kbs  ] =c0o1;
-      (D.f[BN  ])[kbn  ] =c0o1;
-      (D.f[TS  ])[kts  ] =c0o1;
-      (D.f[REST])[kzero] =c0o1;
-      (D.f[TNE ])[ktne ] =c0o1;
-      (D.f[TSW ])[ktsw ] =c0o1;
-      (D.f[TSE ])[ktse ] =c0o1;
-      (D.f[TNW ])[ktnw ] =c0o1;
-      (D.f[BNE ])[kbne ] =c0o1;
-      (D.f[BSW ])[kbsw ] =c0o1;
-      (D.f[BSE ])[kbse ] =c0o1;
-      (D.f[BNW ])[kbnw ] =c0o1;
+      (D.f[DIR_P00   ])[ke   ] =c0o1;
+      (D.f[DIR_M00   ])[kw   ] =c0o1;
+      (D.f[DIR_0P0   ])[kn   ] =c0o1;
+      (D.f[DIR_0M0   ])[ks   ] =c0o1;
+      (D.f[DIR_00P   ])[kt   ] =c0o1;
+      (D.f[DIR_00M   ])[kb   ] =c0o1;
+      (D.f[DIR_PP0  ])[kne  ] =c0o1;
+      (D.f[DIR_MM0  ])[ksw  ] =c0o1;
+      (D.f[DIR_PM0  ])[kse  ] =c0o1;
+      (D.f[DIR_MP0  ])[knw  ] =c0o1;
+      (D.f[DIR_P0P  ])[kte  ] =c0o1;
+      (D.f[DIR_M0M  ])[kbw  ] =c0o1;
+      (D.f[DIR_P0M  ])[kbe  ] =c0o1;
+      (D.f[DIR_M0P  ])[ktw  ] =c0o1;
+      (D.f[DIR_0PP  ])[ktn  ] =c0o1;
+      (D.f[DIR_0MM  ])[kbs  ] =c0o1;
+      (D.f[DIR_0PM  ])[kbn  ] =c0o1;
+      (D.f[DIR_0MP  ])[kts  ] =c0o1;
+      (D.f[DIR_000])[kzero] =c0o1;
+      (D.f[DIR_PPP ])[ktne ] =c0o1;
+      (D.f[DIR_MMP ])[ktsw ] =c0o1;
+      (D.f[DIR_PMP ])[ktse ] =c0o1;
+      (D.f[DIR_MPP ])[ktnw ] =c0o1;
+      (D.f[DIR_PPM ])[kbne ] =c0o1;
+      (D.f[DIR_MMM ])[kbsw ] =c0o1;
+      (D.f[DIR_PMM ])[kbse ] =c0o1;
+      (D.f[DIR_MPM ])[kbnw ] =c0o1;
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -4541,7 +4649,7 @@ extern "C" __global__ void QPressDeviceZero27(	 real* DD,
 
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QPressDeviceFake27(	 real* rhoBC,
+__global__ void QPressDeviceFake27(	 real* rhoBC,
 												 real* DD, 
 												 int* k_Q, 
 												 int* k_N, 
@@ -4630,95 +4738,95 @@ extern "C" __global__ void QPressDeviceFake27(	 real* rhoBC,
       Distributions27 D;
       if (isEvenTimestep==false)
       {
-         D.f[E   ] = &DD[E   *size_Mat];
-         D.f[W   ] = &DD[W   *size_Mat];
-         D.f[N   ] = &DD[N   *size_Mat];
-         D.f[S   ] = &DD[S   *size_Mat];
-         D.f[T   ] = &DD[T   *size_Mat];
-         D.f[B   ] = &DD[B   *size_Mat];
-         D.f[NE  ] = &DD[NE  *size_Mat];
-         D.f[SW  ] = &DD[SW  *size_Mat];
-         D.f[SE  ] = &DD[SE  *size_Mat];
-         D.f[NW  ] = &DD[NW  *size_Mat];
-         D.f[TE  ] = &DD[TE  *size_Mat];
-         D.f[BW  ] = &DD[BW  *size_Mat];
-         D.f[BE  ] = &DD[BE  *size_Mat];
-         D.f[TW  ] = &DD[TW  *size_Mat];
-         D.f[TN  ] = &DD[TN  *size_Mat];
-         D.f[BS  ] = &DD[BS  *size_Mat];
-         D.f[BN  ] = &DD[BN  *size_Mat];
-         D.f[TS  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[TNE *size_Mat];
-         D.f[TSW ] = &DD[TSW *size_Mat];
-         D.f[TSE ] = &DD[TSE *size_Mat];
-         D.f[TNW ] = &DD[TNW *size_Mat];
-         D.f[BNE ] = &DD[BNE *size_Mat];
-         D.f[BSW ] = &DD[BSW *size_Mat];
-         D.f[BSE ] = &DD[BSE *size_Mat];
-         D.f[BNW ] = &DD[BNW *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
       } 
       else
       {
-         D.f[W   ] = &DD[E   *size_Mat];
-         D.f[E   ] = &DD[W   *size_Mat];
-         D.f[S   ] = &DD[N   *size_Mat];
-         D.f[N   ] = &DD[S   *size_Mat];
-         D.f[B   ] = &DD[T   *size_Mat];
-         D.f[T   ] = &DD[B   *size_Mat];
-         D.f[SW  ] = &DD[NE  *size_Mat];
-         D.f[NE  ] = &DD[SW  *size_Mat];
-         D.f[NW  ] = &DD[SE  *size_Mat];
-         D.f[SE  ] = &DD[NW  *size_Mat];
-         D.f[BW  ] = &DD[TE  *size_Mat];
-         D.f[TE  ] = &DD[BW  *size_Mat];
-         D.f[TW  ] = &DD[BE  *size_Mat];
-         D.f[BE  ] = &DD[TW  *size_Mat];
-         D.f[BS  ] = &DD[TN  *size_Mat];
-         D.f[TN  ] = &DD[BS  *size_Mat];
-         D.f[TS  ] = &DD[BN  *size_Mat];
-         D.f[BN  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[BSW *size_Mat];
-         D.f[TSW ] = &DD[BNE *size_Mat];
-         D.f[TSE ] = &DD[BNW *size_Mat];
-         D.f[TNW ] = &DD[BSE *size_Mat];
-         D.f[BNE ] = &DD[TSW *size_Mat];
-         D.f[BSW ] = &DD[TNE *size_Mat];
-         D.f[BSE ] = &DD[TNW *size_Mat];
-         D.f[BNW ] = &DD[TSE *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
       }
       //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       real        f1_E,f1_W,f1_N,f1_S,f1_T,f1_B,f1_NE,f1_SW,f1_SE,f1_NW,f1_TE,f1_BW,f1_BE,f1_TW,f1_TN,f1_BS,f1_BN,f1_TS,f1_ZERO,
          f1_TNE,f1_TSW,f1_TSE,f1_TNW,f1_BNE,f1_BSW,f1_BSE,f1_BNW;
 
-      f1_W    = (D.f[E   ])[k1e   ];
-      f1_E    = (D.f[W   ])[k1w   ];
-      f1_S    = (D.f[N   ])[k1n   ];
-      f1_N    = (D.f[S   ])[k1s   ];
-      f1_B    = (D.f[T   ])[k1t   ];
-      f1_T    = (D.f[B   ])[k1b   ];
-      f1_SW   = (D.f[NE  ])[k1ne  ];
-      f1_NE   = (D.f[SW  ])[k1sw  ];
-      f1_NW   = (D.f[SE  ])[k1se  ];
-      f1_SE   = (D.f[NW  ])[k1nw  ];
-      f1_BW   = (D.f[TE  ])[k1te  ];
-      f1_TE   = (D.f[BW  ])[k1bw  ];
-      f1_TW   = (D.f[BE  ])[k1be  ];
-      f1_BE   = (D.f[TW  ])[k1tw  ];
-      f1_BS   = (D.f[TN  ])[k1tn  ];
-      f1_TN   = (D.f[BS  ])[k1bs  ];
-      f1_TS   = (D.f[BN  ])[k1bn  ];
-      f1_BN   = (D.f[TS  ])[k1ts  ];
-      f1_ZERO = (D.f[REST])[k1zero];
-      f1_BSW  = (D.f[TNE ])[k1tne ];
-      f1_BNE  = (D.f[TSW ])[k1tsw ];
-      f1_BNW  = (D.f[TSE ])[k1tse ];
-      f1_BSE  = (D.f[TNW ])[k1tnw ];
-      f1_TSW  = (D.f[BNE ])[k1bne ];
-      f1_TNE  = (D.f[BSW ])[k1bsw ];
-      f1_TNW  = (D.f[BSE ])[k1bse ];
-      f1_TSE  = (D.f[BNW ])[k1bnw ];
+      f1_W    = (D.f[DIR_P00   ])[k1e   ];
+      f1_E    = (D.f[DIR_M00   ])[k1w   ];
+      f1_S    = (D.f[DIR_0P0   ])[k1n   ];
+      f1_N    = (D.f[DIR_0M0   ])[k1s   ];
+      f1_B    = (D.f[DIR_00P   ])[k1t   ];
+      f1_T    = (D.f[DIR_00M   ])[k1b   ];
+      f1_SW   = (D.f[DIR_PP0  ])[k1ne  ];
+      f1_NE   = (D.f[DIR_MM0  ])[k1sw  ];
+      f1_NW   = (D.f[DIR_PM0  ])[k1se  ];
+      f1_SE   = (D.f[DIR_MP0  ])[k1nw  ];
+      f1_BW   = (D.f[DIR_P0P  ])[k1te  ];
+      f1_TE   = (D.f[DIR_M0M  ])[k1bw  ];
+      f1_TW   = (D.f[DIR_P0M  ])[k1be  ];
+      f1_BE   = (D.f[DIR_M0P  ])[k1tw  ];
+      f1_BS   = (D.f[DIR_0PP  ])[k1tn  ];
+      f1_TN   = (D.f[DIR_0MM  ])[k1bs  ];
+      f1_TS   = (D.f[DIR_0PM  ])[k1bn  ];
+      f1_BN   = (D.f[DIR_0MP  ])[k1ts  ];
+      f1_ZERO = (D.f[DIR_000])[k1zero];
+      f1_BSW  = (D.f[DIR_PPP ])[k1tne ];
+      f1_BNE  = (D.f[DIR_MMP ])[k1tsw ];
+      f1_BNW  = (D.f[DIR_PMP ])[k1tse ];
+      f1_BSE  = (D.f[DIR_MPP ])[k1tnw ];
+      f1_TSW  = (D.f[DIR_PPM ])[k1bne ];
+      f1_TNE  = (D.f[DIR_MMM ])[k1bsw ];
+      f1_TNW  = (D.f[DIR_PMM ])[k1bse ];
+      f1_TSE  = (D.f[DIR_MPM ])[k1bnw ];
 
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3;
@@ -4745,33 +4853,33 @@ extern "C" __global__ void QPressDeviceFake27(	 real* rhoBC,
 
       __syncthreads();
 
-      (D.f[E   ])[ke   ] = c2o27* (rhoBC[k]+c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cu_sq);
-      (D.f[W   ])[kw   ] = c2o27* (rhoBC[k]+c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cu_sq);
-      (D.f[N   ])[kn   ] = c2o27* (rhoBC[k]+c3o1*(    -vx2    )+c9o2*(    -vx2    )*(    -vx2    )-cu_sq);
-      (D.f[S   ])[ks   ] = c2o27* (rhoBC[k]+c3o1*(     vx2    )+c9o2*(     vx2    )*(     vx2    )-cu_sq);
-      (D.f[T   ])[kt   ] = c2o27* (rhoBC[k]+c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cu_sq);
-      (D.f[B   ])[kb   ] = c2o27* (rhoBC[k]+c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cu_sq);
-      (D.f[NE  ])[kne  ] = f1_SW  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[SW  ])[ksw  ] = f1_NE  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[SE  ])[kse  ] = f1_NW  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[NW  ])[knw  ] = f1_SE  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[TE  ])[kte  ] = f1_BW  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[BW  ])[kbw  ] = f1_TE  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[BE  ])[kbe  ] = f1_TW  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[TW  ])[ktw  ] = f1_BE  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[TN  ])[ktn  ] = f1_BS  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[BS  ])[kbs  ] = f1_TN  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[BN  ])[kbn  ] = f1_TS  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[TS  ])[kts  ] = f1_BN  -c1o54*drho1;	//  c1o100;  // zero;  //
-      (D.f[REST])[kzero] = f1_ZERO-c8o27*drho1;	//  c1o100;  // zero;  //
-      (D.f[TNE ])[ktne ] = f1_BSW -c1o216*drho1;	//  c1o100;  // zero;  //
-      (D.f[TSW ])[ktsw ] = f1_BNE -c1o216*drho1;	//  c1o100;  // zero;  //
-      (D.f[TSE ])[ktse ] = f1_BNW -c1o216*drho1;	//  c1o100;  // zero;  //
-      (D.f[TNW ])[ktnw ] = f1_BSE -c1o216*drho1;	//  c1o100;  // zero;  //
-      (D.f[BNE ])[kbne ] = f1_TSW -c1o216*drho1;	//  c1o100;  // zero;  //
-      (D.f[BSW ])[kbsw ] = f1_TNE -c1o216*drho1;	//  c1o100;  // zero;  //
-      (D.f[BSE ])[kbse ] = f1_TNW -c1o216*drho1;	//  c1o100;  // zero;  //
-      (D.f[BNW ])[kbnw ] = f1_TSE -c1o216*drho1;  //  c1o100;  // zero;  //      
+      (D.f[DIR_P00   ])[ke   ] = c2o27* (rhoBC[k]+c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cu_sq);
+      (D.f[DIR_M00   ])[kw   ] = c2o27* (rhoBC[k]+c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cu_sq);
+      (D.f[DIR_0P0   ])[kn   ] = c2o27* (rhoBC[k]+c3o1*(    -vx2    )+c9o2*(    -vx2    )*(    -vx2    )-cu_sq);
+      (D.f[DIR_0M0   ])[ks   ] = c2o27* (rhoBC[k]+c3o1*(     vx2    )+c9o2*(     vx2    )*(     vx2    )-cu_sq);
+      (D.f[DIR_00P   ])[kt   ] = c2o27* (rhoBC[k]+c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cu_sq);
+      (D.f[DIR_00M   ])[kb   ] = c2o27* (rhoBC[k]+c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cu_sq);
+      (D.f[DIR_PP0  ])[kne  ] = f1_SW  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_MM0  ])[ksw  ] = f1_NE  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_PM0  ])[kse  ] = f1_NW  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_MP0  ])[knw  ] = f1_SE  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_P0P  ])[kte  ] = f1_BW  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_M0M  ])[kbw  ] = f1_TE  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_P0M  ])[kbe  ] = f1_TW  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_M0P  ])[ktw  ] = f1_BE  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_0PP  ])[ktn  ] = f1_BS  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_0MM  ])[kbs  ] = f1_TN  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_0PM  ])[kbn  ] = f1_TS  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_0MP  ])[kts  ] = f1_BN  -c1o54*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_000])[kzero] = f1_ZERO-c8o27*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_PPP ])[ktne ] = f1_BSW -c1o216*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_MMP ])[ktsw ] = f1_BNE -c1o216*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_PMP ])[ktse ] = f1_BNW -c1o216*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_MPP ])[ktnw ] = f1_BSE -c1o216*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_PPM ])[kbne ] = f1_TSW -c1o216*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_MMM ])[kbsw ] = f1_TNE -c1o216*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_PMM ])[kbse ] = f1_TNW -c1o216*drho1;	//  c1o100;  // zero;  //
+      (D.f[DIR_MPM ])[kbnw ] = f1_TSE -c1o216*drho1;  //  c1o100;  // zero;  //      
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -4815,7 +4923,7 @@ extern "C" __global__ void QPressDeviceFake27(	 real* rhoBC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QPressDevice27_IntBB(real* rho,
+__global__ void QPressDevice27_IntBB(real* rho,
 												real* DD, 
 												int* k_Q, 
 												real* QQ,
@@ -4830,63 +4938,63 @@ extern "C" __global__ void QPressDevice27_IntBB(real* rho,
 	Distributions27 D;
 	if (isEvenTimestep==true)
 	{
-		D.f[E   ] = &DD[E   *size_Mat];
-		D.f[W   ] = &DD[W   *size_Mat];
-		D.f[N   ] = &DD[N   *size_Mat];
-		D.f[S   ] = &DD[S   *size_Mat];
-		D.f[T   ] = &DD[T   *size_Mat];
-		D.f[B   ] = &DD[B   *size_Mat];
-		D.f[NE  ] = &DD[NE  *size_Mat];
-		D.f[SW  ] = &DD[SW  *size_Mat];
-		D.f[SE  ] = &DD[SE  *size_Mat];
-		D.f[NW  ] = &DD[NW  *size_Mat];
-		D.f[TE  ] = &DD[TE  *size_Mat];
-		D.f[BW  ] = &DD[BW  *size_Mat];
-		D.f[BE  ] = &DD[BE  *size_Mat];
-		D.f[TW  ] = &DD[TW  *size_Mat];
-		D.f[TN  ] = &DD[TN  *size_Mat];
-		D.f[BS  ] = &DD[BS  *size_Mat];
-		D.f[BN  ] = &DD[BN  *size_Mat];
-		D.f[TS  ] = &DD[TS  *size_Mat];
-		D.f[REST] = &DD[REST*size_Mat];
-		D.f[TNE ] = &DD[TNE *size_Mat];
-		D.f[TSW ] = &DD[TSW *size_Mat];
-		D.f[TSE ] = &DD[TSE *size_Mat];
-		D.f[TNW ] = &DD[TNW *size_Mat];
-		D.f[BNE ] = &DD[BNE *size_Mat];
-		D.f[BSW ] = &DD[BSW *size_Mat];
-		D.f[BSE ] = &DD[BSE *size_Mat];
-		D.f[BNW ] = &DD[BNW *size_Mat];
+		D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+		D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+		D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+		D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+		D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+		D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+		D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+		D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+		D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+		D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+		D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+		D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+		D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+		D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+		D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+		D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+		D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+		D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+		D.f[DIR_000] = &DD[DIR_000*size_Mat];
+		D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+		D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+		D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+		D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+		D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+		D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+		D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+		D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
 	} 
 	else
 	{
-		D.f[W   ] = &DD[E   *size_Mat];
-		D.f[E   ] = &DD[W   *size_Mat];
-		D.f[S   ] = &DD[N   *size_Mat];
-		D.f[N   ] = &DD[S   *size_Mat];
-		D.f[B   ] = &DD[T   *size_Mat];
-		D.f[T   ] = &DD[B   *size_Mat];
-		D.f[SW  ] = &DD[NE  *size_Mat];
-		D.f[NE  ] = &DD[SW  *size_Mat];
-		D.f[NW  ] = &DD[SE  *size_Mat];
-		D.f[SE  ] = &DD[NW  *size_Mat];
-		D.f[BW  ] = &DD[TE  *size_Mat];
-		D.f[TE  ] = &DD[BW  *size_Mat];
-		D.f[TW  ] = &DD[BE  *size_Mat];
-		D.f[BE  ] = &DD[TW  *size_Mat];
-		D.f[BS  ] = &DD[TN  *size_Mat];
-		D.f[TN  ] = &DD[BS  *size_Mat];
-		D.f[TS  ] = &DD[BN  *size_Mat];
-		D.f[BN  ] = &DD[TS  *size_Mat];
-		D.f[REST] = &DD[REST*size_Mat];
-		D.f[TNE ] = &DD[BSW *size_Mat];
-		D.f[TSW ] = &DD[BNE *size_Mat];
-		D.f[TSE ] = &DD[BNW *size_Mat];
-		D.f[TNW ] = &DD[BSE *size_Mat];
-		D.f[BNE ] = &DD[TSW *size_Mat];
-		D.f[BSW ] = &DD[TNE *size_Mat];
-		D.f[BSE ] = &DD[TNW *size_Mat];
-		D.f[BNW ] = &DD[TSE *size_Mat];
+		D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+		D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+		D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+		D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+		D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+		D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+		D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+		D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+		D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+		D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+		D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+		D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+		D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+		D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+		D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+		D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+		D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+		D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+		D.f[DIR_000] = &DD[DIR_000*size_Mat];
+		D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+		D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+		D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+		D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+		D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+		D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+		D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+		D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
 	}
 	////////////////////////////////////////////////////////////////////////////////
 	const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -4911,32 +5019,32 @@ extern "C" __global__ void QPressDevice27_IntBB(real* rho,
 			*q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
 			*q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
 			*q_dirBSE, *q_dirBNW; 
-		q_dirE   = &QQ[E   * numberOfBCnodes];
-		q_dirW   = &QQ[W   * numberOfBCnodes];
-		q_dirN   = &QQ[N   * numberOfBCnodes];
-		q_dirS   = &QQ[S   * numberOfBCnodes];
-		q_dirT   = &QQ[T   * numberOfBCnodes];
-		q_dirB   = &QQ[B   * numberOfBCnodes];
-		q_dirNE  = &QQ[NE  * numberOfBCnodes];
-		q_dirSW  = &QQ[SW  * numberOfBCnodes];
-		q_dirSE  = &QQ[SE  * numberOfBCnodes];
-		q_dirNW  = &QQ[NW  * numberOfBCnodes];
-		q_dirTE  = &QQ[TE  * numberOfBCnodes];
-		q_dirBW  = &QQ[BW  * numberOfBCnodes];
-		q_dirBE  = &QQ[BE  * numberOfBCnodes];
-		q_dirTW  = &QQ[TW  * numberOfBCnodes];
-		q_dirTN  = &QQ[TN  * numberOfBCnodes];
-		q_dirBS  = &QQ[BS  * numberOfBCnodes];
-		q_dirBN  = &QQ[BN  * numberOfBCnodes];
-		q_dirTS  = &QQ[TS  * numberOfBCnodes];
-		q_dirTNE = &QQ[TNE * numberOfBCnodes];
-		q_dirTSW = &QQ[TSW * numberOfBCnodes];
-		q_dirTSE = &QQ[TSE * numberOfBCnodes];
-		q_dirTNW = &QQ[TNW * numberOfBCnodes];
-		q_dirBNE = &QQ[BNE * numberOfBCnodes];
-		q_dirBSW = &QQ[BSW * numberOfBCnodes];
-		q_dirBSE = &QQ[BSE * numberOfBCnodes];
-		q_dirBNW = &QQ[BNW * numberOfBCnodes];
+		q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
+		q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
+		q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
+		q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
+		q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
+		q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
+		q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
+		q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
+		q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
+		q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
+		q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
+		q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
+		q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
+		q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
+		q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
+		q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
+		q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
+		q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+		q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
+		q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
+		q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
+		q_dirTNW = &QQ[DIR_MPP * numberOfBCnodes];
+		q_dirBNE = &QQ[DIR_PPM * numberOfBCnodes];
+		q_dirBSW = &QQ[DIR_MMM * numberOfBCnodes];
+		q_dirBSE = &QQ[DIR_PMM * numberOfBCnodes];
+		q_dirBNW = &QQ[DIR_MPM * numberOfBCnodes];
 		////////////////////////////////////////////////////////////////////////////////
 		//index
 		unsigned int KQK  = k_Q[k];
@@ -4971,37 +5079,37 @@ extern "C" __global__ void QPressDevice27_IntBB(real* rho,
 		real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
 			f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-		f_W    = (D.f[E   ])[ke   ];
-		f_E    = (D.f[W   ])[kw   ];
-		f_S    = (D.f[N   ])[kn   ];
-		f_N    = (D.f[S   ])[ks   ];
-		f_B    = (D.f[T   ])[kt   ];
-		f_T    = (D.f[B   ])[kb   ];
-		f_SW   = (D.f[NE  ])[kne  ];
-		f_NE   = (D.f[SW  ])[ksw  ];
-		f_NW   = (D.f[SE  ])[kse  ];
-		f_SE   = (D.f[NW  ])[knw  ];
-		f_BW   = (D.f[TE  ])[kte  ];
-		f_TE   = (D.f[BW  ])[kbw  ];
-		f_TW   = (D.f[BE  ])[kbe  ];
-		f_BE   = (D.f[TW  ])[ktw  ];
-		f_BS   = (D.f[TN  ])[ktn  ];
-		f_TN   = (D.f[BS  ])[kbs  ];
-		f_TS   = (D.f[BN  ])[kbn  ];
-		f_BN   = (D.f[TS  ])[kts  ];
-		f_BSW  = (D.f[TNE ])[ktne ];
-		f_BNE  = (D.f[TSW ])[ktsw ];
-		f_BNW  = (D.f[TSE ])[ktse ];
-		f_BSE  = (D.f[TNW ])[ktnw ];
-		f_TSW  = (D.f[BNE ])[kbne ];
-		f_TNE  = (D.f[BSW ])[kbsw ];
-		f_TNW  = (D.f[BSE ])[kbse ];
-		f_TSE  = (D.f[BNW ])[kbnw ];
+		f_W    = (D.f[DIR_P00   ])[ke   ];
+		f_E    = (D.f[DIR_M00   ])[kw   ];
+		f_S    = (D.f[DIR_0P0   ])[kn   ];
+		f_N    = (D.f[DIR_0M0   ])[ks   ];
+		f_B    = (D.f[DIR_00P   ])[kt   ];
+		f_T    = (D.f[DIR_00M   ])[kb   ];
+		f_SW   = (D.f[DIR_PP0  ])[kne  ];
+		f_NE   = (D.f[DIR_MM0  ])[ksw  ];
+		f_NW   = (D.f[DIR_PM0  ])[kse  ];
+		f_SE   = (D.f[DIR_MP0  ])[knw  ];
+		f_BW   = (D.f[DIR_P0P  ])[kte  ];
+		f_TE   = (D.f[DIR_M0M  ])[kbw  ];
+		f_TW   = (D.f[DIR_P0M  ])[kbe  ];
+		f_BE   = (D.f[DIR_M0P  ])[ktw  ];
+		f_BS   = (D.f[DIR_0PP  ])[ktn  ];
+		f_TN   = (D.f[DIR_0MM  ])[kbs  ];
+		f_TS   = (D.f[DIR_0PM  ])[kbn  ];
+		f_BN   = (D.f[DIR_0MP  ])[kts  ];
+		f_BSW  = (D.f[DIR_PPP ])[ktne ];
+		f_BNE  = (D.f[DIR_MMP ])[ktsw ];
+		f_BNW  = (D.f[DIR_PMP ])[ktse ];
+		f_BSE  = (D.f[DIR_MPP ])[ktnw ];
+		f_TSW  = (D.f[DIR_PPM ])[kbne ];
+		f_TNE  = (D.f[DIR_MMM ])[kbsw ];
+		f_TNW  = (D.f[DIR_PMM ])[kbse ];
+		f_TSE  = (D.f[DIR_MPM ])[kbnw ];
 		////////////////////////////////////////////////////////////////////////////////
 		real vx1, vx2, vx3, drho, feq, q;
 		drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
 			f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + 
-			f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[REST])[kzero]); 
+			f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[DIR_000])[kzero]); 
 
 		vx1    = (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
 			((f_BE - f_TW)   + (f_TE - f_BW))   + ((f_SE - f_NW)   + (f_NE - f_SW)) +
@@ -5021,67 +5129,67 @@ extern "C" __global__ void QPressDevice27_IntBB(real* rho,
 		//////////////////////////////////////////////////////////////////////////
 		if (isEvenTimestep==false)
 		{
-			D.f[E   ] = &DD[E   *size_Mat];
-			D.f[W   ] = &DD[W   *size_Mat];
-			D.f[N   ] = &DD[N   *size_Mat];
-			D.f[S   ] = &DD[S   *size_Mat];
-			D.f[T   ] = &DD[T   *size_Mat];
-			D.f[B   ] = &DD[B   *size_Mat];
-			D.f[NE  ] = &DD[NE  *size_Mat];
-			D.f[SW  ] = &DD[SW  *size_Mat];
-			D.f[SE  ] = &DD[SE  *size_Mat];
-			D.f[NW  ] = &DD[NW  *size_Mat];
-			D.f[TE  ] = &DD[TE  *size_Mat];
-			D.f[BW  ] = &DD[BW  *size_Mat];
-			D.f[BE  ] = &DD[BE  *size_Mat];
-			D.f[TW  ] = &DD[TW  *size_Mat];
-			D.f[TN  ] = &DD[TN  *size_Mat];
-			D.f[BS  ] = &DD[BS  *size_Mat];
-			D.f[BN  ] = &DD[BN  *size_Mat];
-			D.f[TS  ] = &DD[TS  *size_Mat];
-			D.f[REST] = &DD[REST*size_Mat];
-			D.f[TNE ] = &DD[TNE *size_Mat];
-			D.f[TSW ] = &DD[TSW *size_Mat];
-			D.f[TSE ] = &DD[TSE *size_Mat];
-			D.f[TNW ] = &DD[TNW *size_Mat];
-			D.f[BNE ] = &DD[BNE *size_Mat];
-			D.f[BSW ] = &DD[BSW *size_Mat];
-			D.f[BSE ] = &DD[BSE *size_Mat];
-			D.f[BNW ] = &DD[BNW *size_Mat];
+			D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+			D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+			D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+			D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+			D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+			D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+			D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+			D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+			D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+			D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+			D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+			D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+			D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+			D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+			D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+			D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+			D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+			D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+			D.f[DIR_000] = &DD[DIR_000*size_Mat];
+			D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+			D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+			D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+			D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+			D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+			D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+			D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+			D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
 		} 
 		else
 		{
-			D.f[W   ] = &DD[E   *size_Mat];
-			D.f[E   ] = &DD[W   *size_Mat];
-			D.f[S   ] = &DD[N   *size_Mat];
-			D.f[N   ] = &DD[S   *size_Mat];
-			D.f[B   ] = &DD[T   *size_Mat];
-			D.f[T   ] = &DD[B   *size_Mat];
-			D.f[SW  ] = &DD[NE  *size_Mat];
-			D.f[NE  ] = &DD[SW  *size_Mat];
-			D.f[NW  ] = &DD[SE  *size_Mat];
-			D.f[SE  ] = &DD[NW  *size_Mat];
-			D.f[BW  ] = &DD[TE  *size_Mat];
-			D.f[TE  ] = &DD[BW  *size_Mat];
-			D.f[TW  ] = &DD[BE  *size_Mat];
-			D.f[BE  ] = &DD[TW  *size_Mat];
-			D.f[BS  ] = &DD[TN  *size_Mat];
-			D.f[TN  ] = &DD[BS  *size_Mat];
-			D.f[TS  ] = &DD[BN  *size_Mat];
-			D.f[BN  ] = &DD[TS  *size_Mat];
-			D.f[REST] = &DD[REST*size_Mat];
-			D.f[TNE ] = &DD[BSW *size_Mat];
-			D.f[TSW ] = &DD[BNE *size_Mat];
-			D.f[TSE ] = &DD[BNW *size_Mat];
-			D.f[TNW ] = &DD[BSE *size_Mat];
-			D.f[BNE ] = &DD[TSW *size_Mat];
-			D.f[BSW ] = &DD[TNE *size_Mat];
-			D.f[BSE ] = &DD[TNW *size_Mat];
-			D.f[BNW ] = &DD[TSE *size_Mat];
+			D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+			D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+			D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+			D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+			D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+			D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+			D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+			D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+			D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+			D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+			D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+			D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+			D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+			D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+			D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+			D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+			D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+			D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+			D.f[DIR_000] = &DD[DIR_000*size_Mat];
+			D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+			D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+			D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+			D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+			D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+			D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+			D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+			D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
 		}
 		////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 		//Test
-		//(D.f[REST])[k]=c1o10;
+		//(D.f[DIR_000])[k]=c1o10;
 		real rhoDiff = drho - rho[k];
 		real VeloX = vx1;
 		real VeloY = vx2;
@@ -5092,182 +5200,182 @@ extern "C" __global__ void QPressDevice27_IntBB(real* rho,
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c2o27* (drho+c9o2*( vx1        )*( vx1        )-cu_sq); 
-			(D.f[W])[kw]=(c1o1-q)/(c1o1+q)*(f_E-f_W+(f_E+f_W-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_E+f_W)-c2o27*(rhoDiff + c6o1*( VeloX     )))/(c1o1+q);
+			(D.f[DIR_M00])[kw]=(c1o1-q)/(c1o1+q)*(f_E-f_W+(f_E+f_W-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_E+f_W)-c2o27*(rhoDiff + c6o1*( VeloX     )))/(c1o1+q);
 		}
 
 		q = q_dirW[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c2o27* (drho+c9o2*(-vx1        )*(-vx1        )-cu_sq); 
-			(D.f[E])[ke]=(c1o1-q)/(c1o1+q)*(f_W-f_E+(f_W+f_E-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_W+f_E)-c2o27*(rhoDiff + c6o1*(-VeloX     )))/(c1o1+q);
+			(D.f[DIR_P00])[ke]=(c1o1-q)/(c1o1+q)*(f_W-f_E+(f_W+f_E-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_W+f_E)-c2o27*(rhoDiff + c6o1*(-VeloX     )))/(c1o1+q);
 		}
 
 		q = q_dirN[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c2o27* (drho+c9o2*(     vx2    )*(     vx2    )-cu_sq); 
-			(D.f[S])[ks]=(c1o1-q)/(c1o1+q)*(f_N-f_S+(f_N+f_S-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_N+f_S)-c2o27*(rhoDiff + c6o1*( VeloY     )))/(c1o1+q);
+			(D.f[DIR_0M0])[ks]=(c1o1-q)/(c1o1+q)*(f_N-f_S+(f_N+f_S-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_N+f_S)-c2o27*(rhoDiff + c6o1*( VeloY     )))/(c1o1+q);
 		}
 
 		q = q_dirS[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c2o27* (drho+c9o2*(    -vx2    )*(    -vx2    )-cu_sq); 
-			(D.f[N])[kn]=(c1o1-q)/(c1o1+q)*(f_S-f_N+(f_S+f_N-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_S+f_N)-c2o27*(rhoDiff + c6o1*(-VeloY     )))/(c1o1+q);
+			(D.f[DIR_0P0])[kn]=(c1o1-q)/(c1o1+q)*(f_S-f_N+(f_S+f_N-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_S+f_N)-c2o27*(rhoDiff + c6o1*(-VeloY     )))/(c1o1+q);
 		}
 
 		q = q_dirT[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c2o27* (drho+c9o2*(         vx3)*(         vx3)-cu_sq); 
-			(D.f[B])[kb]=(c1o1-q)/(c1o1+q)*(f_T-f_B+(f_T+f_B-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_T+f_B)-c2o27*(rhoDiff + c6o1*( VeloZ     )))/(c1o1+q);
+			(D.f[DIR_00M])[kb]=(c1o1-q)/(c1o1+q)*(f_T-f_B+(f_T+f_B-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_T+f_B)-c2o27*(rhoDiff + c6o1*( VeloZ     )))/(c1o1+q);
 		}
 
 		q = q_dirB[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c2o27* (drho+c9o2*(        -vx3)*(        -vx3)-cu_sq); 
-			(D.f[T])[kt]=(c1o1-q)/(c1o1+q)*(f_B-f_T+(f_B+f_T-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_B+f_T)-c2o27*(rhoDiff + c6o1*(-VeloZ     )))/(c1o1+q);
+			(D.f[DIR_00P])[kt]=(c1o1-q)/(c1o1+q)*(f_B-f_T+(f_B+f_T-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_B+f_T)-c2o27*(rhoDiff + c6o1*(-VeloZ     )))/(c1o1+q);
 		}
 
 		q = q_dirNE[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c1o54* (drho+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq); 
-			(D.f[SW])[ksw]=(c1o1-q)/(c1o1+q)*(f_NE-f_SW+(f_NE+f_SW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_NE+f_SW)-c1o54*(rhoDiff + c6o1*(VeloX+VeloY)))/(c1o1+q);
+			(D.f[DIR_MM0])[ksw]=(c1o1-q)/(c1o1+q)*(f_NE-f_SW+(f_NE+f_SW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_NE+f_SW)-c1o54*(rhoDiff + c6o1*(VeloX+VeloY)))/(c1o1+q);
 		}
 
 		q = q_dirSW[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c1o54* (drho+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq); 
-			(D.f[NE])[kne]=(c1o1-q)/(c1o1+q)*(f_SW-f_NE+(f_SW+f_NE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_SW+f_NE)-c1o54*(rhoDiff + c6o1*(-VeloX-VeloY)))/(c1o1+q);
+			(D.f[DIR_PP0])[kne]=(c1o1-q)/(c1o1+q)*(f_SW-f_NE+(f_SW+f_NE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_SW+f_NE)-c1o54*(rhoDiff + c6o1*(-VeloX-VeloY)))/(c1o1+q);
 		}
 
 		q = q_dirSE[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c1o54* (drho+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq); 
-			(D.f[NW])[knw]=(c1o1-q)/(c1o1+q)*(f_SE-f_NW+(f_SE+f_NW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_SE+f_NW)-c1o54*(rhoDiff + c6o1*( VeloX-VeloY)))/(c1o1+q);
+			(D.f[DIR_MP0])[knw]=(c1o1-q)/(c1o1+q)*(f_SE-f_NW+(f_SE+f_NW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_SE+f_NW)-c1o54*(rhoDiff + c6o1*( VeloX-VeloY)))/(c1o1+q);
 		}
 
 		q = q_dirNW[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c1o54* (drho+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq); 
-			(D.f[SE])[kse]=(c1o1-q)/(c1o1+q)*(f_NW-f_SE+(f_NW+f_SE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_NW+f_SE)-c1o54*(rhoDiff + c6o1*(-VeloX+VeloY)))/(c1o1+q);
+			(D.f[DIR_PM0])[kse]=(c1o1-q)/(c1o1+q)*(f_NW-f_SE+(f_NW+f_SE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_NW+f_SE)-c1o54*(rhoDiff + c6o1*(-VeloX+VeloY)))/(c1o1+q);
 		}
 
 		q = q_dirTE[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c1o54* (drho+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq); 
-			(D.f[BW])[kbw]=(c1o1-q)/(c1o1+q)*(f_TE-f_BW+(f_TE+f_BW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TE+f_BW)-c1o54*(rhoDiff + c6o1*( VeloX+VeloZ)))/(c1o1+q);
+			(D.f[DIR_M0M])[kbw]=(c1o1-q)/(c1o1+q)*(f_TE-f_BW+(f_TE+f_BW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TE+f_BW)-c1o54*(rhoDiff + c6o1*( VeloX+VeloZ)))/(c1o1+q);
 		}
 
 		q = q_dirBW[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c1o54* (drho+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq); 
-			(D.f[TE])[kte]=(c1o1-q)/(c1o1+q)*(f_BW-f_TE+(f_BW+f_TE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BW+f_TE)-c1o54*(rhoDiff + c6o1*(-VeloX-VeloZ)))/(c1o1+q);
+			(D.f[DIR_P0P])[kte]=(c1o1-q)/(c1o1+q)*(f_BW-f_TE+(f_BW+f_TE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BW+f_TE)-c1o54*(rhoDiff + c6o1*(-VeloX-VeloZ)))/(c1o1+q);
 		}
 
 		q = q_dirBE[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c1o54* (drho+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq); 
-			(D.f[TW])[ktw]=(c1o1-q)/(c1o1+q)*(f_BE-f_TW+(f_BE+f_TW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BE+f_TW)-c1o54*(rhoDiff + c6o1*( VeloX-VeloZ)))/(c1o1+q);
+			(D.f[DIR_M0P])[ktw]=(c1o1-q)/(c1o1+q)*(f_BE-f_TW+(f_BE+f_TW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BE+f_TW)-c1o54*(rhoDiff + c6o1*( VeloX-VeloZ)))/(c1o1+q);
 		}
 
 		q = q_dirTW[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c1o54* (drho+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq); 
-			(D.f[BE])[kbe]=(c1o1-q)/(c1o1+q)*(f_TW-f_BE+(f_TW+f_BE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TW+f_BE)-c1o54*(rhoDiff + c6o1*(-VeloX+VeloZ)))/(c1o1+q);
+			(D.f[DIR_P0M])[kbe]=(c1o1-q)/(c1o1+q)*(f_TW-f_BE+(f_TW+f_BE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TW+f_BE)-c1o54*(rhoDiff + c6o1*(-VeloX+VeloZ)))/(c1o1+q);
 		}
 
 		q = q_dirTN[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c1o54* (drho+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq); 
-			(D.f[BS])[kbs]=(c1o1-q)/(c1o1+q)*(f_TN-f_BS+(f_TN+f_BS-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TN+f_BS)-c1o54*(rhoDiff + c6o1*( VeloY+VeloZ)))/(c1o1+q);
+			(D.f[DIR_0MM])[kbs]=(c1o1-q)/(c1o1+q)*(f_TN-f_BS+(f_TN+f_BS-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TN+f_BS)-c1o54*(rhoDiff + c6o1*( VeloY+VeloZ)))/(c1o1+q);
 		}
 
 		q = q_dirBS[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c1o54* (drho+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq); 
-			(D.f[TN])[ktn]=(c1o1-q)/(c1o1+q)*(f_BS-f_TN+(f_BS+f_TN-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BS+f_TN)-c1o54*(rhoDiff + c6o1*( -VeloY-VeloZ)))/(c1o1+q);
+			(D.f[DIR_0PP])[ktn]=(c1o1-q)/(c1o1+q)*(f_BS-f_TN+(f_BS+f_TN-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BS+f_TN)-c1o54*(rhoDiff + c6o1*( -VeloY-VeloZ)))/(c1o1+q);
 		}
 
 		q = q_dirBN[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c1o54* (drho+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq); 
-			(D.f[TS])[kts]=(c1o1-q)/(c1o1+q)*(f_BN-f_TS+(f_BN+f_TS-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BN+f_TS)-c1o54*(rhoDiff + c6o1*( VeloY-VeloZ)))/(c1o1+q);
+			(D.f[DIR_0MP])[kts]=(c1o1-q)/(c1o1+q)*(f_BN-f_TS+(f_BN+f_TS-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BN+f_TS)-c1o54*(rhoDiff + c6o1*( VeloY-VeloZ)))/(c1o1+q);
 		}
 
 		q = q_dirTS[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c1o54* (drho+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq); 
-			(D.f[BN])[kbn]=(c1o1-q)/(c1o1+q)*(f_TS-f_BN+(f_TS+f_BN-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TS+f_BN)-c1o54*(rhoDiff + c6o1*( -VeloY+VeloZ)))/(c1o1+q);
+			(D.f[DIR_0PM])[kbn]=(c1o1-q)/(c1o1+q)*(f_TS-f_BN+(f_TS+f_BN-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TS+f_BN)-c1o54*(rhoDiff + c6o1*( -VeloY+VeloZ)))/(c1o1+q);
 		}
 
 		q = q_dirTNE[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c1o216*(drho+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq); 
-			(D.f[BSW])[kbsw]=(c1o1-q)/(c1o1+q)*(f_TNE-f_BSW+(f_TNE+f_BSW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TNE+f_BSW)-c1o216*(rhoDiff + c6o1*( VeloX+VeloY+VeloZ)))/(c1o1+q);
+			(D.f[DIR_MMM])[kbsw]=(c1o1-q)/(c1o1+q)*(f_TNE-f_BSW+(f_TNE+f_BSW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TNE+f_BSW)-c1o216*(rhoDiff + c6o1*( VeloX+VeloY+VeloZ)))/(c1o1+q);
 		}
 
 		q = q_dirBSW[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c1o216*(drho+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq); 
-			(D.f[TNE])[ktne]=(c1o1-q)/(c1o1+q)*(f_BSW-f_TNE+(f_BSW+f_TNE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BSW+f_TNE)-c1o216*(rhoDiff + c6o1*(-VeloX-VeloY-VeloZ)))/(c1o1+q);
+			(D.f[DIR_PPP])[ktne]=(c1o1-q)/(c1o1+q)*(f_BSW-f_TNE+(f_BSW+f_TNE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BSW+f_TNE)-c1o216*(rhoDiff + c6o1*(-VeloX-VeloY-VeloZ)))/(c1o1+q);
 		}
 
 		q = q_dirBNE[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c1o216*(drho+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq); 
-			(D.f[TSW])[ktsw]=(c1o1-q)/(c1o1+q)*(f_BNE-f_TSW+(f_BNE+f_TSW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BNE+f_TSW)-c1o216*(rhoDiff + c6o1*( VeloX+VeloY-VeloZ)))/(c1o1+q);
+			(D.f[DIR_MMP])[ktsw]=(c1o1-q)/(c1o1+q)*(f_BNE-f_TSW+(f_BNE+f_TSW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BNE+f_TSW)-c1o216*(rhoDiff + c6o1*( VeloX+VeloY-VeloZ)))/(c1o1+q);
 		}
 
 		q = q_dirTSW[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c1o216*(drho+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq); 
-			(D.f[BNE])[kbne]=(c1o1-q)/(c1o1+q)*(f_TSW-f_BNE+(f_TSW+f_BNE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TSW+f_BNE)-c1o216*(rhoDiff + c6o1*(-VeloX-VeloY+VeloZ)))/(c1o1+q);
+			(D.f[DIR_PPM])[kbne]=(c1o1-q)/(c1o1+q)*(f_TSW-f_BNE+(f_TSW+f_BNE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TSW+f_BNE)-c1o216*(rhoDiff + c6o1*(-VeloX-VeloY+VeloZ)))/(c1o1+q);
 		}
 
 		q = q_dirTSE[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c1o216*(drho+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq); 
-			(D.f[BNW])[kbnw]=(c1o1-q)/(c1o1+q)*(f_TSE-f_BNW+(f_TSE+f_BNW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TSE+f_BNW)-c1o216*(rhoDiff + c6o1*( VeloX-VeloY+VeloZ)))/(c1o1+q);
+			(D.f[DIR_MPM])[kbnw]=(c1o1-q)/(c1o1+q)*(f_TSE-f_BNW+(f_TSE+f_BNW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TSE+f_BNW)-c1o216*(rhoDiff + c6o1*( VeloX-VeloY+VeloZ)))/(c1o1+q);
 		}
 
 		q = q_dirBNW[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c1o216*(drho+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq); 
-			(D.f[TSE])[ktse]=(c1o1-q)/(c1o1+q)*(f_BNW-f_TSE+(f_BNW+f_TSE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BNW+f_TSE)-c1o216*(rhoDiff + c6o1*(-VeloX+VeloY-VeloZ)))/(c1o1+q);
+			(D.f[DIR_PMP])[ktse]=(c1o1-q)/(c1o1+q)*(f_BNW-f_TSE+(f_BNW+f_TSE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BNW+f_TSE)-c1o216*(rhoDiff + c6o1*(-VeloX+VeloY-VeloZ)))/(c1o1+q);
 		}
 
 		q = q_dirBSE[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c1o216*(drho+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq); 
-			(D.f[TNW])[ktnw]=(c1o1-q)/(c1o1+q)*(f_BSE-f_TNW+(f_BSE+f_TNW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BSE+f_TNW)-c1o216*(rhoDiff + c6o1*( VeloX-VeloY-VeloZ)))/(c1o1+q);
+			(D.f[DIR_MPP])[ktnw]=(c1o1-q)/(c1o1+q)*(f_BSE-f_TNW+(f_BSE+f_TNW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BSE+f_TNW)-c1o216*(rhoDiff + c6o1*( VeloX-VeloY-VeloZ)))/(c1o1+q);
 		}
 
 		q = q_dirTNW[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c1o216*(drho+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq); 
-			(D.f[BSE])[kbse]=(c1o1-q)/(c1o1+q)*(f_TNW-f_BSE+(f_TNW+f_BSE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TNW+f_BSE)-c1o216*(rhoDiff + c6o1*(-VeloX+VeloY+VeloZ)))/(c1o1+q);
+			(D.f[DIR_PMM])[kbse]=(c1o1-q)/(c1o1+q)*(f_TNW-f_BSE+(f_TNW+f_BSE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TNW+f_BSE)-c1o216*(rhoDiff + c6o1*(-VeloX+VeloY+VeloZ)))/(c1o1+q);
 		}
 	}
 }
diff --git a/src/gpu/VirtualFluids_GPU/GPU/Random.cu b/src/gpu/VirtualFluids_GPU/GPU/Random.cu
index 04e3d3aba2f168043fecb3b83498671cd8474de7..a605fbd42d2977e0f0b6e15aeb50f8c78654f31c 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/Random.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/Random.cu
@@ -12,7 +12,7 @@ using namespace vf::lbm::dir;
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void initRandom(curandState* state)
+__global__ void initRandom(curandState* state)
 {
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -36,7 +36,7 @@ extern "C" __global__ void initRandom(curandState* state)
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void generateRandomValues(curandState* state, real* randArray)
+__global__ void generateRandomValues(curandState* state, real* randArray)
 {
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
diff --git a/src/gpu/VirtualFluids_GPU/GPU/ScaleCF27.cu b/src/gpu/VirtualFluids_GPU/GPU/ScaleCF27.cu
index 1347f8dd0e5031d55a4417b3ba59493d0f6db311..1fb4c558d52d243848f9192abf259980d406d644 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/ScaleCF27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/ScaleCF27.cu
@@ -14,7 +14,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleCF_0817_comp_27( real* DC, 
+__global__ void scaleCF_0817_comp_27( real* DC, 
 												 real* DF, 
 												 unsigned int* neighborCX,
 												 unsigned int* neighborCY,
@@ -43,33 +43,33 @@ extern "C" __global__ void scaleCF_0817_comp_27( real* DC,
 		*f000dest, *fMMMdest, *fMMPdest, *fMPPdest, *fMPMdest, *fPPMdest, *fPPPdest, *fPMPdest, *fPMMdest;
 
 
-	fP00dest = &DF[E   *size_MatF];
-	fM00dest = &DF[W   *size_MatF];
-	f0P0dest = &DF[N   *size_MatF];
-	f0M0dest = &DF[S   *size_MatF];
-	f00Pdest = &DF[T   *size_MatF];
-	f00Mdest = &DF[B   *size_MatF];
-	fPP0dest = &DF[NE  *size_MatF];
-	fMM0dest = &DF[SW  *size_MatF];
-	fPM0dest = &DF[SE  *size_MatF];
-	fMP0dest = &DF[NW  *size_MatF];
-	fP0Pdest = &DF[TE  *size_MatF];
-	fM0Mdest = &DF[BW  *size_MatF];
-	fP0Mdest = &DF[BE  *size_MatF];
-	fM0Pdest = &DF[TW  *size_MatF];
-	f0PPdest = &DF[TN  *size_MatF];
-	f0MMdest = &DF[BS  *size_MatF];
-	f0PMdest = &DF[BN  *size_MatF];
-	f0MPdest = &DF[TS  *size_MatF];
-	f000dest = &DF[REST*size_MatF];
-	fMMMdest = &DF[BSW *size_MatF];
-	fMMPdest = &DF[TSW *size_MatF];
-	fMPPdest = &DF[TNW *size_MatF];
-	fMPMdest = &DF[BNW *size_MatF];
-	fPPMdest = &DF[BNE *size_MatF];
-	fPPPdest = &DF[TNE *size_MatF];
-	fPMPdest = &DF[TSE *size_MatF];
-	fPMMdest = &DF[BSE *size_MatF];
+	fP00dest = &DF[DIR_P00   *size_MatF];
+	fM00dest = &DF[DIR_M00   *size_MatF];
+	f0P0dest = &DF[DIR_0P0   *size_MatF];
+	f0M0dest = &DF[DIR_0M0   *size_MatF];
+	f00Pdest = &DF[DIR_00P   *size_MatF];
+	f00Mdest = &DF[DIR_00M   *size_MatF];
+	fPP0dest = &DF[DIR_PP0  *size_MatF];
+	fMM0dest = &DF[DIR_MM0  *size_MatF];
+	fPM0dest = &DF[DIR_PM0  *size_MatF];
+	fMP0dest = &DF[DIR_MP0  *size_MatF];
+	fP0Pdest = &DF[DIR_P0P  *size_MatF];
+	fM0Mdest = &DF[DIR_M0M  *size_MatF];
+	fP0Mdest = &DF[DIR_P0M  *size_MatF];
+	fM0Pdest = &DF[DIR_M0P  *size_MatF];
+	f0PPdest = &DF[DIR_0PP  *size_MatF];
+	f0MMdest = &DF[DIR_0MM  *size_MatF];
+	f0PMdest = &DF[DIR_0PM  *size_MatF];
+	f0MPdest = &DF[DIR_0MP  *size_MatF];
+	f000dest = &DF[DIR_000*size_MatF];
+	fMMMdest = &DF[DIR_MMM *size_MatF];
+	fMMPdest = &DF[DIR_MMP *size_MatF];
+	fMPPdest = &DF[DIR_MPP *size_MatF];
+	fMPMdest = &DF[DIR_MPM *size_MatF];
+	fPPMdest = &DF[DIR_PPM *size_MatF];
+	fPPPdest = &DF[DIR_PPP *size_MatF];
+	fPMPdest = &DF[DIR_PMP *size_MatF];
+	fPMMdest = &DF[DIR_PMM *size_MatF];
 
 	real
 		*fP00source, *fM00source, *f0P0source, *f0M0source, *f00Psource, *f00Msource, *fPP0source, *fMM0source, *fPM0source,
@@ -78,63 +78,63 @@ extern "C" __global__ void scaleCF_0817_comp_27( real* DC,
 
 	if (isEvenTimestep == true)
 	{
-		fP00source = &DC[E   *size_MatC];
-		fM00source = &DC[W   *size_MatC];
-		f0P0source = &DC[N   *size_MatC];
-		f0M0source = &DC[S   *size_MatC];
-		f00Psource = &DC[T   *size_MatC];
-		f00Msource = &DC[B   *size_MatC];
-		fPP0source = &DC[NE  *size_MatC];
-		fMM0source = &DC[SW  *size_MatC];
-		fPM0source = &DC[SE  *size_MatC];
-		fMP0source = &DC[NW  *size_MatC];
-		fP0Psource = &DC[TE  *size_MatC];
-		fM0Msource = &DC[BW  *size_MatC];
-		fP0Msource = &DC[BE  *size_MatC];
-		fM0Psource = &DC[TW  *size_MatC];
-		f0PPsource = &DC[TN  *size_MatC];
-		f0MMsource = &DC[BS  *size_MatC];
-		f0PMsource = &DC[BN  *size_MatC];
-		f0MPsource = &DC[TS  *size_MatC];
-		f000source = &DC[REST*size_MatC];
-		fMMMsource = &DC[BSW *size_MatC];
-		fMMPsource = &DC[TSW *size_MatC];
-		fMPPsource = &DC[TNW *size_MatC];
-		fMPMsource = &DC[BNW *size_MatC];
-		fPPMsource = &DC[BNE *size_MatC];
-		fPPPsource = &DC[TNE *size_MatC];
-		fPMPsource = &DC[TSE *size_MatC];
-		fPMMsource = &DC[BSE *size_MatC];
+		fP00source = &DC[DIR_P00   *size_MatC];
+		fM00source = &DC[DIR_M00   *size_MatC];
+		f0P0source = &DC[DIR_0P0   *size_MatC];
+		f0M0source = &DC[DIR_0M0   *size_MatC];
+		f00Psource = &DC[DIR_00P   *size_MatC];
+		f00Msource = &DC[DIR_00M   *size_MatC];
+		fPP0source = &DC[DIR_PP0  *size_MatC];
+		fMM0source = &DC[DIR_MM0  *size_MatC];
+		fPM0source = &DC[DIR_PM0  *size_MatC];
+		fMP0source = &DC[DIR_MP0  *size_MatC];
+		fP0Psource = &DC[DIR_P0P  *size_MatC];
+		fM0Msource = &DC[DIR_M0M  *size_MatC];
+		fP0Msource = &DC[DIR_P0M  *size_MatC];
+		fM0Psource = &DC[DIR_M0P  *size_MatC];
+		f0PPsource = &DC[DIR_0PP  *size_MatC];
+		f0MMsource = &DC[DIR_0MM  *size_MatC];
+		f0PMsource = &DC[DIR_0PM  *size_MatC];
+		f0MPsource = &DC[DIR_0MP  *size_MatC];
+		f000source = &DC[DIR_000*size_MatC];
+		fMMMsource = &DC[DIR_MMM *size_MatC];
+		fMMPsource = &DC[DIR_MMP *size_MatC];
+		fMPPsource = &DC[DIR_MPP *size_MatC];
+		fMPMsource = &DC[DIR_MPM *size_MatC];
+		fPPMsource = &DC[DIR_PPM *size_MatC];
+		fPPPsource = &DC[DIR_PPP *size_MatC];
+		fPMPsource = &DC[DIR_PMP *size_MatC];
+		fPMMsource = &DC[DIR_PMM *size_MatC];
 	}
 	else
 	{
-		fP00source = &DC[W   *size_MatC];
-		fM00source = &DC[E   *size_MatC];
-		f0P0source = &DC[S   *size_MatC];
-		f0M0source = &DC[N   *size_MatC];
-		f00Psource = &DC[B   *size_MatC];
-		f00Msource = &DC[T   *size_MatC];
-		fPP0source = &DC[SW  *size_MatC];
-		fMM0source = &DC[NE  *size_MatC];
-		fPM0source = &DC[NW  *size_MatC];
-		fMP0source = &DC[SE  *size_MatC];
-		fP0Psource = &DC[BW  *size_MatC];
-		fM0Msource = &DC[TE  *size_MatC];
-		fP0Msource = &DC[TW  *size_MatC];
-		fM0Psource = &DC[BE  *size_MatC];
-		f0PPsource = &DC[BS  *size_MatC];
-		f0MMsource = &DC[TN  *size_MatC];
-		f0PMsource = &DC[TS  *size_MatC];
-		f0MPsource = &DC[BN  *size_MatC];
-		f000source = &DC[REST*size_MatC];
-		fMMMsource = &DC[TNE *size_MatC];
-		fMMPsource = &DC[BNE *size_MatC];
-		fMPPsource = &DC[BSE *size_MatC];
-		fMPMsource = &DC[TSE *size_MatC];
-		fPPMsource = &DC[TSW *size_MatC];
-		fPPPsource = &DC[BSW *size_MatC];
-		fPMPsource = &DC[BNW *size_MatC];
-		fPMMsource = &DC[TNW *size_MatC];
+		fP00source = &DC[DIR_M00   *size_MatC];
+		fM00source = &DC[DIR_P00   *size_MatC];
+		f0P0source = &DC[DIR_0M0   *size_MatC];
+		f0M0source = &DC[DIR_0P0   *size_MatC];
+		f00Psource = &DC[DIR_00M   *size_MatC];
+		f00Msource = &DC[DIR_00P   *size_MatC];
+		fPP0source = &DC[DIR_MM0  *size_MatC];
+		fMM0source = &DC[DIR_PP0  *size_MatC];
+		fPM0source = &DC[DIR_MP0  *size_MatC];
+		fMP0source = &DC[DIR_PM0  *size_MatC];
+		fP0Psource = &DC[DIR_M0M  *size_MatC];
+		fM0Msource = &DC[DIR_P0P  *size_MatC];
+		fP0Msource = &DC[DIR_M0P  *size_MatC];
+		fM0Psource = &DC[DIR_P0M  *size_MatC];
+		f0PPsource = &DC[DIR_0MM  *size_MatC];
+		f0MMsource = &DC[DIR_0PP  *size_MatC];
+		f0PMsource = &DC[DIR_0MP  *size_MatC];
+		f0MPsource = &DC[DIR_0PM  *size_MatC];
+		f000source = &DC[DIR_000*size_MatC];
+		fMMMsource = &DC[DIR_PPP *size_MatC];
+		fMMPsource = &DC[DIR_PPM *size_MatC];
+		fMPPsource = &DC[DIR_PMM *size_MatC];
+		fMPMsource = &DC[DIR_PMP *size_MatC];
+		fPPMsource = &DC[DIR_MMP *size_MatC];
+		fPPPsource = &DC[DIR_MMM *size_MatC];
+		fPMPsource = &DC[DIR_MPM *size_MatC];
+		fPMMsource = &DC[DIR_MPP *size_MatC];
 	}
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  ix = threadIdx.x;  // Globaler x-Index 
@@ -4083,7 +4083,7 @@ extern "C" __global__ void scaleCF_0817_comp_27( real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleCF_AA2016_comp_27(real* DC, 
+__global__ void scaleCF_AA2016_comp_27(real* DC, 
 												  real* DF, 
 												  unsigned int* neighborCX,
 												  unsigned int* neighborCY,
@@ -4109,96 +4109,96 @@ extern "C" __global__ void scaleCF_AA2016_comp_27(real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[E   *size_MatF];
-   fwF    = &DF[W   *size_MatF];
-   fnF    = &DF[N   *size_MatF];
-   fsF    = &DF[S   *size_MatF];
-   ftF    = &DF[T   *size_MatF];
-   fbF    = &DF[B   *size_MatF];
-   fneF   = &DF[NE  *size_MatF];
-   fswF   = &DF[SW  *size_MatF];
-   fseF   = &DF[SE  *size_MatF];
-   fnwF   = &DF[NW  *size_MatF];
-   fteF   = &DF[TE  *size_MatF];
-   fbwF   = &DF[BW  *size_MatF];
-   fbeF   = &DF[BE  *size_MatF];
-   ftwF   = &DF[TW  *size_MatF];
-   ftnF   = &DF[TN  *size_MatF];
-   fbsF   = &DF[BS  *size_MatF];
-   fbnF   = &DF[BN  *size_MatF];
-   ftsF   = &DF[TS  *size_MatF];
-   fzeroF = &DF[REST*size_MatF];
-   ftneF  = &DF[TNE *size_MatF];
-   ftswF  = &DF[TSW *size_MatF];
-   ftseF  = &DF[TSE *size_MatF];
-   ftnwF  = &DF[TNW *size_MatF];
-   fbneF  = &DF[BNE *size_MatF];
-   fbswF  = &DF[BSW *size_MatF];
-   fbseF  = &DF[BSE *size_MatF];
-   fbnwF  = &DF[BNW *size_MatF];
+   feF    = &DF[DIR_P00   *size_MatF];
+   fwF    = &DF[DIR_M00   *size_MatF];
+   fnF    = &DF[DIR_0P0   *size_MatF];
+   fsF    = &DF[DIR_0M0   *size_MatF];
+   ftF    = &DF[DIR_00P   *size_MatF];
+   fbF    = &DF[DIR_00M   *size_MatF];
+   fneF   = &DF[DIR_PP0  *size_MatF];
+   fswF   = &DF[DIR_MM0  *size_MatF];
+   fseF   = &DF[DIR_PM0  *size_MatF];
+   fnwF   = &DF[DIR_MP0  *size_MatF];
+   fteF   = &DF[DIR_P0P  *size_MatF];
+   fbwF   = &DF[DIR_M0M  *size_MatF];
+   fbeF   = &DF[DIR_P0M  *size_MatF];
+   ftwF   = &DF[DIR_M0P  *size_MatF];
+   ftnF   = &DF[DIR_0PP  *size_MatF];
+   fbsF   = &DF[DIR_0MM  *size_MatF];
+   fbnF   = &DF[DIR_0PM  *size_MatF];
+   ftsF   = &DF[DIR_0MP  *size_MatF];
+   fzeroF = &DF[DIR_000*size_MatF];
+   ftneF  = &DF[DIR_PPP *size_MatF];
+   ftswF  = &DF[DIR_MMP *size_MatF];
+   ftseF  = &DF[DIR_PMP *size_MatF];
+   ftnwF  = &DF[DIR_MPP *size_MatF];
+   fbneF  = &DF[DIR_PPM *size_MatF];
+   fbswF  = &DF[DIR_MMM *size_MatF];
+   fbseF  = &DF[DIR_PMM *size_MatF];
+   fbnwF  = &DF[DIR_MPM *size_MatF];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[E   *size_MatC];
-      fwC    = &DC[W   *size_MatC];
-      fnC    = &DC[N   *size_MatC];
-      fsC    = &DC[S   *size_MatC];
-      ftC    = &DC[T   *size_MatC];
-      fbC    = &DC[B   *size_MatC];
-      fneC   = &DC[NE  *size_MatC];
-      fswC   = &DC[SW  *size_MatC];
-      fseC   = &DC[SE  *size_MatC];
-      fnwC   = &DC[NW  *size_MatC];
-      fteC   = &DC[TE  *size_MatC];
-      fbwC   = &DC[BW  *size_MatC];
-      fbeC   = &DC[BE  *size_MatC];
-      ftwC   = &DC[TW  *size_MatC];
-      ftnC   = &DC[TN  *size_MatC];
-      fbsC   = &DC[BS  *size_MatC];
-      fbnC   = &DC[BN  *size_MatC];
-      ftsC   = &DC[TS  *size_MatC];
-      fzeroC = &DC[REST*size_MatC];
-      ftneC  = &DC[TNE *size_MatC];
-      ftswC  = &DC[TSW *size_MatC];
-      ftseC  = &DC[TSE *size_MatC];
-      ftnwC  = &DC[TNW *size_MatC];
-      fbneC  = &DC[BNE *size_MatC];
-      fbswC  = &DC[BSW *size_MatC];
-      fbseC  = &DC[BSE *size_MatC];
-      fbnwC  = &DC[BNW *size_MatC];
+      feC    = &DC[DIR_P00   *size_MatC];
+      fwC    = &DC[DIR_M00   *size_MatC];
+      fnC    = &DC[DIR_0P0   *size_MatC];
+      fsC    = &DC[DIR_0M0   *size_MatC];
+      ftC    = &DC[DIR_00P   *size_MatC];
+      fbC    = &DC[DIR_00M   *size_MatC];
+      fneC   = &DC[DIR_PP0  *size_MatC];
+      fswC   = &DC[DIR_MM0  *size_MatC];
+      fseC   = &DC[DIR_PM0  *size_MatC];
+      fnwC   = &DC[DIR_MP0  *size_MatC];
+      fteC   = &DC[DIR_P0P  *size_MatC];
+      fbwC   = &DC[DIR_M0M  *size_MatC];
+      fbeC   = &DC[DIR_P0M  *size_MatC];
+      ftwC   = &DC[DIR_M0P  *size_MatC];
+      ftnC   = &DC[DIR_0PP  *size_MatC];
+      fbsC   = &DC[DIR_0MM  *size_MatC];
+      fbnC   = &DC[DIR_0PM  *size_MatC];
+      ftsC   = &DC[DIR_0MP  *size_MatC];
+      fzeroC = &DC[DIR_000*size_MatC];
+      ftneC  = &DC[DIR_PPP *size_MatC];
+      ftswC  = &DC[DIR_MMP *size_MatC];
+      ftseC  = &DC[DIR_PMP *size_MatC];
+      ftnwC  = &DC[DIR_MPP *size_MatC];
+      fbneC  = &DC[DIR_PPM *size_MatC];
+      fbswC  = &DC[DIR_MMM *size_MatC];
+      fbseC  = &DC[DIR_PMM *size_MatC];
+      fbnwC  = &DC[DIR_MPM *size_MatC];
    } 
    else
    {
-      fwC    = &DC[E   *size_MatC];
-      feC    = &DC[W   *size_MatC];
-      fsC    = &DC[N   *size_MatC];
-      fnC    = &DC[S   *size_MatC];
-      fbC    = &DC[T   *size_MatC];
-      ftC    = &DC[B   *size_MatC];
-      fswC   = &DC[NE  *size_MatC];
-      fneC   = &DC[SW  *size_MatC];
-      fnwC   = &DC[SE  *size_MatC];
-      fseC   = &DC[NW  *size_MatC];
-      fbwC   = &DC[TE  *size_MatC];
-      fteC   = &DC[BW  *size_MatC];
-      ftwC   = &DC[BE  *size_MatC];
-      fbeC   = &DC[TW  *size_MatC];
-      fbsC   = &DC[TN  *size_MatC];
-      ftnC   = &DC[BS  *size_MatC];
-      ftsC   = &DC[BN  *size_MatC];
-      fbnC   = &DC[TS  *size_MatC];
-      fzeroC = &DC[REST*size_MatC];
-      fbswC  = &DC[TNE *size_MatC];
-      fbneC  = &DC[TSW *size_MatC];
-      fbnwC  = &DC[TSE *size_MatC];
-      fbseC  = &DC[TNW *size_MatC];
-      ftswC  = &DC[BNE *size_MatC];
-      ftneC  = &DC[BSW *size_MatC];
-      ftnwC  = &DC[BSE *size_MatC];
-      ftseC  = &DC[BNW *size_MatC];
+      fwC    = &DC[DIR_P00   *size_MatC];
+      feC    = &DC[DIR_M00   *size_MatC];
+      fsC    = &DC[DIR_0P0   *size_MatC];
+      fnC    = &DC[DIR_0M0   *size_MatC];
+      fbC    = &DC[DIR_00P   *size_MatC];
+      ftC    = &DC[DIR_00M   *size_MatC];
+      fswC   = &DC[DIR_PP0  *size_MatC];
+      fneC   = &DC[DIR_MM0  *size_MatC];
+      fnwC   = &DC[DIR_PM0  *size_MatC];
+      fseC   = &DC[DIR_MP0  *size_MatC];
+      fbwC   = &DC[DIR_P0P  *size_MatC];
+      fteC   = &DC[DIR_M0M  *size_MatC];
+      ftwC   = &DC[DIR_P0M  *size_MatC];
+      fbeC   = &DC[DIR_M0P  *size_MatC];
+      fbsC   = &DC[DIR_0PP  *size_MatC];
+      ftnC   = &DC[DIR_0MM  *size_MatC];
+      ftsC   = &DC[DIR_0PM  *size_MatC];
+      fbnC   = &DC[DIR_0MP  *size_MatC];
+      fzeroC = &DC[DIR_000*size_MatC];
+      fbswC  = &DC[DIR_PPP *size_MatC];
+      fbneC  = &DC[DIR_MMP *size_MatC];
+      fbnwC  = &DC[DIR_PMP *size_MatC];
+      fbseC  = &DC[DIR_MPP *size_MatC];
+      ftswC  = &DC[DIR_PPM *size_MatC];
+      ftneC  = &DC[DIR_MMM *size_MatC];
+      ftnwC  = &DC[DIR_PMM *size_MatC];
+      ftseC  = &DC[DIR_MPM *size_MatC];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -10966,7 +10966,7 @@ extern "C" __global__ void scaleCF_AA2016_comp_27(real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleCF_RhoSq_3rdMom_comp_27(real* DC, 
+__global__ void scaleCF_RhoSq_3rdMom_comp_27(real* DC, 
 														real* DF, 
 														unsigned int* neighborCX,
 														unsigned int* neighborCY,
@@ -10992,96 +10992,96 @@ extern "C" __global__ void scaleCF_RhoSq_3rdMom_comp_27(real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[E   *size_MatF];
-   fwF    = &DF[W   *size_MatF];
-   fnF    = &DF[N   *size_MatF];
-   fsF    = &DF[S   *size_MatF];
-   ftF    = &DF[T   *size_MatF];
-   fbF    = &DF[B   *size_MatF];
-   fneF   = &DF[NE  *size_MatF];
-   fswF   = &DF[SW  *size_MatF];
-   fseF   = &DF[SE  *size_MatF];
-   fnwF   = &DF[NW  *size_MatF];
-   fteF   = &DF[TE  *size_MatF];
-   fbwF   = &DF[BW  *size_MatF];
-   fbeF   = &DF[BE  *size_MatF];
-   ftwF   = &DF[TW  *size_MatF];
-   ftnF   = &DF[TN  *size_MatF];
-   fbsF   = &DF[BS  *size_MatF];
-   fbnF   = &DF[BN  *size_MatF];
-   ftsF   = &DF[TS  *size_MatF];
-   fzeroF = &DF[REST*size_MatF];
-   ftneF  = &DF[TNE *size_MatF];
-   ftswF  = &DF[TSW *size_MatF];
-   ftseF  = &DF[TSE *size_MatF];
-   ftnwF  = &DF[TNW *size_MatF];
-   fbneF  = &DF[BNE *size_MatF];
-   fbswF  = &DF[BSW *size_MatF];
-   fbseF  = &DF[BSE *size_MatF];
-   fbnwF  = &DF[BNW *size_MatF];
+   feF    = &DF[DIR_P00   *size_MatF];
+   fwF    = &DF[DIR_M00   *size_MatF];
+   fnF    = &DF[DIR_0P0   *size_MatF];
+   fsF    = &DF[DIR_0M0   *size_MatF];
+   ftF    = &DF[DIR_00P   *size_MatF];
+   fbF    = &DF[DIR_00M   *size_MatF];
+   fneF   = &DF[DIR_PP0  *size_MatF];
+   fswF   = &DF[DIR_MM0  *size_MatF];
+   fseF   = &DF[DIR_PM0  *size_MatF];
+   fnwF   = &DF[DIR_MP0  *size_MatF];
+   fteF   = &DF[DIR_P0P  *size_MatF];
+   fbwF   = &DF[DIR_M0M  *size_MatF];
+   fbeF   = &DF[DIR_P0M  *size_MatF];
+   ftwF   = &DF[DIR_M0P  *size_MatF];
+   ftnF   = &DF[DIR_0PP  *size_MatF];
+   fbsF   = &DF[DIR_0MM  *size_MatF];
+   fbnF   = &DF[DIR_0PM  *size_MatF];
+   ftsF   = &DF[DIR_0MP  *size_MatF];
+   fzeroF = &DF[DIR_000*size_MatF];
+   ftneF  = &DF[DIR_PPP *size_MatF];
+   ftswF  = &DF[DIR_MMP *size_MatF];
+   ftseF  = &DF[DIR_PMP *size_MatF];
+   ftnwF  = &DF[DIR_MPP *size_MatF];
+   fbneF  = &DF[DIR_PPM *size_MatF];
+   fbswF  = &DF[DIR_MMM *size_MatF];
+   fbseF  = &DF[DIR_PMM *size_MatF];
+   fbnwF  = &DF[DIR_MPM *size_MatF];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[E   *size_MatC];
-      fwC    = &DC[W   *size_MatC];
-      fnC    = &DC[N   *size_MatC];
-      fsC    = &DC[S   *size_MatC];
-      ftC    = &DC[T   *size_MatC];
-      fbC    = &DC[B   *size_MatC];
-      fneC   = &DC[NE  *size_MatC];
-      fswC   = &DC[SW  *size_MatC];
-      fseC   = &DC[SE  *size_MatC];
-      fnwC   = &DC[NW  *size_MatC];
-      fteC   = &DC[TE  *size_MatC];
-      fbwC   = &DC[BW  *size_MatC];
-      fbeC   = &DC[BE  *size_MatC];
-      ftwC   = &DC[TW  *size_MatC];
-      ftnC   = &DC[TN  *size_MatC];
-      fbsC   = &DC[BS  *size_MatC];
-      fbnC   = &DC[BN  *size_MatC];
-      ftsC   = &DC[TS  *size_MatC];
-      fzeroC = &DC[REST*size_MatC];
-      ftneC  = &DC[TNE *size_MatC];
-      ftswC  = &DC[TSW *size_MatC];
-      ftseC  = &DC[TSE *size_MatC];
-      ftnwC  = &DC[TNW *size_MatC];
-      fbneC  = &DC[BNE *size_MatC];
-      fbswC  = &DC[BSW *size_MatC];
-      fbseC  = &DC[BSE *size_MatC];
-      fbnwC  = &DC[BNW *size_MatC];
+      feC    = &DC[DIR_P00   *size_MatC];
+      fwC    = &DC[DIR_M00   *size_MatC];
+      fnC    = &DC[DIR_0P0   *size_MatC];
+      fsC    = &DC[DIR_0M0   *size_MatC];
+      ftC    = &DC[DIR_00P   *size_MatC];
+      fbC    = &DC[DIR_00M   *size_MatC];
+      fneC   = &DC[DIR_PP0  *size_MatC];
+      fswC   = &DC[DIR_MM0  *size_MatC];
+      fseC   = &DC[DIR_PM0  *size_MatC];
+      fnwC   = &DC[DIR_MP0  *size_MatC];
+      fteC   = &DC[DIR_P0P  *size_MatC];
+      fbwC   = &DC[DIR_M0M  *size_MatC];
+      fbeC   = &DC[DIR_P0M  *size_MatC];
+      ftwC   = &DC[DIR_M0P  *size_MatC];
+      ftnC   = &DC[DIR_0PP  *size_MatC];
+      fbsC   = &DC[DIR_0MM  *size_MatC];
+      fbnC   = &DC[DIR_0PM  *size_MatC];
+      ftsC   = &DC[DIR_0MP  *size_MatC];
+      fzeroC = &DC[DIR_000*size_MatC];
+      ftneC  = &DC[DIR_PPP *size_MatC];
+      ftswC  = &DC[DIR_MMP *size_MatC];
+      ftseC  = &DC[DIR_PMP *size_MatC];
+      ftnwC  = &DC[DIR_MPP *size_MatC];
+      fbneC  = &DC[DIR_PPM *size_MatC];
+      fbswC  = &DC[DIR_MMM *size_MatC];
+      fbseC  = &DC[DIR_PMM *size_MatC];
+      fbnwC  = &DC[DIR_MPM *size_MatC];
    } 
    else
    {
-      fwC    = &DC[E   *size_MatC];
-      feC    = &DC[W   *size_MatC];
-      fsC    = &DC[N   *size_MatC];
-      fnC    = &DC[S   *size_MatC];
-      fbC    = &DC[T   *size_MatC];
-      ftC    = &DC[B   *size_MatC];
-      fswC   = &DC[NE  *size_MatC];
-      fneC   = &DC[SW  *size_MatC];
-      fnwC   = &DC[SE  *size_MatC];
-      fseC   = &DC[NW  *size_MatC];
-      fbwC   = &DC[TE  *size_MatC];
-      fteC   = &DC[BW  *size_MatC];
-      ftwC   = &DC[BE  *size_MatC];
-      fbeC   = &DC[TW  *size_MatC];
-      fbsC   = &DC[TN  *size_MatC];
-      ftnC   = &DC[BS  *size_MatC];
-      ftsC   = &DC[BN  *size_MatC];
-      fbnC   = &DC[TS  *size_MatC];
-      fzeroC = &DC[REST*size_MatC];
-      fbswC  = &DC[TNE *size_MatC];
-      fbneC  = &DC[TSW *size_MatC];
-      fbnwC  = &DC[TSE *size_MatC];
-      fbseC  = &DC[TNW *size_MatC];
-      ftswC  = &DC[BNE *size_MatC];
-      ftneC  = &DC[BSW *size_MatC];
-      ftnwC  = &DC[BSE *size_MatC];
-      ftseC  = &DC[BNW *size_MatC];
+      fwC    = &DC[DIR_P00   *size_MatC];
+      feC    = &DC[DIR_M00   *size_MatC];
+      fsC    = &DC[DIR_0P0   *size_MatC];
+      fnC    = &DC[DIR_0M0   *size_MatC];
+      fbC    = &DC[DIR_00P   *size_MatC];
+      ftC    = &DC[DIR_00M   *size_MatC];
+      fswC   = &DC[DIR_PP0  *size_MatC];
+      fneC   = &DC[DIR_MM0  *size_MatC];
+      fnwC   = &DC[DIR_PM0  *size_MatC];
+      fseC   = &DC[DIR_MP0  *size_MatC];
+      fbwC   = &DC[DIR_P0P  *size_MatC];
+      fteC   = &DC[DIR_M0M  *size_MatC];
+      ftwC   = &DC[DIR_P0M  *size_MatC];
+      fbeC   = &DC[DIR_M0P  *size_MatC];
+      fbsC   = &DC[DIR_0PP  *size_MatC];
+      ftnC   = &DC[DIR_0MM  *size_MatC];
+      ftsC   = &DC[DIR_0PM  *size_MatC];
+      fbnC   = &DC[DIR_0MP  *size_MatC];
+      fzeroC = &DC[DIR_000*size_MatC];
+      fbswC  = &DC[DIR_PPP *size_MatC];
+      fbneC  = &DC[DIR_MMP *size_MatC];
+      fbnwC  = &DC[DIR_PMP *size_MatC];
+      fbseC  = &DC[DIR_MPP *size_MatC];
+      ftswC  = &DC[DIR_PPM *size_MatC];
+      ftneC  = &DC[DIR_MMM *size_MatC];
+      ftnwC  = &DC[DIR_PMM *size_MatC];
+      ftseC  = &DC[DIR_MPM *size_MatC];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -17841,7 +17841,7 @@ extern "C" __global__ void scaleCF_RhoSq_3rdMom_comp_27(real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleCF_RhoSq_comp_27(real* DC, 
+__global__ void scaleCF_RhoSq_comp_27(real* DC, 
 												 real* DF, 
 												 unsigned int* neighborCX,
 												 unsigned int* neighborCY,
@@ -17867,96 +17867,96 @@ extern "C" __global__ void scaleCF_RhoSq_comp_27(real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[E   *size_MatF];
-   fwF    = &DF[W   *size_MatF];
-   fnF    = &DF[N   *size_MatF];
-   fsF    = &DF[S   *size_MatF];
-   ftF    = &DF[T   *size_MatF];
-   fbF    = &DF[B   *size_MatF];
-   fneF   = &DF[NE  *size_MatF];
-   fswF   = &DF[SW  *size_MatF];
-   fseF   = &DF[SE  *size_MatF];
-   fnwF   = &DF[NW  *size_MatF];
-   fteF   = &DF[TE  *size_MatF];
-   fbwF   = &DF[BW  *size_MatF];
-   fbeF   = &DF[BE  *size_MatF];
-   ftwF   = &DF[TW  *size_MatF];
-   ftnF   = &DF[TN  *size_MatF];
-   fbsF   = &DF[BS  *size_MatF];
-   fbnF   = &DF[BN  *size_MatF];
-   ftsF   = &DF[TS  *size_MatF];
-   fzeroF = &DF[REST*size_MatF];
-   ftneF  = &DF[TNE *size_MatF];
-   ftswF  = &DF[TSW *size_MatF];
-   ftseF  = &DF[TSE *size_MatF];
-   ftnwF  = &DF[TNW *size_MatF];
-   fbneF  = &DF[BNE *size_MatF];
-   fbswF  = &DF[BSW *size_MatF];
-   fbseF  = &DF[BSE *size_MatF];
-   fbnwF  = &DF[BNW *size_MatF];
+   feF    = &DF[DIR_P00   *size_MatF];
+   fwF    = &DF[DIR_M00   *size_MatF];
+   fnF    = &DF[DIR_0P0   *size_MatF];
+   fsF    = &DF[DIR_0M0   *size_MatF];
+   ftF    = &DF[DIR_00P   *size_MatF];
+   fbF    = &DF[DIR_00M   *size_MatF];
+   fneF   = &DF[DIR_PP0  *size_MatF];
+   fswF   = &DF[DIR_MM0  *size_MatF];
+   fseF   = &DF[DIR_PM0  *size_MatF];
+   fnwF   = &DF[DIR_MP0  *size_MatF];
+   fteF   = &DF[DIR_P0P  *size_MatF];
+   fbwF   = &DF[DIR_M0M  *size_MatF];
+   fbeF   = &DF[DIR_P0M  *size_MatF];
+   ftwF   = &DF[DIR_M0P  *size_MatF];
+   ftnF   = &DF[DIR_0PP  *size_MatF];
+   fbsF   = &DF[DIR_0MM  *size_MatF];
+   fbnF   = &DF[DIR_0PM  *size_MatF];
+   ftsF   = &DF[DIR_0MP  *size_MatF];
+   fzeroF = &DF[DIR_000*size_MatF];
+   ftneF  = &DF[DIR_PPP *size_MatF];
+   ftswF  = &DF[DIR_MMP *size_MatF];
+   ftseF  = &DF[DIR_PMP *size_MatF];
+   ftnwF  = &DF[DIR_MPP *size_MatF];
+   fbneF  = &DF[DIR_PPM *size_MatF];
+   fbswF  = &DF[DIR_MMM *size_MatF];
+   fbseF  = &DF[DIR_PMM *size_MatF];
+   fbnwF  = &DF[DIR_MPM *size_MatF];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[E   *size_MatC];
-      fwC    = &DC[W   *size_MatC];
-      fnC    = &DC[N   *size_MatC];
-      fsC    = &DC[S   *size_MatC];
-      ftC    = &DC[T   *size_MatC];
-      fbC    = &DC[B   *size_MatC];
-      fneC   = &DC[NE  *size_MatC];
-      fswC   = &DC[SW  *size_MatC];
-      fseC   = &DC[SE  *size_MatC];
-      fnwC   = &DC[NW  *size_MatC];
-      fteC   = &DC[TE  *size_MatC];
-      fbwC   = &DC[BW  *size_MatC];
-      fbeC   = &DC[BE  *size_MatC];
-      ftwC   = &DC[TW  *size_MatC];
-      ftnC   = &DC[TN  *size_MatC];
-      fbsC   = &DC[BS  *size_MatC];
-      fbnC   = &DC[BN  *size_MatC];
-      ftsC   = &DC[TS  *size_MatC];
-      fzeroC = &DC[REST*size_MatC];
-      ftneC  = &DC[TNE *size_MatC];
-      ftswC  = &DC[TSW *size_MatC];
-      ftseC  = &DC[TSE *size_MatC];
-      ftnwC  = &DC[TNW *size_MatC];
-      fbneC  = &DC[BNE *size_MatC];
-      fbswC  = &DC[BSW *size_MatC];
-      fbseC  = &DC[BSE *size_MatC];
-      fbnwC  = &DC[BNW *size_MatC];
+      feC    = &DC[DIR_P00   *size_MatC];
+      fwC    = &DC[DIR_M00   *size_MatC];
+      fnC    = &DC[DIR_0P0   *size_MatC];
+      fsC    = &DC[DIR_0M0   *size_MatC];
+      ftC    = &DC[DIR_00P   *size_MatC];
+      fbC    = &DC[DIR_00M   *size_MatC];
+      fneC   = &DC[DIR_PP0  *size_MatC];
+      fswC   = &DC[DIR_MM0  *size_MatC];
+      fseC   = &DC[DIR_PM0  *size_MatC];
+      fnwC   = &DC[DIR_MP0  *size_MatC];
+      fteC   = &DC[DIR_P0P  *size_MatC];
+      fbwC   = &DC[DIR_M0M  *size_MatC];
+      fbeC   = &DC[DIR_P0M  *size_MatC];
+      ftwC   = &DC[DIR_M0P  *size_MatC];
+      ftnC   = &DC[DIR_0PP  *size_MatC];
+      fbsC   = &DC[DIR_0MM  *size_MatC];
+      fbnC   = &DC[DIR_0PM  *size_MatC];
+      ftsC   = &DC[DIR_0MP  *size_MatC];
+      fzeroC = &DC[DIR_000*size_MatC];
+      ftneC  = &DC[DIR_PPP *size_MatC];
+      ftswC  = &DC[DIR_MMP *size_MatC];
+      ftseC  = &DC[DIR_PMP *size_MatC];
+      ftnwC  = &DC[DIR_MPP *size_MatC];
+      fbneC  = &DC[DIR_PPM *size_MatC];
+      fbswC  = &DC[DIR_MMM *size_MatC];
+      fbseC  = &DC[DIR_PMM *size_MatC];
+      fbnwC  = &DC[DIR_MPM *size_MatC];
    } 
    else
    {
-      fwC    = &DC[E   *size_MatC];
-      feC    = &DC[W   *size_MatC];
-      fsC    = &DC[N   *size_MatC];
-      fnC    = &DC[S   *size_MatC];
-      fbC    = &DC[T   *size_MatC];
-      ftC    = &DC[B   *size_MatC];
-      fswC   = &DC[NE  *size_MatC];
-      fneC   = &DC[SW  *size_MatC];
-      fnwC   = &DC[SE  *size_MatC];
-      fseC   = &DC[NW  *size_MatC];
-      fbwC   = &DC[TE  *size_MatC];
-      fteC   = &DC[BW  *size_MatC];
-      ftwC   = &DC[BE  *size_MatC];
-      fbeC   = &DC[TW  *size_MatC];
-      fbsC   = &DC[TN  *size_MatC];
-      ftnC   = &DC[BS  *size_MatC];
-      ftsC   = &DC[BN  *size_MatC];
-      fbnC   = &DC[TS  *size_MatC];
-      fzeroC = &DC[REST*size_MatC];
-      fbswC  = &DC[TNE *size_MatC];
-      fbneC  = &DC[TSW *size_MatC];
-      fbnwC  = &DC[TSE *size_MatC];
-      fbseC  = &DC[TNW *size_MatC];
-      ftswC  = &DC[BNE *size_MatC];
-      ftneC  = &DC[BSW *size_MatC];
-      ftnwC  = &DC[BSE *size_MatC];
-      ftseC  = &DC[BNW *size_MatC];
+      fwC    = &DC[DIR_P00   *size_MatC];
+      feC    = &DC[DIR_M00   *size_MatC];
+      fsC    = &DC[DIR_0P0   *size_MatC];
+      fnC    = &DC[DIR_0M0   *size_MatC];
+      fbC    = &DC[DIR_00P   *size_MatC];
+      ftC    = &DC[DIR_00M   *size_MatC];
+      fswC   = &DC[DIR_PP0  *size_MatC];
+      fneC   = &DC[DIR_MM0  *size_MatC];
+      fnwC   = &DC[DIR_PM0  *size_MatC];
+      fseC   = &DC[DIR_MP0  *size_MatC];
+      fbwC   = &DC[DIR_P0P  *size_MatC];
+      fteC   = &DC[DIR_M0M  *size_MatC];
+      ftwC   = &DC[DIR_P0M  *size_MatC];
+      fbeC   = &DC[DIR_M0P  *size_MatC];
+      fbsC   = &DC[DIR_0PP  *size_MatC];
+      ftnC   = &DC[DIR_0MM  *size_MatC];
+      ftsC   = &DC[DIR_0PM  *size_MatC];
+      fbnC   = &DC[DIR_0MP  *size_MatC];
+      fzeroC = &DC[DIR_000*size_MatC];
+      fbswC  = &DC[DIR_PPP *size_MatC];
+      fbneC  = &DC[DIR_MMP *size_MatC];
+      fbnwC  = &DC[DIR_PMP *size_MatC];
+      fbseC  = &DC[DIR_MPP *size_MatC];
+      ftswC  = &DC[DIR_PPM *size_MatC];
+      ftneC  = &DC[DIR_MMM *size_MatC];
+      ftnwC  = &DC[DIR_PMM *size_MatC];
+      ftseC  = &DC[DIR_MPM *size_MatC];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -22089,7 +22089,7 @@ extern "C" __global__ void scaleCF_RhoSq_comp_27(real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleCF_staggered_time_comp_27(   real* DC, 
+__global__ void scaleCF_staggered_time_comp_27(   real* DC, 
 															 real* DF, 
 															 unsigned int* neighborCX,
 															 unsigned int* neighborCY,
@@ -22115,96 +22115,96 @@ extern "C" __global__ void scaleCF_staggered_time_comp_27(   real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[E   *size_MatF];
-   fwF    = &DF[W   *size_MatF];
-   fnF    = &DF[N   *size_MatF];
-   fsF    = &DF[S   *size_MatF];
-   ftF    = &DF[T   *size_MatF];
-   fbF    = &DF[B   *size_MatF];
-   fneF   = &DF[NE  *size_MatF];
-   fswF   = &DF[SW  *size_MatF];
-   fseF   = &DF[SE  *size_MatF];
-   fnwF   = &DF[NW  *size_MatF];
-   fteF   = &DF[TE  *size_MatF];
-   fbwF   = &DF[BW  *size_MatF];
-   fbeF   = &DF[BE  *size_MatF];
-   ftwF   = &DF[TW  *size_MatF];
-   ftnF   = &DF[TN  *size_MatF];
-   fbsF   = &DF[BS  *size_MatF];
-   fbnF   = &DF[BN  *size_MatF];
-   ftsF   = &DF[TS  *size_MatF];
-   fzeroF = &DF[REST*size_MatF];
-   ftneF  = &DF[TNE *size_MatF];
-   ftswF  = &DF[TSW *size_MatF];
-   ftseF  = &DF[TSE *size_MatF];
-   ftnwF  = &DF[TNW *size_MatF];
-   fbneF  = &DF[BNE *size_MatF];
-   fbswF  = &DF[BSW *size_MatF];
-   fbseF  = &DF[BSE *size_MatF];
-   fbnwF  = &DF[BNW *size_MatF];
+   feF    = &DF[DIR_P00   *size_MatF];
+   fwF    = &DF[DIR_M00   *size_MatF];
+   fnF    = &DF[DIR_0P0   *size_MatF];
+   fsF    = &DF[DIR_0M0   *size_MatF];
+   ftF    = &DF[DIR_00P   *size_MatF];
+   fbF    = &DF[DIR_00M   *size_MatF];
+   fneF   = &DF[DIR_PP0  *size_MatF];
+   fswF   = &DF[DIR_MM0  *size_MatF];
+   fseF   = &DF[DIR_PM0  *size_MatF];
+   fnwF   = &DF[DIR_MP0  *size_MatF];
+   fteF   = &DF[DIR_P0P  *size_MatF];
+   fbwF   = &DF[DIR_M0M  *size_MatF];
+   fbeF   = &DF[DIR_P0M  *size_MatF];
+   ftwF   = &DF[DIR_M0P  *size_MatF];
+   ftnF   = &DF[DIR_0PP  *size_MatF];
+   fbsF   = &DF[DIR_0MM  *size_MatF];
+   fbnF   = &DF[DIR_0PM  *size_MatF];
+   ftsF   = &DF[DIR_0MP  *size_MatF];
+   fzeroF = &DF[DIR_000*size_MatF];
+   ftneF  = &DF[DIR_PPP *size_MatF];
+   ftswF  = &DF[DIR_MMP *size_MatF];
+   ftseF  = &DF[DIR_PMP *size_MatF];
+   ftnwF  = &DF[DIR_MPP *size_MatF];
+   fbneF  = &DF[DIR_PPM *size_MatF];
+   fbswF  = &DF[DIR_MMM *size_MatF];
+   fbseF  = &DF[DIR_PMM *size_MatF];
+   fbnwF  = &DF[DIR_MPM *size_MatF];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[E   *size_MatC];
-      fwC    = &DC[W   *size_MatC];
-      fnC    = &DC[N   *size_MatC];
-      fsC    = &DC[S   *size_MatC];
-      ftC    = &DC[T   *size_MatC];
-      fbC    = &DC[B   *size_MatC];
-      fneC   = &DC[NE  *size_MatC];
-      fswC   = &DC[SW  *size_MatC];
-      fseC   = &DC[SE  *size_MatC];
-      fnwC   = &DC[NW  *size_MatC];
-      fteC   = &DC[TE  *size_MatC];
-      fbwC   = &DC[BW  *size_MatC];
-      fbeC   = &DC[BE  *size_MatC];
-      ftwC   = &DC[TW  *size_MatC];
-      ftnC   = &DC[TN  *size_MatC];
-      fbsC   = &DC[BS  *size_MatC];
-      fbnC   = &DC[BN  *size_MatC];
-      ftsC   = &DC[TS  *size_MatC];
-      fzeroC = &DC[REST*size_MatC];
-      ftneC  = &DC[TNE *size_MatC];
-      ftswC  = &DC[TSW *size_MatC];
-      ftseC  = &DC[TSE *size_MatC];
-      ftnwC  = &DC[TNW *size_MatC];
-      fbneC  = &DC[BNE *size_MatC];
-      fbswC  = &DC[BSW *size_MatC];
-      fbseC  = &DC[BSE *size_MatC];
-      fbnwC  = &DC[BNW *size_MatC];
+      feC    = &DC[DIR_P00   *size_MatC];
+      fwC    = &DC[DIR_M00   *size_MatC];
+      fnC    = &DC[DIR_0P0   *size_MatC];
+      fsC    = &DC[DIR_0M0   *size_MatC];
+      ftC    = &DC[DIR_00P   *size_MatC];
+      fbC    = &DC[DIR_00M   *size_MatC];
+      fneC   = &DC[DIR_PP0  *size_MatC];
+      fswC   = &DC[DIR_MM0  *size_MatC];
+      fseC   = &DC[DIR_PM0  *size_MatC];
+      fnwC   = &DC[DIR_MP0  *size_MatC];
+      fteC   = &DC[DIR_P0P  *size_MatC];
+      fbwC   = &DC[DIR_M0M  *size_MatC];
+      fbeC   = &DC[DIR_P0M  *size_MatC];
+      ftwC   = &DC[DIR_M0P  *size_MatC];
+      ftnC   = &DC[DIR_0PP  *size_MatC];
+      fbsC   = &DC[DIR_0MM  *size_MatC];
+      fbnC   = &DC[DIR_0PM  *size_MatC];
+      ftsC   = &DC[DIR_0MP  *size_MatC];
+      fzeroC = &DC[DIR_000*size_MatC];
+      ftneC  = &DC[DIR_PPP *size_MatC];
+      ftswC  = &DC[DIR_MMP *size_MatC];
+      ftseC  = &DC[DIR_PMP *size_MatC];
+      ftnwC  = &DC[DIR_MPP *size_MatC];
+      fbneC  = &DC[DIR_PPM *size_MatC];
+      fbswC  = &DC[DIR_MMM *size_MatC];
+      fbseC  = &DC[DIR_PMM *size_MatC];
+      fbnwC  = &DC[DIR_MPM *size_MatC];
    } 
    else
    {
-      fwC    = &DC[E   *size_MatC];
-      feC    = &DC[W   *size_MatC];
-      fsC    = &DC[N   *size_MatC];
-      fnC    = &DC[S   *size_MatC];
-      fbC    = &DC[T   *size_MatC];
-      ftC    = &DC[B   *size_MatC];
-      fswC   = &DC[NE  *size_MatC];
-      fneC   = &DC[SW  *size_MatC];
-      fnwC   = &DC[SE  *size_MatC];
-      fseC   = &DC[NW  *size_MatC];
-      fbwC   = &DC[TE  *size_MatC];
-      fteC   = &DC[BW  *size_MatC];
-      ftwC   = &DC[BE  *size_MatC];
-      fbeC   = &DC[TW  *size_MatC];
-      fbsC   = &DC[TN  *size_MatC];
-      ftnC   = &DC[BS  *size_MatC];
-      ftsC   = &DC[BN  *size_MatC];
-      fbnC   = &DC[TS  *size_MatC];
-      fzeroC = &DC[REST*size_MatC];
-      fbswC  = &DC[TNE *size_MatC];
-      fbneC  = &DC[TSW *size_MatC];
-      fbnwC  = &DC[TSE *size_MatC];
-      fbseC  = &DC[TNW *size_MatC];
-      ftswC  = &DC[BNE *size_MatC];
-      ftneC  = &DC[BSW *size_MatC];
-      ftnwC  = &DC[BSE *size_MatC];
-      ftseC  = &DC[BNW *size_MatC];
+      fwC    = &DC[DIR_P00   *size_MatC];
+      feC    = &DC[DIR_M00   *size_MatC];
+      fsC    = &DC[DIR_0P0   *size_MatC];
+      fnC    = &DC[DIR_0M0   *size_MatC];
+      fbC    = &DC[DIR_00P   *size_MatC];
+      ftC    = &DC[DIR_00M   *size_MatC];
+      fswC   = &DC[DIR_PP0  *size_MatC];
+      fneC   = &DC[DIR_MM0  *size_MatC];
+      fnwC   = &DC[DIR_PM0  *size_MatC];
+      fseC   = &DC[DIR_MP0  *size_MatC];
+      fbwC   = &DC[DIR_P0P  *size_MatC];
+      fteC   = &DC[DIR_M0M  *size_MatC];
+      ftwC   = &DC[DIR_P0M  *size_MatC];
+      fbeC   = &DC[DIR_M0P  *size_MatC];
+      fbsC   = &DC[DIR_0PP  *size_MatC];
+      ftnC   = &DC[DIR_0MM  *size_MatC];
+      ftsC   = &DC[DIR_0PM  *size_MatC];
+      fbnC   = &DC[DIR_0MP  *size_MatC];
+      fzeroC = &DC[DIR_000*size_MatC];
+      fbswC  = &DC[DIR_PPP *size_MatC];
+      fbneC  = &DC[DIR_MMP *size_MatC];
+      fbnwC  = &DC[DIR_PMP *size_MatC];
+      fbseC  = &DC[DIR_MPP *size_MatC];
+      ftswC  = &DC[DIR_PPM *size_MatC];
+      ftneC  = &DC[DIR_MMM *size_MatC];
+      ftnwC  = &DC[DIR_PMM *size_MatC];
+      ftseC  = &DC[DIR_MPM *size_MatC];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -26325,7 +26325,7 @@ extern "C" __global__ void scaleCF_staggered_time_comp_27(   real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleCF_Fix_comp_27(  real* DC, 
+__global__ void scaleCF_Fix_comp_27(  real* DC, 
 												 real* DF, 
 												 unsigned int* neighborCX,
 												 unsigned int* neighborCY,
@@ -26351,96 +26351,96 @@ extern "C" __global__ void scaleCF_Fix_comp_27(  real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[E   *size_MatF];
-   fwF    = &DF[W   *size_MatF];
-   fnF    = &DF[N   *size_MatF];
-   fsF    = &DF[S   *size_MatF];
-   ftF    = &DF[T   *size_MatF];
-   fbF    = &DF[B   *size_MatF];
-   fneF   = &DF[NE  *size_MatF];
-   fswF   = &DF[SW  *size_MatF];
-   fseF   = &DF[SE  *size_MatF];
-   fnwF   = &DF[NW  *size_MatF];
-   fteF   = &DF[TE  *size_MatF];
-   fbwF   = &DF[BW  *size_MatF];
-   fbeF   = &DF[BE  *size_MatF];
-   ftwF   = &DF[TW  *size_MatF];
-   ftnF   = &DF[TN  *size_MatF];
-   fbsF   = &DF[BS  *size_MatF];
-   fbnF   = &DF[BN  *size_MatF];
-   ftsF   = &DF[TS  *size_MatF];
-   fzeroF = &DF[REST*size_MatF];
-   ftneF  = &DF[TNE *size_MatF];
-   ftswF  = &DF[TSW *size_MatF];
-   ftseF  = &DF[TSE *size_MatF];
-   ftnwF  = &DF[TNW *size_MatF];
-   fbneF  = &DF[BNE *size_MatF];
-   fbswF  = &DF[BSW *size_MatF];
-   fbseF  = &DF[BSE *size_MatF];
-   fbnwF  = &DF[BNW *size_MatF];
+   feF    = &DF[DIR_P00   *size_MatF];
+   fwF    = &DF[DIR_M00   *size_MatF];
+   fnF    = &DF[DIR_0P0   *size_MatF];
+   fsF    = &DF[DIR_0M0   *size_MatF];
+   ftF    = &DF[DIR_00P   *size_MatF];
+   fbF    = &DF[DIR_00M   *size_MatF];
+   fneF   = &DF[DIR_PP0  *size_MatF];
+   fswF   = &DF[DIR_MM0  *size_MatF];
+   fseF   = &DF[DIR_PM0  *size_MatF];
+   fnwF   = &DF[DIR_MP0  *size_MatF];
+   fteF   = &DF[DIR_P0P  *size_MatF];
+   fbwF   = &DF[DIR_M0M  *size_MatF];
+   fbeF   = &DF[DIR_P0M  *size_MatF];
+   ftwF   = &DF[DIR_M0P  *size_MatF];
+   ftnF   = &DF[DIR_0PP  *size_MatF];
+   fbsF   = &DF[DIR_0MM  *size_MatF];
+   fbnF   = &DF[DIR_0PM  *size_MatF];
+   ftsF   = &DF[DIR_0MP  *size_MatF];
+   fzeroF = &DF[DIR_000*size_MatF];
+   ftneF  = &DF[DIR_PPP *size_MatF];
+   ftswF  = &DF[DIR_MMP *size_MatF];
+   ftseF  = &DF[DIR_PMP *size_MatF];
+   ftnwF  = &DF[DIR_MPP *size_MatF];
+   fbneF  = &DF[DIR_PPM *size_MatF];
+   fbswF  = &DF[DIR_MMM *size_MatF];
+   fbseF  = &DF[DIR_PMM *size_MatF];
+   fbnwF  = &DF[DIR_MPM *size_MatF];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[E   *size_MatC];
-      fwC    = &DC[W   *size_MatC];
-      fnC    = &DC[N   *size_MatC];
-      fsC    = &DC[S   *size_MatC];
-      ftC    = &DC[T   *size_MatC];
-      fbC    = &DC[B   *size_MatC];
-      fneC   = &DC[NE  *size_MatC];
-      fswC   = &DC[SW  *size_MatC];
-      fseC   = &DC[SE  *size_MatC];
-      fnwC   = &DC[NW  *size_MatC];
-      fteC   = &DC[TE  *size_MatC];
-      fbwC   = &DC[BW  *size_MatC];
-      fbeC   = &DC[BE  *size_MatC];
-      ftwC   = &DC[TW  *size_MatC];
-      ftnC   = &DC[TN  *size_MatC];
-      fbsC   = &DC[BS  *size_MatC];
-      fbnC   = &DC[BN  *size_MatC];
-      ftsC   = &DC[TS  *size_MatC];
-      fzeroC = &DC[REST*size_MatC];
-      ftneC  = &DC[TNE *size_MatC];
-      ftswC  = &DC[TSW *size_MatC];
-      ftseC  = &DC[TSE *size_MatC];
-      ftnwC  = &DC[TNW *size_MatC];
-      fbneC  = &DC[BNE *size_MatC];
-      fbswC  = &DC[BSW *size_MatC];
-      fbseC  = &DC[BSE *size_MatC];
-      fbnwC  = &DC[BNW *size_MatC];
+      feC    = &DC[DIR_P00   *size_MatC];
+      fwC    = &DC[DIR_M00   *size_MatC];
+      fnC    = &DC[DIR_0P0   *size_MatC];
+      fsC    = &DC[DIR_0M0   *size_MatC];
+      ftC    = &DC[DIR_00P   *size_MatC];
+      fbC    = &DC[DIR_00M   *size_MatC];
+      fneC   = &DC[DIR_PP0  *size_MatC];
+      fswC   = &DC[DIR_MM0  *size_MatC];
+      fseC   = &DC[DIR_PM0  *size_MatC];
+      fnwC   = &DC[DIR_MP0  *size_MatC];
+      fteC   = &DC[DIR_P0P  *size_MatC];
+      fbwC   = &DC[DIR_M0M  *size_MatC];
+      fbeC   = &DC[DIR_P0M  *size_MatC];
+      ftwC   = &DC[DIR_M0P  *size_MatC];
+      ftnC   = &DC[DIR_0PP  *size_MatC];
+      fbsC   = &DC[DIR_0MM  *size_MatC];
+      fbnC   = &DC[DIR_0PM  *size_MatC];
+      ftsC   = &DC[DIR_0MP  *size_MatC];
+      fzeroC = &DC[DIR_000*size_MatC];
+      ftneC  = &DC[DIR_PPP *size_MatC];
+      ftswC  = &DC[DIR_MMP *size_MatC];
+      ftseC  = &DC[DIR_PMP *size_MatC];
+      ftnwC  = &DC[DIR_MPP *size_MatC];
+      fbneC  = &DC[DIR_PPM *size_MatC];
+      fbswC  = &DC[DIR_MMM *size_MatC];
+      fbseC  = &DC[DIR_PMM *size_MatC];
+      fbnwC  = &DC[DIR_MPM *size_MatC];
    } 
    else
    {
-      fwC    = &DC[E   *size_MatC];
-      feC    = &DC[W   *size_MatC];
-      fsC    = &DC[N   *size_MatC];
-      fnC    = &DC[S   *size_MatC];
-      fbC    = &DC[T   *size_MatC];
-      ftC    = &DC[B   *size_MatC];
-      fswC   = &DC[NE  *size_MatC];
-      fneC   = &DC[SW  *size_MatC];
-      fnwC   = &DC[SE  *size_MatC];
-      fseC   = &DC[NW  *size_MatC];
-      fbwC   = &DC[TE  *size_MatC];
-      fteC   = &DC[BW  *size_MatC];
-      ftwC   = &DC[BE  *size_MatC];
-      fbeC   = &DC[TW  *size_MatC];
-      fbsC   = &DC[TN  *size_MatC];
-      ftnC   = &DC[BS  *size_MatC];
-      ftsC   = &DC[BN  *size_MatC];
-      fbnC   = &DC[TS  *size_MatC];
-      fzeroC = &DC[REST*size_MatC];
-      fbswC  = &DC[TNE *size_MatC];
-      fbneC  = &DC[TSW *size_MatC];
-      fbnwC  = &DC[TSE *size_MatC];
-      fbseC  = &DC[TNW *size_MatC];
-      ftswC  = &DC[BNE *size_MatC];
-      ftneC  = &DC[BSW *size_MatC];
-      ftnwC  = &DC[BSE *size_MatC];
-      ftseC  = &DC[BNW *size_MatC];
+      fwC    = &DC[DIR_P00   *size_MatC];
+      feC    = &DC[DIR_M00   *size_MatC];
+      fsC    = &DC[DIR_0P0   *size_MatC];
+      fnC    = &DC[DIR_0M0   *size_MatC];
+      fbC    = &DC[DIR_00P   *size_MatC];
+      ftC    = &DC[DIR_00M   *size_MatC];
+      fswC   = &DC[DIR_PP0  *size_MatC];
+      fneC   = &DC[DIR_MM0  *size_MatC];
+      fnwC   = &DC[DIR_PM0  *size_MatC];
+      fseC   = &DC[DIR_MP0  *size_MatC];
+      fbwC   = &DC[DIR_P0P  *size_MatC];
+      fteC   = &DC[DIR_M0M  *size_MatC];
+      ftwC   = &DC[DIR_P0M  *size_MatC];
+      fbeC   = &DC[DIR_M0P  *size_MatC];
+      fbsC   = &DC[DIR_0PP  *size_MatC];
+      ftnC   = &DC[DIR_0MM  *size_MatC];
+      ftsC   = &DC[DIR_0PM  *size_MatC];
+      fbnC   = &DC[DIR_0MP  *size_MatC];
+      fzeroC = &DC[DIR_000*size_MatC];
+      fbswC  = &DC[DIR_PPP *size_MatC];
+      fbneC  = &DC[DIR_MMP *size_MatC];
+      fbnwC  = &DC[DIR_PMP *size_MatC];
+      fbseC  = &DC[DIR_MPP *size_MatC];
+      ftswC  = &DC[DIR_PPM *size_MatC];
+      ftneC  = &DC[DIR_MMM *size_MatC];
+      ftnwC  = &DC[DIR_PMM *size_MatC];
+      ftseC  = &DC[DIR_MPM *size_MatC];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -31092,7 +31092,7 @@ extern "C" __global__ void scaleCF_Fix_comp_27(  real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleCF_NSPress_27(   real* DC, 
+__global__ void scaleCF_NSPress_27(   real* DC, 
 												 real* DF, 
 												 unsigned int* neighborCX,
 												 unsigned int* neighborCY,
@@ -31118,96 +31118,96 @@ extern "C" __global__ void scaleCF_NSPress_27(   real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[E   *size_MatF];
-   fwF    = &DF[W   *size_MatF];
-   fnF    = &DF[N   *size_MatF];
-   fsF    = &DF[S   *size_MatF];
-   ftF    = &DF[T   *size_MatF];
-   fbF    = &DF[B   *size_MatF];
-   fneF   = &DF[NE  *size_MatF];
-   fswF   = &DF[SW  *size_MatF];
-   fseF   = &DF[SE  *size_MatF];
-   fnwF   = &DF[NW  *size_MatF];
-   fteF   = &DF[TE  *size_MatF];
-   fbwF   = &DF[BW  *size_MatF];
-   fbeF   = &DF[BE  *size_MatF];
-   ftwF   = &DF[TW  *size_MatF];
-   ftnF   = &DF[TN  *size_MatF];
-   fbsF   = &DF[BS  *size_MatF];
-   fbnF   = &DF[BN  *size_MatF];
-   ftsF   = &DF[TS  *size_MatF];
-   fzeroF = &DF[REST*size_MatF];
-   ftneF  = &DF[TNE *size_MatF];
-   ftswF  = &DF[TSW *size_MatF];
-   ftseF  = &DF[TSE *size_MatF];
-   ftnwF  = &DF[TNW *size_MatF];
-   fbneF  = &DF[BNE *size_MatF];
-   fbswF  = &DF[BSW *size_MatF];
-   fbseF  = &DF[BSE *size_MatF];
-   fbnwF  = &DF[BNW *size_MatF];
+   feF    = &DF[DIR_P00   *size_MatF];
+   fwF    = &DF[DIR_M00   *size_MatF];
+   fnF    = &DF[DIR_0P0   *size_MatF];
+   fsF    = &DF[DIR_0M0   *size_MatF];
+   ftF    = &DF[DIR_00P   *size_MatF];
+   fbF    = &DF[DIR_00M   *size_MatF];
+   fneF   = &DF[DIR_PP0  *size_MatF];
+   fswF   = &DF[DIR_MM0  *size_MatF];
+   fseF   = &DF[DIR_PM0  *size_MatF];
+   fnwF   = &DF[DIR_MP0  *size_MatF];
+   fteF   = &DF[DIR_P0P  *size_MatF];
+   fbwF   = &DF[DIR_M0M  *size_MatF];
+   fbeF   = &DF[DIR_P0M  *size_MatF];
+   ftwF   = &DF[DIR_M0P  *size_MatF];
+   ftnF   = &DF[DIR_0PP  *size_MatF];
+   fbsF   = &DF[DIR_0MM  *size_MatF];
+   fbnF   = &DF[DIR_0PM  *size_MatF];
+   ftsF   = &DF[DIR_0MP  *size_MatF];
+   fzeroF = &DF[DIR_000*size_MatF];
+   ftneF  = &DF[DIR_PPP *size_MatF];
+   ftswF  = &DF[DIR_MMP *size_MatF];
+   ftseF  = &DF[DIR_PMP *size_MatF];
+   ftnwF  = &DF[DIR_MPP *size_MatF];
+   fbneF  = &DF[DIR_PPM *size_MatF];
+   fbswF  = &DF[DIR_MMM *size_MatF];
+   fbseF  = &DF[DIR_PMM *size_MatF];
+   fbnwF  = &DF[DIR_MPM *size_MatF];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[E   *size_MatC];
-      fwC    = &DC[W   *size_MatC];
-      fnC    = &DC[N   *size_MatC];
-      fsC    = &DC[S   *size_MatC];
-      ftC    = &DC[T   *size_MatC];
-      fbC    = &DC[B   *size_MatC];
-      fneC   = &DC[NE  *size_MatC];
-      fswC   = &DC[SW  *size_MatC];
-      fseC   = &DC[SE  *size_MatC];
-      fnwC   = &DC[NW  *size_MatC];
-      fteC   = &DC[TE  *size_MatC];
-      fbwC   = &DC[BW  *size_MatC];
-      fbeC   = &DC[BE  *size_MatC];
-      ftwC   = &DC[TW  *size_MatC];
-      ftnC   = &DC[TN  *size_MatC];
-      fbsC   = &DC[BS  *size_MatC];
-      fbnC   = &DC[BN  *size_MatC];
-      ftsC   = &DC[TS  *size_MatC];
-      fzeroC = &DC[REST*size_MatC];
-      ftneC  = &DC[TNE *size_MatC];
-      ftswC  = &DC[TSW *size_MatC];
-      ftseC  = &DC[TSE *size_MatC];
-      ftnwC  = &DC[TNW *size_MatC];
-      fbneC  = &DC[BNE *size_MatC];
-      fbswC  = &DC[BSW *size_MatC];
-      fbseC  = &DC[BSE *size_MatC];
-      fbnwC  = &DC[BNW *size_MatC];
+      feC    = &DC[DIR_P00   *size_MatC];
+      fwC    = &DC[DIR_M00   *size_MatC];
+      fnC    = &DC[DIR_0P0   *size_MatC];
+      fsC    = &DC[DIR_0M0   *size_MatC];
+      ftC    = &DC[DIR_00P   *size_MatC];
+      fbC    = &DC[DIR_00M   *size_MatC];
+      fneC   = &DC[DIR_PP0  *size_MatC];
+      fswC   = &DC[DIR_MM0  *size_MatC];
+      fseC   = &DC[DIR_PM0  *size_MatC];
+      fnwC   = &DC[DIR_MP0  *size_MatC];
+      fteC   = &DC[DIR_P0P  *size_MatC];
+      fbwC   = &DC[DIR_M0M  *size_MatC];
+      fbeC   = &DC[DIR_P0M  *size_MatC];
+      ftwC   = &DC[DIR_M0P  *size_MatC];
+      ftnC   = &DC[DIR_0PP  *size_MatC];
+      fbsC   = &DC[DIR_0MM  *size_MatC];
+      fbnC   = &DC[DIR_0PM  *size_MatC];
+      ftsC   = &DC[DIR_0MP  *size_MatC];
+      fzeroC = &DC[DIR_000*size_MatC];
+      ftneC  = &DC[DIR_PPP *size_MatC];
+      ftswC  = &DC[DIR_MMP *size_MatC];
+      ftseC  = &DC[DIR_PMP *size_MatC];
+      ftnwC  = &DC[DIR_MPP *size_MatC];
+      fbneC  = &DC[DIR_PPM *size_MatC];
+      fbswC  = &DC[DIR_MMM *size_MatC];
+      fbseC  = &DC[DIR_PMM *size_MatC];
+      fbnwC  = &DC[DIR_MPM *size_MatC];
    } 
    else
    {
-      fwC    = &DC[E   *size_MatC];
-      feC    = &DC[W   *size_MatC];
-      fsC    = &DC[N   *size_MatC];
-      fnC    = &DC[S   *size_MatC];
-      fbC    = &DC[T   *size_MatC];
-      ftC    = &DC[B   *size_MatC];
-      fswC   = &DC[NE  *size_MatC];
-      fneC   = &DC[SW  *size_MatC];
-      fnwC   = &DC[SE  *size_MatC];
-      fseC   = &DC[NW  *size_MatC];
-      fbwC   = &DC[TE  *size_MatC];
-      fteC   = &DC[BW  *size_MatC];
-      ftwC   = &DC[BE  *size_MatC];
-      fbeC   = &DC[TW  *size_MatC];
-      fbsC   = &DC[TN  *size_MatC];
-      ftnC   = &DC[BS  *size_MatC];
-      ftsC   = &DC[BN  *size_MatC];
-      fbnC   = &DC[TS  *size_MatC];
-      fzeroC = &DC[REST*size_MatC];
-      fbswC  = &DC[TNE *size_MatC];
-      fbneC  = &DC[TSW *size_MatC];
-      fbnwC  = &DC[TSE *size_MatC];
-      fbseC  = &DC[TNW *size_MatC];
-      ftswC  = &DC[BNE *size_MatC];
-      ftneC  = &DC[BSW *size_MatC];
-      ftnwC  = &DC[BSE *size_MatC];
-      ftseC  = &DC[BNW *size_MatC];
+      fwC    = &DC[DIR_P00   *size_MatC];
+      feC    = &DC[DIR_M00   *size_MatC];
+      fsC    = &DC[DIR_0P0   *size_MatC];
+      fnC    = &DC[DIR_0M0   *size_MatC];
+      fbC    = &DC[DIR_00P   *size_MatC];
+      ftC    = &DC[DIR_00M   *size_MatC];
+      fswC   = &DC[DIR_PP0  *size_MatC];
+      fneC   = &DC[DIR_MM0  *size_MatC];
+      fnwC   = &DC[DIR_PM0  *size_MatC];
+      fseC   = &DC[DIR_MP0  *size_MatC];
+      fbwC   = &DC[DIR_P0P  *size_MatC];
+      fteC   = &DC[DIR_M0M  *size_MatC];
+      ftwC   = &DC[DIR_P0M  *size_MatC];
+      fbeC   = &DC[DIR_M0P  *size_MatC];
+      fbsC   = &DC[DIR_0PP  *size_MatC];
+      ftnC   = &DC[DIR_0MM  *size_MatC];
+      ftsC   = &DC[DIR_0PM  *size_MatC];
+      fbnC   = &DC[DIR_0MP  *size_MatC];
+      fzeroC = &DC[DIR_000*size_MatC];
+      fbswC  = &DC[DIR_PPP *size_MatC];
+      fbneC  = &DC[DIR_MMP *size_MatC];
+      fbnwC  = &DC[DIR_PMP *size_MatC];
+      fbseC  = &DC[DIR_MPP *size_MatC];
+      ftswC  = &DC[DIR_PPM *size_MatC];
+      ftneC  = &DC[DIR_MMM *size_MatC];
+      ftnwC  = &DC[DIR_PMM *size_MatC];
+      ftseC  = &DC[DIR_MPM *size_MatC];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -35036,7 +35036,7 @@ extern "C" __global__ void scaleCF_NSPress_27(   real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleCF_Fix_27(   real* DC, 
+__global__ void scaleCF_Fix_27(   real* DC, 
                                              real* DF, 
                                              unsigned int* neighborCX,
                                              unsigned int* neighborCY,
@@ -35062,96 +35062,96 @@ extern "C" __global__ void scaleCF_Fix_27(   real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[E   *size_MatF];
-   fwF    = &DF[W   *size_MatF];
-   fnF    = &DF[N   *size_MatF];
-   fsF    = &DF[S   *size_MatF];
-   ftF    = &DF[T   *size_MatF];
-   fbF    = &DF[B   *size_MatF];
-   fneF   = &DF[NE  *size_MatF];
-   fswF   = &DF[SW  *size_MatF];
-   fseF   = &DF[SE  *size_MatF];
-   fnwF   = &DF[NW  *size_MatF];
-   fteF   = &DF[TE  *size_MatF];
-   fbwF   = &DF[BW  *size_MatF];
-   fbeF   = &DF[BE  *size_MatF];
-   ftwF   = &DF[TW  *size_MatF];
-   ftnF   = &DF[TN  *size_MatF];
-   fbsF   = &DF[BS  *size_MatF];
-   fbnF   = &DF[BN  *size_MatF];
-   ftsF   = &DF[TS  *size_MatF];
-   fzeroF = &DF[REST*size_MatF];
-   ftneF  = &DF[TNE *size_MatF];
-   ftswF  = &DF[TSW *size_MatF];
-   ftseF  = &DF[TSE *size_MatF];
-   ftnwF  = &DF[TNW *size_MatF];
-   fbneF  = &DF[BNE *size_MatF];
-   fbswF  = &DF[BSW *size_MatF];
-   fbseF  = &DF[BSE *size_MatF];
-   fbnwF  = &DF[BNW *size_MatF];
+   feF    = &DF[DIR_P00   *size_MatF];
+   fwF    = &DF[DIR_M00   *size_MatF];
+   fnF    = &DF[DIR_0P0   *size_MatF];
+   fsF    = &DF[DIR_0M0   *size_MatF];
+   ftF    = &DF[DIR_00P   *size_MatF];
+   fbF    = &DF[DIR_00M   *size_MatF];
+   fneF   = &DF[DIR_PP0  *size_MatF];
+   fswF   = &DF[DIR_MM0  *size_MatF];
+   fseF   = &DF[DIR_PM0  *size_MatF];
+   fnwF   = &DF[DIR_MP0  *size_MatF];
+   fteF   = &DF[DIR_P0P  *size_MatF];
+   fbwF   = &DF[DIR_M0M  *size_MatF];
+   fbeF   = &DF[DIR_P0M  *size_MatF];
+   ftwF   = &DF[DIR_M0P  *size_MatF];
+   ftnF   = &DF[DIR_0PP  *size_MatF];
+   fbsF   = &DF[DIR_0MM  *size_MatF];
+   fbnF   = &DF[DIR_0PM  *size_MatF];
+   ftsF   = &DF[DIR_0MP  *size_MatF];
+   fzeroF = &DF[DIR_000*size_MatF];
+   ftneF  = &DF[DIR_PPP *size_MatF];
+   ftswF  = &DF[DIR_MMP *size_MatF];
+   ftseF  = &DF[DIR_PMP *size_MatF];
+   ftnwF  = &DF[DIR_MPP *size_MatF];
+   fbneF  = &DF[DIR_PPM *size_MatF];
+   fbswF  = &DF[DIR_MMM *size_MatF];
+   fbseF  = &DF[DIR_PMM *size_MatF];
+   fbnwF  = &DF[DIR_MPM *size_MatF];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[E   *size_MatC];
-      fwC    = &DC[W   *size_MatC];
-      fnC    = &DC[N   *size_MatC];
-      fsC    = &DC[S   *size_MatC];
-      ftC    = &DC[T   *size_MatC];
-      fbC    = &DC[B   *size_MatC];
-      fneC   = &DC[NE  *size_MatC];
-      fswC   = &DC[SW  *size_MatC];
-      fseC   = &DC[SE  *size_MatC];
-      fnwC   = &DC[NW  *size_MatC];
-      fteC   = &DC[TE  *size_MatC];
-      fbwC   = &DC[BW  *size_MatC];
-      fbeC   = &DC[BE  *size_MatC];
-      ftwC   = &DC[TW  *size_MatC];
-      ftnC   = &DC[TN  *size_MatC];
-      fbsC   = &DC[BS  *size_MatC];
-      fbnC   = &DC[BN  *size_MatC];
-      ftsC   = &DC[TS  *size_MatC];
-      fzeroC = &DC[REST*size_MatC];
-      ftneC  = &DC[TNE *size_MatC];
-      ftswC  = &DC[TSW *size_MatC];
-      ftseC  = &DC[TSE *size_MatC];
-      ftnwC  = &DC[TNW *size_MatC];
-      fbneC  = &DC[BNE *size_MatC];
-      fbswC  = &DC[BSW *size_MatC];
-      fbseC  = &DC[BSE *size_MatC];
-      fbnwC  = &DC[BNW *size_MatC];
+      feC    = &DC[DIR_P00   *size_MatC];
+      fwC    = &DC[DIR_M00   *size_MatC];
+      fnC    = &DC[DIR_0P0   *size_MatC];
+      fsC    = &DC[DIR_0M0   *size_MatC];
+      ftC    = &DC[DIR_00P   *size_MatC];
+      fbC    = &DC[DIR_00M   *size_MatC];
+      fneC   = &DC[DIR_PP0  *size_MatC];
+      fswC   = &DC[DIR_MM0  *size_MatC];
+      fseC   = &DC[DIR_PM0  *size_MatC];
+      fnwC   = &DC[DIR_MP0  *size_MatC];
+      fteC   = &DC[DIR_P0P  *size_MatC];
+      fbwC   = &DC[DIR_M0M  *size_MatC];
+      fbeC   = &DC[DIR_P0M  *size_MatC];
+      ftwC   = &DC[DIR_M0P  *size_MatC];
+      ftnC   = &DC[DIR_0PP  *size_MatC];
+      fbsC   = &DC[DIR_0MM  *size_MatC];
+      fbnC   = &DC[DIR_0PM  *size_MatC];
+      ftsC   = &DC[DIR_0MP  *size_MatC];
+      fzeroC = &DC[DIR_000*size_MatC];
+      ftneC  = &DC[DIR_PPP *size_MatC];
+      ftswC  = &DC[DIR_MMP *size_MatC];
+      ftseC  = &DC[DIR_PMP *size_MatC];
+      ftnwC  = &DC[DIR_MPP *size_MatC];
+      fbneC  = &DC[DIR_PPM *size_MatC];
+      fbswC  = &DC[DIR_MMM *size_MatC];
+      fbseC  = &DC[DIR_PMM *size_MatC];
+      fbnwC  = &DC[DIR_MPM *size_MatC];
    } 
    else
    {
-      fwC    = &DC[E   *size_MatC];
-      feC    = &DC[W   *size_MatC];
-      fsC    = &DC[N   *size_MatC];
-      fnC    = &DC[S   *size_MatC];
-      fbC    = &DC[T   *size_MatC];
-      ftC    = &DC[B   *size_MatC];
-      fswC   = &DC[NE  *size_MatC];
-      fneC   = &DC[SW  *size_MatC];
-      fnwC   = &DC[SE  *size_MatC];
-      fseC   = &DC[NW  *size_MatC];
-      fbwC   = &DC[TE  *size_MatC];
-      fteC   = &DC[BW  *size_MatC];
-      ftwC   = &DC[BE  *size_MatC];
-      fbeC   = &DC[TW  *size_MatC];
-      fbsC   = &DC[TN  *size_MatC];
-      ftnC   = &DC[BS  *size_MatC];
-      ftsC   = &DC[BN  *size_MatC];
-      fbnC   = &DC[TS  *size_MatC];
-      fzeroC = &DC[REST*size_MatC];
-      fbswC  = &DC[TNE *size_MatC];
-      fbneC  = &DC[TSW *size_MatC];
-      fbnwC  = &DC[TSE *size_MatC];
-      fbseC  = &DC[TNW *size_MatC];
-      ftswC  = &DC[BNE *size_MatC];
-      ftneC  = &DC[BSW *size_MatC];
-      ftnwC  = &DC[BSE *size_MatC];
-      ftseC  = &DC[BNW *size_MatC];
+      fwC    = &DC[DIR_P00   *size_MatC];
+      feC    = &DC[DIR_M00   *size_MatC];
+      fsC    = &DC[DIR_0P0   *size_MatC];
+      fnC    = &DC[DIR_0M0   *size_MatC];
+      fbC    = &DC[DIR_00P   *size_MatC];
+      ftC    = &DC[DIR_00M   *size_MatC];
+      fswC   = &DC[DIR_PP0  *size_MatC];
+      fneC   = &DC[DIR_MM0  *size_MatC];
+      fnwC   = &DC[DIR_PM0  *size_MatC];
+      fseC   = &DC[DIR_MP0  *size_MatC];
+      fbwC   = &DC[DIR_P0P  *size_MatC];
+      fteC   = &DC[DIR_M0M  *size_MatC];
+      ftwC   = &DC[DIR_P0M  *size_MatC];
+      fbeC   = &DC[DIR_M0P  *size_MatC];
+      fbsC   = &DC[DIR_0PP  *size_MatC];
+      ftnC   = &DC[DIR_0MM  *size_MatC];
+      ftsC   = &DC[DIR_0PM  *size_MatC];
+      fbnC   = &DC[DIR_0MP  *size_MatC];
+      fzeroC = &DC[DIR_000*size_MatC];
+      fbswC  = &DC[DIR_PPP *size_MatC];
+      fbneC  = &DC[DIR_MMP *size_MatC];
+      fbnwC  = &DC[DIR_PMP *size_MatC];
+      fbseC  = &DC[DIR_MPP *size_MatC];
+      ftswC  = &DC[DIR_PPM *size_MatC];
+      ftneC  = &DC[DIR_MMM *size_MatC];
+      ftnwC  = &DC[DIR_PMM *size_MatC];
+      ftseC  = &DC[DIR_MPM *size_MatC];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -39294,7 +39294,7 @@ extern "C" __global__ void scaleCF_Fix_27(   real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleCFpress27(   real* DC, 
+__global__ void scaleCFpress27(   real* DC, 
                                              real* DF, 
                                              unsigned int* neighborCX,
                                              unsigned int* neighborCY,
@@ -39320,96 +39320,96 @@ extern "C" __global__ void scaleCFpress27(   real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[E   *size_MatF];
-   fwF    = &DF[W   *size_MatF];
-   fnF    = &DF[N   *size_MatF];
-   fsF    = &DF[S   *size_MatF];
-   ftF    = &DF[T   *size_MatF];
-   fbF    = &DF[B   *size_MatF];
-   fneF   = &DF[NE  *size_MatF];
-   fswF   = &DF[SW  *size_MatF];
-   fseF   = &DF[SE  *size_MatF];
-   fnwF   = &DF[NW  *size_MatF];
-   fteF   = &DF[TE  *size_MatF];
-   fbwF   = &DF[BW  *size_MatF];
-   fbeF   = &DF[BE  *size_MatF];
-   ftwF   = &DF[TW  *size_MatF];
-   ftnF   = &DF[TN  *size_MatF];
-   fbsF   = &DF[BS  *size_MatF];
-   fbnF   = &DF[BN  *size_MatF];
-   ftsF   = &DF[TS  *size_MatF];
-   fzeroF = &DF[REST*size_MatF];
-   ftneF  = &DF[TNE *size_MatF];
-   ftswF  = &DF[TSW *size_MatF];
-   ftseF  = &DF[TSE *size_MatF];
-   ftnwF  = &DF[TNW *size_MatF];
-   fbneF  = &DF[BNE *size_MatF];
-   fbswF  = &DF[BSW *size_MatF];
-   fbseF  = &DF[BSE *size_MatF];
-   fbnwF  = &DF[BNW *size_MatF];
+   feF    = &DF[DIR_P00   *size_MatF];
+   fwF    = &DF[DIR_M00   *size_MatF];
+   fnF    = &DF[DIR_0P0   *size_MatF];
+   fsF    = &DF[DIR_0M0   *size_MatF];
+   ftF    = &DF[DIR_00P   *size_MatF];
+   fbF    = &DF[DIR_00M   *size_MatF];
+   fneF   = &DF[DIR_PP0  *size_MatF];
+   fswF   = &DF[DIR_MM0  *size_MatF];
+   fseF   = &DF[DIR_PM0  *size_MatF];
+   fnwF   = &DF[DIR_MP0  *size_MatF];
+   fteF   = &DF[DIR_P0P  *size_MatF];
+   fbwF   = &DF[DIR_M0M  *size_MatF];
+   fbeF   = &DF[DIR_P0M  *size_MatF];
+   ftwF   = &DF[DIR_M0P  *size_MatF];
+   ftnF   = &DF[DIR_0PP  *size_MatF];
+   fbsF   = &DF[DIR_0MM  *size_MatF];
+   fbnF   = &DF[DIR_0PM  *size_MatF];
+   ftsF   = &DF[DIR_0MP  *size_MatF];
+   fzeroF = &DF[DIR_000*size_MatF];
+   ftneF  = &DF[DIR_PPP *size_MatF];
+   ftswF  = &DF[DIR_MMP *size_MatF];
+   ftseF  = &DF[DIR_PMP *size_MatF];
+   ftnwF  = &DF[DIR_MPP *size_MatF];
+   fbneF  = &DF[DIR_PPM *size_MatF];
+   fbswF  = &DF[DIR_MMM *size_MatF];
+   fbseF  = &DF[DIR_PMM *size_MatF];
+   fbnwF  = &DF[DIR_MPM *size_MatF];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[E   *size_MatC];
-      fwC    = &DC[W   *size_MatC];
-      fnC    = &DC[N   *size_MatC];
-      fsC    = &DC[S   *size_MatC];
-      ftC    = &DC[T   *size_MatC];
-      fbC    = &DC[B   *size_MatC];
-      fneC   = &DC[NE  *size_MatC];
-      fswC   = &DC[SW  *size_MatC];
-      fseC   = &DC[SE  *size_MatC];
-      fnwC   = &DC[NW  *size_MatC];
-      fteC   = &DC[TE  *size_MatC];
-      fbwC   = &DC[BW  *size_MatC];
-      fbeC   = &DC[BE  *size_MatC];
-      ftwC   = &DC[TW  *size_MatC];
-      ftnC   = &DC[TN  *size_MatC];
-      fbsC   = &DC[BS  *size_MatC];
-      fbnC   = &DC[BN  *size_MatC];
-      ftsC   = &DC[TS  *size_MatC];
-      fzeroC = &DC[REST*size_MatC];
-      ftneC  = &DC[TNE *size_MatC];
-      ftswC  = &DC[TSW *size_MatC];
-      ftseC  = &DC[TSE *size_MatC];
-      ftnwC  = &DC[TNW *size_MatC];
-      fbneC  = &DC[BNE *size_MatC];
-      fbswC  = &DC[BSW *size_MatC];
-      fbseC  = &DC[BSE *size_MatC];
-      fbnwC  = &DC[BNW *size_MatC];
+      feC    = &DC[DIR_P00   *size_MatC];
+      fwC    = &DC[DIR_M00   *size_MatC];
+      fnC    = &DC[DIR_0P0   *size_MatC];
+      fsC    = &DC[DIR_0M0   *size_MatC];
+      ftC    = &DC[DIR_00P   *size_MatC];
+      fbC    = &DC[DIR_00M   *size_MatC];
+      fneC   = &DC[DIR_PP0  *size_MatC];
+      fswC   = &DC[DIR_MM0  *size_MatC];
+      fseC   = &DC[DIR_PM0  *size_MatC];
+      fnwC   = &DC[DIR_MP0  *size_MatC];
+      fteC   = &DC[DIR_P0P  *size_MatC];
+      fbwC   = &DC[DIR_M0M  *size_MatC];
+      fbeC   = &DC[DIR_P0M  *size_MatC];
+      ftwC   = &DC[DIR_M0P  *size_MatC];
+      ftnC   = &DC[DIR_0PP  *size_MatC];
+      fbsC   = &DC[DIR_0MM  *size_MatC];
+      fbnC   = &DC[DIR_0PM  *size_MatC];
+      ftsC   = &DC[DIR_0MP  *size_MatC];
+      fzeroC = &DC[DIR_000*size_MatC];
+      ftneC  = &DC[DIR_PPP *size_MatC];
+      ftswC  = &DC[DIR_MMP *size_MatC];
+      ftseC  = &DC[DIR_PMP *size_MatC];
+      ftnwC  = &DC[DIR_MPP *size_MatC];
+      fbneC  = &DC[DIR_PPM *size_MatC];
+      fbswC  = &DC[DIR_MMM *size_MatC];
+      fbseC  = &DC[DIR_PMM *size_MatC];
+      fbnwC  = &DC[DIR_MPM *size_MatC];
    } 
    else
    {
-      fwC    = &DC[E   *size_MatC];
-      feC    = &DC[W   *size_MatC];
-      fsC    = &DC[N   *size_MatC];
-      fnC    = &DC[S   *size_MatC];
-      fbC    = &DC[T   *size_MatC];
-      ftC    = &DC[B   *size_MatC];
-      fswC   = &DC[NE  *size_MatC];
-      fneC   = &DC[SW  *size_MatC];
-      fnwC   = &DC[SE  *size_MatC];
-      fseC   = &DC[NW  *size_MatC];
-      fbwC   = &DC[TE  *size_MatC];
-      fteC   = &DC[BW  *size_MatC];
-      ftwC   = &DC[BE  *size_MatC];
-      fbeC   = &DC[TW  *size_MatC];
-      fbsC   = &DC[TN  *size_MatC];
-      ftnC   = &DC[BS  *size_MatC];
-      ftsC   = &DC[BN  *size_MatC];
-      fbnC   = &DC[TS  *size_MatC];
-      fzeroC = &DC[REST*size_MatC];
-      fbswC  = &DC[TNE *size_MatC];
-      fbneC  = &DC[TSW *size_MatC];
-      fbnwC  = &DC[TSE *size_MatC];
-      fbseC  = &DC[TNW *size_MatC];
-      ftswC  = &DC[BNE *size_MatC];
-      ftneC  = &DC[BSW *size_MatC];
-      ftnwC  = &DC[BSE *size_MatC];
-      ftseC  = &DC[BNW *size_MatC];
+      fwC    = &DC[DIR_P00   *size_MatC];
+      feC    = &DC[DIR_M00   *size_MatC];
+      fsC    = &DC[DIR_0P0   *size_MatC];
+      fnC    = &DC[DIR_0M0   *size_MatC];
+      fbC    = &DC[DIR_00P   *size_MatC];
+      ftC    = &DC[DIR_00M   *size_MatC];
+      fswC   = &DC[DIR_PP0  *size_MatC];
+      fneC   = &DC[DIR_MM0  *size_MatC];
+      fnwC   = &DC[DIR_PM0  *size_MatC];
+      fseC   = &DC[DIR_MP0  *size_MatC];
+      fbwC   = &DC[DIR_P0P  *size_MatC];
+      fteC   = &DC[DIR_M0M  *size_MatC];
+      ftwC   = &DC[DIR_P0M  *size_MatC];
+      fbeC   = &DC[DIR_M0P  *size_MatC];
+      fbsC   = &DC[DIR_0PP  *size_MatC];
+      ftnC   = &DC[DIR_0MM  *size_MatC];
+      ftsC   = &DC[DIR_0PM  *size_MatC];
+      fbnC   = &DC[DIR_0MP  *size_MatC];
+      fzeroC = &DC[DIR_000*size_MatC];
+      fbswC  = &DC[DIR_PPP *size_MatC];
+      fbneC  = &DC[DIR_MMP *size_MatC];
+      fbnwC  = &DC[DIR_PMP *size_MatC];
+      fbseC  = &DC[DIR_MPP *size_MatC];
+      ftswC  = &DC[DIR_PPM *size_MatC];
+      ftneC  = &DC[DIR_MMM *size_MatC];
+      ftnwC  = &DC[DIR_PMM *size_MatC];
+      ftseC  = &DC[DIR_MPM *size_MatC];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -40968,7 +40968,7 @@ extern "C" __global__ void scaleCFpress27(   real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleCFLast27( real* DC, 
+__global__ void scaleCFLast27( real* DC, 
                                           real* DF, 
                                           unsigned int* neighborCX,
                                           unsigned int* neighborCY,
@@ -40994,96 +40994,96 @@ extern "C" __global__ void scaleCFLast27( real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[E   *size_MatF];
-   fwF    = &DF[W   *size_MatF];
-   fnF    = &DF[N   *size_MatF];
-   fsF    = &DF[S   *size_MatF];
-   ftF    = &DF[T   *size_MatF];
-   fbF    = &DF[B   *size_MatF];
-   fneF   = &DF[NE  *size_MatF];
-   fswF   = &DF[SW  *size_MatF];
-   fseF   = &DF[SE  *size_MatF];
-   fnwF   = &DF[NW  *size_MatF];
-   fteF   = &DF[TE  *size_MatF];
-   fbwF   = &DF[BW  *size_MatF];
-   fbeF   = &DF[BE  *size_MatF];
-   ftwF   = &DF[TW  *size_MatF];
-   ftnF   = &DF[TN  *size_MatF];
-   fbsF   = &DF[BS  *size_MatF];
-   fbnF   = &DF[BN  *size_MatF];
-   ftsF   = &DF[TS  *size_MatF];
-   fzeroF = &DF[REST*size_MatF];
-   ftneF  = &DF[TNE *size_MatF];
-   ftswF  = &DF[TSW *size_MatF];
-   ftseF  = &DF[TSE *size_MatF];
-   ftnwF  = &DF[TNW *size_MatF];
-   fbneF  = &DF[BNE *size_MatF];
-   fbswF  = &DF[BSW *size_MatF];
-   fbseF  = &DF[BSE *size_MatF];
-   fbnwF  = &DF[BNW *size_MatF];
+   feF    = &DF[DIR_P00   *size_MatF];
+   fwF    = &DF[DIR_M00   *size_MatF];
+   fnF    = &DF[DIR_0P0   *size_MatF];
+   fsF    = &DF[DIR_0M0   *size_MatF];
+   ftF    = &DF[DIR_00P   *size_MatF];
+   fbF    = &DF[DIR_00M   *size_MatF];
+   fneF   = &DF[DIR_PP0  *size_MatF];
+   fswF   = &DF[DIR_MM0  *size_MatF];
+   fseF   = &DF[DIR_PM0  *size_MatF];
+   fnwF   = &DF[DIR_MP0  *size_MatF];
+   fteF   = &DF[DIR_P0P  *size_MatF];
+   fbwF   = &DF[DIR_M0M  *size_MatF];
+   fbeF   = &DF[DIR_P0M  *size_MatF];
+   ftwF   = &DF[DIR_M0P  *size_MatF];
+   ftnF   = &DF[DIR_0PP  *size_MatF];
+   fbsF   = &DF[DIR_0MM  *size_MatF];
+   fbnF   = &DF[DIR_0PM  *size_MatF];
+   ftsF   = &DF[DIR_0MP  *size_MatF];
+   fzeroF = &DF[DIR_000*size_MatF];
+   ftneF  = &DF[DIR_PPP *size_MatF];
+   ftswF  = &DF[DIR_MMP *size_MatF];
+   ftseF  = &DF[DIR_PMP *size_MatF];
+   ftnwF  = &DF[DIR_MPP *size_MatF];
+   fbneF  = &DF[DIR_PPM *size_MatF];
+   fbswF  = &DF[DIR_MMM *size_MatF];
+   fbseF  = &DF[DIR_PMM *size_MatF];
+   fbnwF  = &DF[DIR_MPM *size_MatF];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[E   *size_MatC];
-      fwC    = &DC[W   *size_MatC];
-      fnC    = &DC[N   *size_MatC];
-      fsC    = &DC[S   *size_MatC];
-      ftC    = &DC[T   *size_MatC];
-      fbC    = &DC[B   *size_MatC];
-      fneC   = &DC[NE  *size_MatC];
-      fswC   = &DC[SW  *size_MatC];
-      fseC   = &DC[SE  *size_MatC];
-      fnwC   = &DC[NW  *size_MatC];
-      fteC   = &DC[TE  *size_MatC];
-      fbwC   = &DC[BW  *size_MatC];
-      fbeC   = &DC[BE  *size_MatC];
-      ftwC   = &DC[TW  *size_MatC];
-      ftnC   = &DC[TN  *size_MatC];
-      fbsC   = &DC[BS  *size_MatC];
-      fbnC   = &DC[BN  *size_MatC];
-      ftsC   = &DC[TS  *size_MatC];
-      fzeroC = &DC[REST*size_MatC];
-      ftneC  = &DC[TNE *size_MatC];
-      ftswC  = &DC[TSW *size_MatC];
-      ftseC  = &DC[TSE *size_MatC];
-      ftnwC  = &DC[TNW *size_MatC];
-      fbneC  = &DC[BNE *size_MatC];
-      fbswC  = &DC[BSW *size_MatC];
-      fbseC  = &DC[BSE *size_MatC];
-      fbnwC  = &DC[BNW *size_MatC];
+      feC    = &DC[DIR_P00   *size_MatC];
+      fwC    = &DC[DIR_M00   *size_MatC];
+      fnC    = &DC[DIR_0P0   *size_MatC];
+      fsC    = &DC[DIR_0M0   *size_MatC];
+      ftC    = &DC[DIR_00P   *size_MatC];
+      fbC    = &DC[DIR_00M   *size_MatC];
+      fneC   = &DC[DIR_PP0  *size_MatC];
+      fswC   = &DC[DIR_MM0  *size_MatC];
+      fseC   = &DC[DIR_PM0  *size_MatC];
+      fnwC   = &DC[DIR_MP0  *size_MatC];
+      fteC   = &DC[DIR_P0P  *size_MatC];
+      fbwC   = &DC[DIR_M0M  *size_MatC];
+      fbeC   = &DC[DIR_P0M  *size_MatC];
+      ftwC   = &DC[DIR_M0P  *size_MatC];
+      ftnC   = &DC[DIR_0PP  *size_MatC];
+      fbsC   = &DC[DIR_0MM  *size_MatC];
+      fbnC   = &DC[DIR_0PM  *size_MatC];
+      ftsC   = &DC[DIR_0MP  *size_MatC];
+      fzeroC = &DC[DIR_000*size_MatC];
+      ftneC  = &DC[DIR_PPP *size_MatC];
+      ftswC  = &DC[DIR_MMP *size_MatC];
+      ftseC  = &DC[DIR_PMP *size_MatC];
+      ftnwC  = &DC[DIR_MPP *size_MatC];
+      fbneC  = &DC[DIR_PPM *size_MatC];
+      fbswC  = &DC[DIR_MMM *size_MatC];
+      fbseC  = &DC[DIR_PMM *size_MatC];
+      fbnwC  = &DC[DIR_MPM *size_MatC];
    } 
    else
    {
-      fwC    = &DC[E   *size_MatC];
-      feC    = &DC[W   *size_MatC];
-      fsC    = &DC[N   *size_MatC];
-      fnC    = &DC[S   *size_MatC];
-      fbC    = &DC[T   *size_MatC];
-      ftC    = &DC[B   *size_MatC];
-      fswC   = &DC[NE  *size_MatC];
-      fneC   = &DC[SW  *size_MatC];
-      fnwC   = &DC[SE  *size_MatC];
-      fseC   = &DC[NW  *size_MatC];
-      fbwC   = &DC[TE  *size_MatC];
-      fteC   = &DC[BW  *size_MatC];
-      ftwC   = &DC[BE  *size_MatC];
-      fbeC   = &DC[TW  *size_MatC];
-      fbsC   = &DC[TN  *size_MatC];
-      ftnC   = &DC[BS  *size_MatC];
-      ftsC   = &DC[BN  *size_MatC];
-      fbnC   = &DC[TS  *size_MatC];
-      fzeroC = &DC[REST*size_MatC];
-      fbswC  = &DC[TNE *size_MatC];
-      fbneC  = &DC[TSW *size_MatC];
-      fbnwC  = &DC[TSE *size_MatC];
-      fbseC  = &DC[TNW *size_MatC];
-      ftswC  = &DC[BNE *size_MatC];
-      ftneC  = &DC[BSW *size_MatC];
-      ftnwC  = &DC[BSE *size_MatC];
-      ftseC  = &DC[BNW *size_MatC];
+      fwC    = &DC[DIR_P00   *size_MatC];
+      feC    = &DC[DIR_M00   *size_MatC];
+      fsC    = &DC[DIR_0P0   *size_MatC];
+      fnC    = &DC[DIR_0M0   *size_MatC];
+      fbC    = &DC[DIR_00P   *size_MatC];
+      ftC    = &DC[DIR_00M   *size_MatC];
+      fswC   = &DC[DIR_PP0  *size_MatC];
+      fneC   = &DC[DIR_MM0  *size_MatC];
+      fnwC   = &DC[DIR_PM0  *size_MatC];
+      fseC   = &DC[DIR_MP0  *size_MatC];
+      fbwC   = &DC[DIR_P0P  *size_MatC];
+      fteC   = &DC[DIR_M0M  *size_MatC];
+      ftwC   = &DC[DIR_P0M  *size_MatC];
+      fbeC   = &DC[DIR_M0P  *size_MatC];
+      fbsC   = &DC[DIR_0PP  *size_MatC];
+      ftnC   = &DC[DIR_0MM  *size_MatC];
+      ftsC   = &DC[DIR_0PM  *size_MatC];
+      fbnC   = &DC[DIR_0MP  *size_MatC];
+      fzeroC = &DC[DIR_000*size_MatC];
+      fbswC  = &DC[DIR_PPP *size_MatC];
+      fbneC  = &DC[DIR_MMP *size_MatC];
+      fbnwC  = &DC[DIR_PMP *size_MatC];
+      fbseC  = &DC[DIR_MPP *size_MatC];
+      ftswC  = &DC[DIR_PPM *size_MatC];
+      ftneC  = &DC[DIR_MMM *size_MatC];
+      ftnwC  = &DC[DIR_PMM *size_MatC];
+      ftseC  = &DC[DIR_MPM *size_MatC];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -43203,7 +43203,7 @@ extern "C" __global__ void scaleCFLast27( real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleCFThSMG7(    real* DC, 
+__global__ void scaleCFThSMG7(    real* DC, 
                                              real* DF,
                                              real* DD7C, 
                                              real* DD7F, 
@@ -43225,96 +43225,96 @@ extern "C" __global__ void scaleCFThSMG7(    real* DC,
 {
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, /**fzeroF,*/ *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[E   *size_MatF];
-   fwF    = &DF[W   *size_MatF];
-   fnF    = &DF[N   *size_MatF];
-   fsF    = &DF[S   *size_MatF];
-   ftF    = &DF[T   *size_MatF];
-   fbF    = &DF[B   *size_MatF];
-   fneF   = &DF[NE  *size_MatF];
-   fswF   = &DF[SW  *size_MatF];
-   fseF   = &DF[SE  *size_MatF];
-   fnwF   = &DF[NW  *size_MatF];
-   fteF   = &DF[TE  *size_MatF];
-   fbwF   = &DF[BW  *size_MatF];
-   fbeF   = &DF[BE  *size_MatF];
-   ftwF   = &DF[TW  *size_MatF];
-   ftnF   = &DF[TN  *size_MatF];
-   fbsF   = &DF[BS  *size_MatF];
-   fbnF   = &DF[BN  *size_MatF];
-   ftsF   = &DF[TS  *size_MatF];
-   //fzeroF = &DF[REST*size_MatF];
-   ftneF  = &DF[TNE *size_MatF];
-   ftswF  = &DF[TSW *size_MatF];
-   ftseF  = &DF[TSE *size_MatF];
-   ftnwF  = &DF[TNW *size_MatF];
-   fbneF  = &DF[BNE *size_MatF];
-   fbswF  = &DF[BSW *size_MatF];
-   fbseF  = &DF[BSE *size_MatF];
-   fbnwF  = &DF[BNW *size_MatF];
+   feF    = &DF[DIR_P00   *size_MatF];
+   fwF    = &DF[DIR_M00   *size_MatF];
+   fnF    = &DF[DIR_0P0   *size_MatF];
+   fsF    = &DF[DIR_0M0   *size_MatF];
+   ftF    = &DF[DIR_00P   *size_MatF];
+   fbF    = &DF[DIR_00M   *size_MatF];
+   fneF   = &DF[DIR_PP0  *size_MatF];
+   fswF   = &DF[DIR_MM0  *size_MatF];
+   fseF   = &DF[DIR_PM0  *size_MatF];
+   fnwF   = &DF[DIR_MP0  *size_MatF];
+   fteF   = &DF[DIR_P0P  *size_MatF];
+   fbwF   = &DF[DIR_M0M  *size_MatF];
+   fbeF   = &DF[DIR_P0M  *size_MatF];
+   ftwF   = &DF[DIR_M0P  *size_MatF];
+   ftnF   = &DF[DIR_0PP  *size_MatF];
+   fbsF   = &DF[DIR_0MM  *size_MatF];
+   fbnF   = &DF[DIR_0PM  *size_MatF];
+   ftsF   = &DF[DIR_0MP  *size_MatF];
+   //fzeroF = &DF[DIR_000*size_MatF];
+   ftneF  = &DF[DIR_PPP *size_MatF];
+   ftswF  = &DF[DIR_MMP *size_MatF];
+   ftseF  = &DF[DIR_PMP *size_MatF];
+   ftnwF  = &DF[DIR_MPP *size_MatF];
+   fbneF  = &DF[DIR_PPM *size_MatF];
+   fbswF  = &DF[DIR_MMM *size_MatF];
+   fbseF  = &DF[DIR_PMM *size_MatF];
+   fbnwF  = &DF[DIR_MPM *size_MatF];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, //*fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[E   *size_MatC];
-      fwC    = &DC[W   *size_MatC];
-      fnC    = &DC[N   *size_MatC];
-      fsC    = &DC[S   *size_MatC];
-      ftC    = &DC[T   *size_MatC];
-      fbC    = &DC[B   *size_MatC];
-      fneC   = &DC[NE  *size_MatC];
-      fswC   = &DC[SW  *size_MatC];
-      fseC   = &DC[SE  *size_MatC];
-      fnwC   = &DC[NW  *size_MatC];
-      fteC   = &DC[TE  *size_MatC];
-      fbwC   = &DC[BW  *size_MatC];
-      fbeC   = &DC[BE  *size_MatC];
-      ftwC   = &DC[TW  *size_MatC];
-      ftnC   = &DC[TN  *size_MatC];
-      fbsC   = &DC[BS  *size_MatC];
-      fbnC   = &DC[BN  *size_MatC];
-      ftsC   = &DC[TS  *size_MatC];
-      //fzeroC = &DC[REST*size_MatC];
-      ftneC  = &DC[TNE *size_MatC];
-      ftswC  = &DC[TSW *size_MatC];
-      ftseC  = &DC[TSE *size_MatC];
-      ftnwC  = &DC[TNW *size_MatC];
-      fbneC  = &DC[BNE *size_MatC];
-      fbswC  = &DC[BSW *size_MatC];
-      fbseC  = &DC[BSE *size_MatC];
-      fbnwC  = &DC[BNW *size_MatC];
+      feC    = &DC[DIR_P00   *size_MatC];
+      fwC    = &DC[DIR_M00   *size_MatC];
+      fnC    = &DC[DIR_0P0   *size_MatC];
+      fsC    = &DC[DIR_0M0   *size_MatC];
+      ftC    = &DC[DIR_00P   *size_MatC];
+      fbC    = &DC[DIR_00M   *size_MatC];
+      fneC   = &DC[DIR_PP0  *size_MatC];
+      fswC   = &DC[DIR_MM0  *size_MatC];
+      fseC   = &DC[DIR_PM0  *size_MatC];
+      fnwC   = &DC[DIR_MP0  *size_MatC];
+      fteC   = &DC[DIR_P0P  *size_MatC];
+      fbwC   = &DC[DIR_M0M  *size_MatC];
+      fbeC   = &DC[DIR_P0M  *size_MatC];
+      ftwC   = &DC[DIR_M0P  *size_MatC];
+      ftnC   = &DC[DIR_0PP  *size_MatC];
+      fbsC   = &DC[DIR_0MM  *size_MatC];
+      fbnC   = &DC[DIR_0PM  *size_MatC];
+      ftsC   = &DC[DIR_0MP  *size_MatC];
+      //fzeroC = &DC[DIR_000*size_MatC];
+      ftneC  = &DC[DIR_PPP *size_MatC];
+      ftswC  = &DC[DIR_MMP *size_MatC];
+      ftseC  = &DC[DIR_PMP *size_MatC];
+      ftnwC  = &DC[DIR_MPP *size_MatC];
+      fbneC  = &DC[DIR_PPM *size_MatC];
+      fbswC  = &DC[DIR_MMM *size_MatC];
+      fbseC  = &DC[DIR_PMM *size_MatC];
+      fbnwC  = &DC[DIR_MPM *size_MatC];
    } 
    else
    {
-      fwC    = &DC[E   *size_MatC];
-      feC    = &DC[W   *size_MatC];
-      fsC    = &DC[N   *size_MatC];
-      fnC    = &DC[S   *size_MatC];
-      fbC    = &DC[T   *size_MatC];
-      ftC    = &DC[B   *size_MatC];
-      fswC   = &DC[NE  *size_MatC];
-      fneC   = &DC[SW  *size_MatC];
-      fnwC   = &DC[SE  *size_MatC];
-      fseC   = &DC[NW  *size_MatC];
-      fbwC   = &DC[TE  *size_MatC];
-      fteC   = &DC[BW  *size_MatC];
-      ftwC   = &DC[BE  *size_MatC];
-      fbeC   = &DC[TW  *size_MatC];
-      fbsC   = &DC[TN  *size_MatC];
-      ftnC   = &DC[BS  *size_MatC];
-      ftsC   = &DC[BN  *size_MatC];
-      fbnC   = &DC[TS  *size_MatC];
-      //fzeroC = &DC[REST*size_MatC];
-      fbswC  = &DC[TNE *size_MatC];
-      fbneC  = &DC[TSW *size_MatC];
-      fbnwC  = &DC[TSE *size_MatC];
-      fbseC  = &DC[TNW *size_MatC];
-      ftswC  = &DC[BNE *size_MatC];
-      ftneC  = &DC[BSW *size_MatC];
-      ftnwC  = &DC[BSE *size_MatC];
-      ftseC  = &DC[BNW *size_MatC];
+      fwC    = &DC[DIR_P00   *size_MatC];
+      feC    = &DC[DIR_M00   *size_MatC];
+      fsC    = &DC[DIR_0P0   *size_MatC];
+      fnC    = &DC[DIR_0M0   *size_MatC];
+      fbC    = &DC[DIR_00P   *size_MatC];
+      ftC    = &DC[DIR_00M   *size_MatC];
+      fswC   = &DC[DIR_PP0  *size_MatC];
+      fneC   = &DC[DIR_MM0  *size_MatC];
+      fnwC   = &DC[DIR_PM0  *size_MatC];
+      fseC   = &DC[DIR_MP0  *size_MatC];
+      fbwC   = &DC[DIR_P0P  *size_MatC];
+      fteC   = &DC[DIR_M0M  *size_MatC];
+      ftwC   = &DC[DIR_P0M  *size_MatC];
+      fbeC   = &DC[DIR_M0P  *size_MatC];
+      fbsC   = &DC[DIR_0PP  *size_MatC];
+      ftnC   = &DC[DIR_0MM  *size_MatC];
+      ftsC   = &DC[DIR_0PM  *size_MatC];
+      fbnC   = &DC[DIR_0MP  *size_MatC];
+      //fzeroC = &DC[DIR_000*size_MatC];
+      fbswC  = &DC[DIR_PPP *size_MatC];
+      fbneC  = &DC[DIR_MMP *size_MatC];
+      fbnwC  = &DC[DIR_PMP *size_MatC];
+      fbseC  = &DC[DIR_MPP *size_MatC];
+      ftswC  = &DC[DIR_PPM *size_MatC];
+      ftneC  = &DC[DIR_MMM *size_MatC];
+      ftnwC  = &DC[DIR_PMM *size_MatC];
+      ftseC  = &DC[DIR_MPM *size_MatC];
    }
 
 
@@ -44430,7 +44430,7 @@ extern "C" __global__ void scaleCFThSMG7(    real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleCFThS7(   real* DC, 
+__global__ void scaleCFThS7(   real* DC, 
                                           real* DF,
                                           real* DD7C, 
                                           real* DD7F, 
@@ -44451,96 +44451,96 @@ extern "C" __global__ void scaleCFThS7(   real* DC,
 {
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, /**fzeroF,*/ *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[E   *size_MatF];
-   fwF    = &DF[W   *size_MatF];
-   fnF    = &DF[N   *size_MatF];
-   fsF    = &DF[S   *size_MatF];
-   ftF    = &DF[T   *size_MatF];
-   fbF    = &DF[B   *size_MatF];
-   fneF   = &DF[NE  *size_MatF];
-   fswF   = &DF[SW  *size_MatF];
-   fseF   = &DF[SE  *size_MatF];
-   fnwF   = &DF[NW  *size_MatF];
-   fteF   = &DF[TE  *size_MatF];
-   fbwF   = &DF[BW  *size_MatF];
-   fbeF   = &DF[BE  *size_MatF];
-   ftwF   = &DF[TW  *size_MatF];
-   ftnF   = &DF[TN  *size_MatF];
-   fbsF   = &DF[BS  *size_MatF];
-   fbnF   = &DF[BN  *size_MatF];
-   ftsF   = &DF[TS  *size_MatF];
-   //fzeroF = &DF[REST*size_MatF];
-   ftneF  = &DF[TNE *size_MatF];
-   ftswF  = &DF[TSW *size_MatF];
-   ftseF  = &DF[TSE *size_MatF];
-   ftnwF  = &DF[TNW *size_MatF];
-   fbneF  = &DF[BNE *size_MatF];
-   fbswF  = &DF[BSW *size_MatF];
-   fbseF  = &DF[BSE *size_MatF];
-   fbnwF  = &DF[BNW *size_MatF];
+   feF    = &DF[DIR_P00   *size_MatF];
+   fwF    = &DF[DIR_M00   *size_MatF];
+   fnF    = &DF[DIR_0P0   *size_MatF];
+   fsF    = &DF[DIR_0M0   *size_MatF];
+   ftF    = &DF[DIR_00P   *size_MatF];
+   fbF    = &DF[DIR_00M   *size_MatF];
+   fneF   = &DF[DIR_PP0  *size_MatF];
+   fswF   = &DF[DIR_MM0  *size_MatF];
+   fseF   = &DF[DIR_PM0  *size_MatF];
+   fnwF   = &DF[DIR_MP0  *size_MatF];
+   fteF   = &DF[DIR_P0P  *size_MatF];
+   fbwF   = &DF[DIR_M0M  *size_MatF];
+   fbeF   = &DF[DIR_P0M  *size_MatF];
+   ftwF   = &DF[DIR_M0P  *size_MatF];
+   ftnF   = &DF[DIR_0PP  *size_MatF];
+   fbsF   = &DF[DIR_0MM  *size_MatF];
+   fbnF   = &DF[DIR_0PM  *size_MatF];
+   ftsF   = &DF[DIR_0MP  *size_MatF];
+   //fzeroF = &DF[DIR_000*size_MatF];
+   ftneF  = &DF[DIR_PPP *size_MatF];
+   ftswF  = &DF[DIR_MMP *size_MatF];
+   ftseF  = &DF[DIR_PMP *size_MatF];
+   ftnwF  = &DF[DIR_MPP *size_MatF];
+   fbneF  = &DF[DIR_PPM *size_MatF];
+   fbswF  = &DF[DIR_MMM *size_MatF];
+   fbseF  = &DF[DIR_PMM *size_MatF];
+   fbnwF  = &DF[DIR_MPM *size_MatF];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, //*fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[E   *size_MatC];
-      fwC    = &DC[W   *size_MatC];
-      fnC    = &DC[N   *size_MatC];
-      fsC    = &DC[S   *size_MatC];
-      ftC    = &DC[T   *size_MatC];
-      fbC    = &DC[B   *size_MatC];
-      fneC   = &DC[NE  *size_MatC];
-      fswC   = &DC[SW  *size_MatC];
-      fseC   = &DC[SE  *size_MatC];
-      fnwC   = &DC[NW  *size_MatC];
-      fteC   = &DC[TE  *size_MatC];
-      fbwC   = &DC[BW  *size_MatC];
-      fbeC   = &DC[BE  *size_MatC];
-      ftwC   = &DC[TW  *size_MatC];
-      ftnC   = &DC[TN  *size_MatC];
-      fbsC   = &DC[BS  *size_MatC];
-      fbnC   = &DC[BN  *size_MatC];
-      ftsC   = &DC[TS  *size_MatC];
-      //fzeroC = &DC[REST*size_MatC];
-      ftneC  = &DC[TNE *size_MatC];
-      ftswC  = &DC[TSW *size_MatC];
-      ftseC  = &DC[TSE *size_MatC];
-      ftnwC  = &DC[TNW *size_MatC];
-      fbneC  = &DC[BNE *size_MatC];
-      fbswC  = &DC[BSW *size_MatC];
-      fbseC  = &DC[BSE *size_MatC];
-      fbnwC  = &DC[BNW *size_MatC];
+      feC    = &DC[DIR_P00   *size_MatC];
+      fwC    = &DC[DIR_M00   *size_MatC];
+      fnC    = &DC[DIR_0P0   *size_MatC];
+      fsC    = &DC[DIR_0M0   *size_MatC];
+      ftC    = &DC[DIR_00P   *size_MatC];
+      fbC    = &DC[DIR_00M   *size_MatC];
+      fneC   = &DC[DIR_PP0  *size_MatC];
+      fswC   = &DC[DIR_MM0  *size_MatC];
+      fseC   = &DC[DIR_PM0  *size_MatC];
+      fnwC   = &DC[DIR_MP0  *size_MatC];
+      fteC   = &DC[DIR_P0P  *size_MatC];
+      fbwC   = &DC[DIR_M0M  *size_MatC];
+      fbeC   = &DC[DIR_P0M  *size_MatC];
+      ftwC   = &DC[DIR_M0P  *size_MatC];
+      ftnC   = &DC[DIR_0PP  *size_MatC];
+      fbsC   = &DC[DIR_0MM  *size_MatC];
+      fbnC   = &DC[DIR_0PM  *size_MatC];
+      ftsC   = &DC[DIR_0MP  *size_MatC];
+      //fzeroC = &DC[DIR_000*size_MatC];
+      ftneC  = &DC[DIR_PPP *size_MatC];
+      ftswC  = &DC[DIR_MMP *size_MatC];
+      ftseC  = &DC[DIR_PMP *size_MatC];
+      ftnwC  = &DC[DIR_MPP *size_MatC];
+      fbneC  = &DC[DIR_PPM *size_MatC];
+      fbswC  = &DC[DIR_MMM *size_MatC];
+      fbseC  = &DC[DIR_PMM *size_MatC];
+      fbnwC  = &DC[DIR_MPM *size_MatC];
    } 
    else
    {
-      fwC    = &DC[E   *size_MatC];
-      feC    = &DC[W   *size_MatC];
-      fsC    = &DC[N   *size_MatC];
-      fnC    = &DC[S   *size_MatC];
-      fbC    = &DC[T   *size_MatC];
-      ftC    = &DC[B   *size_MatC];
-      fswC   = &DC[NE  *size_MatC];
-      fneC   = &DC[SW  *size_MatC];
-      fnwC   = &DC[SE  *size_MatC];
-      fseC   = &DC[NW  *size_MatC];
-      fbwC   = &DC[TE  *size_MatC];
-      fteC   = &DC[BW  *size_MatC];
-      ftwC   = &DC[BE  *size_MatC];
-      fbeC   = &DC[TW  *size_MatC];
-      fbsC   = &DC[TN  *size_MatC];
-      ftnC   = &DC[BS  *size_MatC];
-      ftsC   = &DC[BN  *size_MatC];
-      fbnC   = &DC[TS  *size_MatC];
-      //fzeroC = &DC[REST*size_MatC];
-      fbswC  = &DC[TNE *size_MatC];
-      fbneC  = &DC[TSW *size_MatC];
-      fbnwC  = &DC[TSE *size_MatC];
-      fbseC  = &DC[TNW *size_MatC];
-      ftswC  = &DC[BNE *size_MatC];
-      ftneC  = &DC[BSW *size_MatC];
-      ftnwC  = &DC[BSE *size_MatC];
-      ftseC  = &DC[BNW *size_MatC];
+      fwC    = &DC[DIR_P00   *size_MatC];
+      feC    = &DC[DIR_M00   *size_MatC];
+      fsC    = &DC[DIR_0P0   *size_MatC];
+      fnC    = &DC[DIR_0M0   *size_MatC];
+      fbC    = &DC[DIR_00P   *size_MatC];
+      ftC    = &DC[DIR_00M   *size_MatC];
+      fswC   = &DC[DIR_PP0  *size_MatC];
+      fneC   = &DC[DIR_MM0  *size_MatC];
+      fnwC   = &DC[DIR_PM0  *size_MatC];
+      fseC   = &DC[DIR_MP0  *size_MatC];
+      fbwC   = &DC[DIR_P0P  *size_MatC];
+      fteC   = &DC[DIR_M0M  *size_MatC];
+      ftwC   = &DC[DIR_P0M  *size_MatC];
+      fbeC   = &DC[DIR_M0P  *size_MatC];
+      fbsC   = &DC[DIR_0PP  *size_MatC];
+      ftnC   = &DC[DIR_0MM  *size_MatC];
+      ftsC   = &DC[DIR_0PM  *size_MatC];
+      fbnC   = &DC[DIR_0MP  *size_MatC];
+      //fzeroC = &DC[DIR_000*size_MatC];
+      fbswC  = &DC[DIR_PPP *size_MatC];
+      fbneC  = &DC[DIR_MMP *size_MatC];
+      fbnwC  = &DC[DIR_PMP *size_MatC];
+      fbseC  = &DC[DIR_MPP *size_MatC];
+      ftswC  = &DC[DIR_PPM *size_MatC];
+      ftneC  = &DC[DIR_MMM *size_MatC];
+      ftnwC  = &DC[DIR_PMM *size_MatC];
+      ftseC  = &DC[DIR_MPM *size_MatC];
    }
 
 
@@ -45553,7 +45553,7 @@ extern "C" __global__ void scaleCFThS7(   real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleCFThS27(     real* DC, 
+__global__ void scaleCFThS27(     real* DC, 
                                              real* DF,
                                              real* DD27C, 
                                              real* DD27F, 
@@ -45575,188 +45575,188 @@ extern "C" __global__ void scaleCFThS27(     real* DC,
 {
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, /**fzeroF,*/ *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[E   *size_MatF];
-   fwF    = &DF[W   *size_MatF];
-   fnF    = &DF[N   *size_MatF];
-   fsF    = &DF[S   *size_MatF];
-   ftF    = &DF[T   *size_MatF];
-   fbF    = &DF[B   *size_MatF];
-   fneF   = &DF[NE  *size_MatF];
-   fswF   = &DF[SW  *size_MatF];
-   fseF   = &DF[SE  *size_MatF];
-   fnwF   = &DF[NW  *size_MatF];
-   fteF   = &DF[TE  *size_MatF];
-   fbwF   = &DF[BW  *size_MatF];
-   fbeF   = &DF[BE  *size_MatF];
-   ftwF   = &DF[TW  *size_MatF];
-   ftnF   = &DF[TN  *size_MatF];
-   fbsF   = &DF[BS  *size_MatF];
-   fbnF   = &DF[BN  *size_MatF];
-   ftsF   = &DF[TS  *size_MatF];
-   //fzeroF = &DF[REST*size_MatF];
-   ftneF  = &DF[TNE *size_MatF];
-   ftswF  = &DF[TSW *size_MatF];
-   ftseF  = &DF[TSE *size_MatF];
-   ftnwF  = &DF[TNW *size_MatF];
-   fbneF  = &DF[BNE *size_MatF];
-   fbswF  = &DF[BSW *size_MatF];
-   fbseF  = &DF[BSE *size_MatF];
-   fbnwF  = &DF[BNW *size_MatF];
+   feF    = &DF[DIR_P00   *size_MatF];
+   fwF    = &DF[DIR_M00   *size_MatF];
+   fnF    = &DF[DIR_0P0   *size_MatF];
+   fsF    = &DF[DIR_0M0   *size_MatF];
+   ftF    = &DF[DIR_00P   *size_MatF];
+   fbF    = &DF[DIR_00M   *size_MatF];
+   fneF   = &DF[DIR_PP0  *size_MatF];
+   fswF   = &DF[DIR_MM0  *size_MatF];
+   fseF   = &DF[DIR_PM0  *size_MatF];
+   fnwF   = &DF[DIR_MP0  *size_MatF];
+   fteF   = &DF[DIR_P0P  *size_MatF];
+   fbwF   = &DF[DIR_M0M  *size_MatF];
+   fbeF   = &DF[DIR_P0M  *size_MatF];
+   ftwF   = &DF[DIR_M0P  *size_MatF];
+   ftnF   = &DF[DIR_0PP  *size_MatF];
+   fbsF   = &DF[DIR_0MM  *size_MatF];
+   fbnF   = &DF[DIR_0PM  *size_MatF];
+   ftsF   = &DF[DIR_0MP  *size_MatF];
+   //fzeroF = &DF[DIR_000*size_MatF];
+   ftneF  = &DF[DIR_PPP *size_MatF];
+   ftswF  = &DF[DIR_MMP *size_MatF];
+   ftseF  = &DF[DIR_PMP *size_MatF];
+   ftnwF  = &DF[DIR_MPP *size_MatF];
+   fbneF  = &DF[DIR_PPM *size_MatF];
+   fbswF  = &DF[DIR_MMM *size_MatF];
+   fbseF  = &DF[DIR_PMM *size_MatF];
+   fbnwF  = &DF[DIR_MPM *size_MatF];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, //*fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[E   *size_MatC];
-      fwC    = &DC[W   *size_MatC];
-      fnC    = &DC[N   *size_MatC];
-      fsC    = &DC[S   *size_MatC];
-      ftC    = &DC[T   *size_MatC];
-      fbC    = &DC[B   *size_MatC];
-      fneC   = &DC[NE  *size_MatC];
-      fswC   = &DC[SW  *size_MatC];
-      fseC   = &DC[SE  *size_MatC];
-      fnwC   = &DC[NW  *size_MatC];
-      fteC   = &DC[TE  *size_MatC];
-      fbwC   = &DC[BW  *size_MatC];
-      fbeC   = &DC[BE  *size_MatC];
-      ftwC   = &DC[TW  *size_MatC];
-      ftnC   = &DC[TN  *size_MatC];
-      fbsC   = &DC[BS  *size_MatC];
-      fbnC   = &DC[BN  *size_MatC];
-      ftsC   = &DC[TS  *size_MatC];
-      //fzeroC = &DC[REST*size_MatC];
-      ftneC  = &DC[TNE *size_MatC];
-      ftswC  = &DC[TSW *size_MatC];
-      ftseC  = &DC[TSE *size_MatC];
-      ftnwC  = &DC[TNW *size_MatC];
-      fbneC  = &DC[BNE *size_MatC];
-      fbswC  = &DC[BSW *size_MatC];
-      fbseC  = &DC[BSE *size_MatC];
-      fbnwC  = &DC[BNW *size_MatC];
+      feC    = &DC[DIR_P00   *size_MatC];
+      fwC    = &DC[DIR_M00   *size_MatC];
+      fnC    = &DC[DIR_0P0   *size_MatC];
+      fsC    = &DC[DIR_0M0   *size_MatC];
+      ftC    = &DC[DIR_00P   *size_MatC];
+      fbC    = &DC[DIR_00M   *size_MatC];
+      fneC   = &DC[DIR_PP0  *size_MatC];
+      fswC   = &DC[DIR_MM0  *size_MatC];
+      fseC   = &DC[DIR_PM0  *size_MatC];
+      fnwC   = &DC[DIR_MP0  *size_MatC];
+      fteC   = &DC[DIR_P0P  *size_MatC];
+      fbwC   = &DC[DIR_M0M  *size_MatC];
+      fbeC   = &DC[DIR_P0M  *size_MatC];
+      ftwC   = &DC[DIR_M0P  *size_MatC];
+      ftnC   = &DC[DIR_0PP  *size_MatC];
+      fbsC   = &DC[DIR_0MM  *size_MatC];
+      fbnC   = &DC[DIR_0PM  *size_MatC];
+      ftsC   = &DC[DIR_0MP  *size_MatC];
+      //fzeroC = &DC[DIR_000*size_MatC];
+      ftneC  = &DC[DIR_PPP *size_MatC];
+      ftswC  = &DC[DIR_MMP *size_MatC];
+      ftseC  = &DC[DIR_PMP *size_MatC];
+      ftnwC  = &DC[DIR_MPP *size_MatC];
+      fbneC  = &DC[DIR_PPM *size_MatC];
+      fbswC  = &DC[DIR_MMM *size_MatC];
+      fbseC  = &DC[DIR_PMM *size_MatC];
+      fbnwC  = &DC[DIR_MPM *size_MatC];
    } 
    else
    {
-      fwC    = &DC[E   *size_MatC];
-      feC    = &DC[W   *size_MatC];
-      fsC    = &DC[N   *size_MatC];
-      fnC    = &DC[S   *size_MatC];
-      fbC    = &DC[T   *size_MatC];
-      ftC    = &DC[B   *size_MatC];
-      fswC   = &DC[NE  *size_MatC];
-      fneC   = &DC[SW  *size_MatC];
-      fnwC   = &DC[SE  *size_MatC];
-      fseC   = &DC[NW  *size_MatC];
-      fbwC   = &DC[TE  *size_MatC];
-      fteC   = &DC[BW  *size_MatC];
-      ftwC   = &DC[BE  *size_MatC];
-      fbeC   = &DC[TW  *size_MatC];
-      fbsC   = &DC[TN  *size_MatC];
-      ftnC   = &DC[BS  *size_MatC];
-      ftsC   = &DC[BN  *size_MatC];
-      fbnC   = &DC[TS  *size_MatC];
-      //fzeroC = &DC[REST*size_MatC];
-      fbswC  = &DC[TNE *size_MatC];
-      fbneC  = &DC[TSW *size_MatC];
-      fbnwC  = &DC[TSE *size_MatC];
-      fbseC  = &DC[TNW *size_MatC];
-      ftswC  = &DC[BNE *size_MatC];
-      ftneC  = &DC[BSW *size_MatC];
-      ftnwC  = &DC[BSE *size_MatC];
-      ftseC  = &DC[BNW *size_MatC];
+      fwC    = &DC[DIR_P00   *size_MatC];
+      feC    = &DC[DIR_M00   *size_MatC];
+      fsC    = &DC[DIR_0P0   *size_MatC];
+      fnC    = &DC[DIR_0M0   *size_MatC];
+      fbC    = &DC[DIR_00P   *size_MatC];
+      ftC    = &DC[DIR_00M   *size_MatC];
+      fswC   = &DC[DIR_PP0  *size_MatC];
+      fneC   = &DC[DIR_MM0  *size_MatC];
+      fnwC   = &DC[DIR_PM0  *size_MatC];
+      fseC   = &DC[DIR_MP0  *size_MatC];
+      fbwC   = &DC[DIR_P0P  *size_MatC];
+      fteC   = &DC[DIR_M0M  *size_MatC];
+      ftwC   = &DC[DIR_P0M  *size_MatC];
+      fbeC   = &DC[DIR_M0P  *size_MatC];
+      fbsC   = &DC[DIR_0PP  *size_MatC];
+      ftnC   = &DC[DIR_0MM  *size_MatC];
+      ftsC   = &DC[DIR_0PM  *size_MatC];
+      fbnC   = &DC[DIR_0MP  *size_MatC];
+      //fzeroC = &DC[DIR_000*size_MatC];
+      fbswC  = &DC[DIR_PPP *size_MatC];
+      fbneC  = &DC[DIR_MMP *size_MatC];
+      fbnwC  = &DC[DIR_PMP *size_MatC];
+      fbseC  = &DC[DIR_MPP *size_MatC];
+      ftswC  = &DC[DIR_PPM *size_MatC];
+      ftneC  = &DC[DIR_MMM *size_MatC];
+      ftnwC  = &DC[DIR_PMM *size_MatC];
+      ftseC  = &DC[DIR_MPM *size_MatC];
    }
 
 
    Distributions27 D27F;
-   D27F.f[E   ] = &DD27F[E   *size_MatF];
-   D27F.f[W   ] = &DD27F[W   *size_MatF];
-   D27F.f[N   ] = &DD27F[N   *size_MatF];
-   D27F.f[S   ] = &DD27F[S   *size_MatF];
-   D27F.f[T   ] = &DD27F[T   *size_MatF];
-   D27F.f[B   ] = &DD27F[B   *size_MatF];
-   D27F.f[NE  ] = &DD27F[NE  *size_MatF];
-   D27F.f[SW  ] = &DD27F[SW  *size_MatF];
-   D27F.f[SE  ] = &DD27F[SE  *size_MatF];
-   D27F.f[NW  ] = &DD27F[NW  *size_MatF];
-   D27F.f[TE  ] = &DD27F[TE  *size_MatF];
-   D27F.f[BW  ] = &DD27F[BW  *size_MatF];
-   D27F.f[BE  ] = &DD27F[BE  *size_MatF];
-   D27F.f[TW  ] = &DD27F[TW  *size_MatF];
-   D27F.f[TN  ] = &DD27F[TN  *size_MatF];
-   D27F.f[BS  ] = &DD27F[BS  *size_MatF];
-   D27F.f[BN  ] = &DD27F[BN  *size_MatF];
-   D27F.f[TS  ] = &DD27F[TS  *size_MatF];
-   D27F.f[REST] = &DD27F[REST*size_MatF];
-   D27F.f[TNE ] = &DD27F[TNE *size_MatF];
-   D27F.f[TSW ] = &DD27F[TSW *size_MatF];
-   D27F.f[TSE ] = &DD27F[TSE *size_MatF];
-   D27F.f[TNW ] = &DD27F[TNW *size_MatF];
-   D27F.f[BNE ] = &DD27F[BNE *size_MatF];
-   D27F.f[BSW ] = &DD27F[BSW *size_MatF];
-   D27F.f[BSE ] = &DD27F[BSE *size_MatF];
-   D27F.f[BNW ] = &DD27F[BNW *size_MatF];
+   D27F.f[DIR_P00   ] = &DD27F[DIR_P00   *size_MatF];
+   D27F.f[DIR_M00   ] = &DD27F[DIR_M00   *size_MatF];
+   D27F.f[DIR_0P0   ] = &DD27F[DIR_0P0   *size_MatF];
+   D27F.f[DIR_0M0   ] = &DD27F[DIR_0M0   *size_MatF];
+   D27F.f[DIR_00P   ] = &DD27F[DIR_00P   *size_MatF];
+   D27F.f[DIR_00M   ] = &DD27F[DIR_00M   *size_MatF];
+   D27F.f[DIR_PP0  ] = &DD27F[DIR_PP0  *size_MatF];
+   D27F.f[DIR_MM0  ] = &DD27F[DIR_MM0  *size_MatF];
+   D27F.f[DIR_PM0  ] = &DD27F[DIR_PM0  *size_MatF];
+   D27F.f[DIR_MP0  ] = &DD27F[DIR_MP0  *size_MatF];
+   D27F.f[DIR_P0P  ] = &DD27F[DIR_P0P  *size_MatF];
+   D27F.f[DIR_M0M  ] = &DD27F[DIR_M0M  *size_MatF];
+   D27F.f[DIR_P0M  ] = &DD27F[DIR_P0M  *size_MatF];
+   D27F.f[DIR_M0P  ] = &DD27F[DIR_M0P  *size_MatF];
+   D27F.f[DIR_0PP  ] = &DD27F[DIR_0PP  *size_MatF];
+   D27F.f[DIR_0MM  ] = &DD27F[DIR_0MM  *size_MatF];
+   D27F.f[DIR_0PM  ] = &DD27F[DIR_0PM  *size_MatF];
+   D27F.f[DIR_0MP  ] = &DD27F[DIR_0MP  *size_MatF];
+   D27F.f[DIR_000] = &DD27F[DIR_000*size_MatF];
+   D27F.f[DIR_PPP ] = &DD27F[DIR_PPP *size_MatF];
+   D27F.f[DIR_MMP ] = &DD27F[DIR_MMP *size_MatF];
+   D27F.f[DIR_PMP ] = &DD27F[DIR_PMP *size_MatF];
+   D27F.f[DIR_MPP ] = &DD27F[DIR_MPP *size_MatF];
+   D27F.f[DIR_PPM ] = &DD27F[DIR_PPM *size_MatF];
+   D27F.f[DIR_MMM ] = &DD27F[DIR_MMM *size_MatF];
+   D27F.f[DIR_PMM ] = &DD27F[DIR_PMM *size_MatF];
+   D27F.f[DIR_MPM ] = &DD27F[DIR_MPM *size_MatF];
 
    Distributions27 D27C;
    if (isEvenTimestep==true)
    {
-      D27C.f[E   ] = &DD27C[E   *size_MatC];
-      D27C.f[W   ] = &DD27C[W   *size_MatC];
-      D27C.f[N   ] = &DD27C[N   *size_MatC];
-      D27C.f[S   ] = &DD27C[S   *size_MatC];
-      D27C.f[T   ] = &DD27C[T   *size_MatC];
-      D27C.f[B   ] = &DD27C[B   *size_MatC];
-      D27C.f[NE  ] = &DD27C[NE  *size_MatC];
-      D27C.f[SW  ] = &DD27C[SW  *size_MatC];
-      D27C.f[SE  ] = &DD27C[SE  *size_MatC];
-      D27C.f[NW  ] = &DD27C[NW  *size_MatC];
-      D27C.f[TE  ] = &DD27C[TE  *size_MatC];
-      D27C.f[BW  ] = &DD27C[BW  *size_MatC];
-      D27C.f[BE  ] = &DD27C[BE  *size_MatC];
-      D27C.f[TW  ] = &DD27C[TW  *size_MatC];
-      D27C.f[TN  ] = &DD27C[TN  *size_MatC];
-      D27C.f[BS  ] = &DD27C[BS  *size_MatC];
-      D27C.f[BN  ] = &DD27C[BN  *size_MatC];
-      D27C.f[TS  ] = &DD27C[TS  *size_MatC];
-      D27C.f[REST] = &DD27C[REST*size_MatC];
-      D27C.f[TNE ] = &DD27C[TNE *size_MatC];
-      D27C.f[TSW ] = &DD27C[TSW *size_MatC];
-      D27C.f[TSE ] = &DD27C[TSE *size_MatC];
-      D27C.f[TNW ] = &DD27C[TNW *size_MatC];
-      D27C.f[BNE ] = &DD27C[BNE *size_MatC];
-      D27C.f[BSW ] = &DD27C[BSW *size_MatC];
-      D27C.f[BSE ] = &DD27C[BSE *size_MatC];
-      D27C.f[BNW ] = &DD27C[BNW *size_MatC];
+      D27C.f[DIR_P00   ] = &DD27C[DIR_P00   *size_MatC];
+      D27C.f[DIR_M00   ] = &DD27C[DIR_M00   *size_MatC];
+      D27C.f[DIR_0P0   ] = &DD27C[DIR_0P0   *size_MatC];
+      D27C.f[DIR_0M0   ] = &DD27C[DIR_0M0   *size_MatC];
+      D27C.f[DIR_00P   ] = &DD27C[DIR_00P   *size_MatC];
+      D27C.f[DIR_00M   ] = &DD27C[DIR_00M   *size_MatC];
+      D27C.f[DIR_PP0  ] = &DD27C[DIR_PP0  *size_MatC];
+      D27C.f[DIR_MM0  ] = &DD27C[DIR_MM0  *size_MatC];
+      D27C.f[DIR_PM0  ] = &DD27C[DIR_PM0  *size_MatC];
+      D27C.f[DIR_MP0  ] = &DD27C[DIR_MP0  *size_MatC];
+      D27C.f[DIR_P0P  ] = &DD27C[DIR_P0P  *size_MatC];
+      D27C.f[DIR_M0M  ] = &DD27C[DIR_M0M  *size_MatC];
+      D27C.f[DIR_P0M  ] = &DD27C[DIR_P0M  *size_MatC];
+      D27C.f[DIR_M0P  ] = &DD27C[DIR_M0P  *size_MatC];
+      D27C.f[DIR_0PP  ] = &DD27C[DIR_0PP  *size_MatC];
+      D27C.f[DIR_0MM  ] = &DD27C[DIR_0MM  *size_MatC];
+      D27C.f[DIR_0PM  ] = &DD27C[DIR_0PM  *size_MatC];
+      D27C.f[DIR_0MP  ] = &DD27C[DIR_0MP  *size_MatC];
+      D27C.f[DIR_000] = &DD27C[DIR_000*size_MatC];
+      D27C.f[DIR_PPP ] = &DD27C[DIR_PPP *size_MatC];
+      D27C.f[DIR_MMP ] = &DD27C[DIR_MMP *size_MatC];
+      D27C.f[DIR_PMP ] = &DD27C[DIR_PMP *size_MatC];
+      D27C.f[DIR_MPP ] = &DD27C[DIR_MPP *size_MatC];
+      D27C.f[DIR_PPM ] = &DD27C[DIR_PPM *size_MatC];
+      D27C.f[DIR_MMM ] = &DD27C[DIR_MMM *size_MatC];
+      D27C.f[DIR_PMM ] = &DD27C[DIR_PMM *size_MatC];
+      D27C.f[DIR_MPM ] = &DD27C[DIR_MPM *size_MatC];
    }
    else
    {
-      D27C.f[W   ] = &DD27C[E   *size_MatC];
-      D27C.f[E   ] = &DD27C[W   *size_MatC];
-      D27C.f[S   ] = &DD27C[N   *size_MatC];
-      D27C.f[N   ] = &DD27C[S   *size_MatC];
-      D27C.f[B   ] = &DD27C[T   *size_MatC];
-      D27C.f[T   ] = &DD27C[B   *size_MatC];
-      D27C.f[SW  ] = &DD27C[NE  *size_MatC];
-      D27C.f[NE  ] = &DD27C[SW  *size_MatC];
-      D27C.f[NW  ] = &DD27C[SE  *size_MatC];
-      D27C.f[SE  ] = &DD27C[NW  *size_MatC];
-      D27C.f[BW  ] = &DD27C[TE  *size_MatC];
-      D27C.f[TE  ] = &DD27C[BW  *size_MatC];
-      D27C.f[TW  ] = &DD27C[BE  *size_MatC];
-      D27C.f[BE  ] = &DD27C[TW  *size_MatC];
-      D27C.f[BS  ] = &DD27C[TN  *size_MatC];
-      D27C.f[TN  ] = &DD27C[BS  *size_MatC];
-      D27C.f[TS  ] = &DD27C[BN  *size_MatC];
-      D27C.f[BN  ] = &DD27C[TS  *size_MatC];
-      D27C.f[REST] = &DD27C[REST*size_MatC];
-      D27C.f[BSW ] = &DD27C[TNE *size_MatC];
-      D27C.f[BNE ] = &DD27C[TSW *size_MatC];
-      D27C.f[BNW ] = &DD27C[TSE *size_MatC];
-      D27C.f[BSE ] = &DD27C[TNW *size_MatC];
-      D27C.f[TSW ] = &DD27C[BNE *size_MatC];
-      D27C.f[TNE ] = &DD27C[BSW *size_MatC];
-      D27C.f[TNW ] = &DD27C[BSE *size_MatC];
-      D27C.f[TSE ] = &DD27C[BNW *size_MatC];
+      D27C.f[DIR_M00   ] = &DD27C[DIR_P00   *size_MatC];
+      D27C.f[DIR_P00   ] = &DD27C[DIR_M00   *size_MatC];
+      D27C.f[DIR_0M0   ] = &DD27C[DIR_0P0   *size_MatC];
+      D27C.f[DIR_0P0   ] = &DD27C[DIR_0M0   *size_MatC];
+      D27C.f[DIR_00M   ] = &DD27C[DIR_00P   *size_MatC];
+      D27C.f[DIR_00P   ] = &DD27C[DIR_00M   *size_MatC];
+      D27C.f[DIR_MM0  ] = &DD27C[DIR_PP0  *size_MatC];
+      D27C.f[DIR_PP0  ] = &DD27C[DIR_MM0  *size_MatC];
+      D27C.f[DIR_MP0  ] = &DD27C[DIR_PM0  *size_MatC];
+      D27C.f[DIR_PM0  ] = &DD27C[DIR_MP0  *size_MatC];
+      D27C.f[DIR_M0M  ] = &DD27C[DIR_P0P  *size_MatC];
+      D27C.f[DIR_P0P  ] = &DD27C[DIR_M0M  *size_MatC];
+      D27C.f[DIR_M0P  ] = &DD27C[DIR_P0M  *size_MatC];
+      D27C.f[DIR_P0M  ] = &DD27C[DIR_M0P  *size_MatC];
+      D27C.f[DIR_0MM  ] = &DD27C[DIR_0PP  *size_MatC];
+      D27C.f[DIR_0PP  ] = &DD27C[DIR_0MM  *size_MatC];
+      D27C.f[DIR_0MP  ] = &DD27C[DIR_0PM  *size_MatC];
+      D27C.f[DIR_0PM  ] = &DD27C[DIR_0MP  *size_MatC];
+      D27C.f[DIR_000] = &DD27C[DIR_000*size_MatC];
+      D27C.f[DIR_MMM ] = &DD27C[DIR_PPP *size_MatC];
+      D27C.f[DIR_PPM ] = &DD27C[DIR_MMP *size_MatC];
+      D27C.f[DIR_MPM ] = &DD27C[DIR_PMP *size_MatC];
+      D27C.f[DIR_PMM ] = &DD27C[DIR_MPP *size_MatC];
+      D27C.f[DIR_MMP ] = &DD27C[DIR_PPM *size_MatC];
+      D27C.f[DIR_PPP ] = &DD27C[DIR_MMM *size_MatC];
+      D27C.f[DIR_MPP ] = &DD27C[DIR_PMM *size_MatC];
+      D27C.f[DIR_PMP ] = &DD27C[DIR_MPM *size_MatC];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -45856,33 +45856,33 @@ extern "C" __global__ void scaleCFThS27(     real* DC,
       f_BSE  = fbseC[kbs];
       f_BNW  = fbnwC[kbw];
       //////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27C.f[E   ])[kzero];//ke
-      f27W    =  (D27C.f[W   ])[kw   ];
-      f27N    =  (D27C.f[N   ])[kzero];//kn
-      f27S    =  (D27C.f[S   ])[ks   ];
-      f27T    =  (D27C.f[T   ])[kzero];//kt
-      f27B    =  (D27C.f[B   ])[kb   ];
-      f27NE   =  (D27C.f[NE  ])[kzero];//kne
-      f27SW   =  (D27C.f[SW  ])[ksw  ];
-      f27SE   =  (D27C.f[SE  ])[ks   ];//kse
-      f27NW   =  (D27C.f[NW  ])[kw   ];//knw
-      f27TE   =  (D27C.f[TE  ])[kzero];//kte
-      f27BW   =  (D27C.f[BW  ])[kbw  ];
-      f27BE   =  (D27C.f[BE  ])[kb   ];//kbe
-      f27TW   =  (D27C.f[TW  ])[kw   ];//ktw
-      f27TN   =  (D27C.f[TN  ])[kzero];//ktn
-      f27BS   =  (D27C.f[BS  ])[kbs  ];
-      f27BN   =  (D27C.f[BN  ])[kb   ];//kbn
-      f27TS   =  (D27C.f[TS  ])[ks   ];//kts
-      f27ZERO =  (D27C.f[REST])[kzero];//kzero
-      f27TNE   = (D27C.f[TNE ])[kzero];//ktne
-      f27TSW   = (D27C.f[TSW ])[ksw  ];//ktsw
-      f27TSE   = (D27C.f[TSE ])[ks   ];//ktse
-      f27TNW   = (D27C.f[TNW ])[kw   ];//ktnw
-      f27BNE   = (D27C.f[BNE ])[kb   ];//kbne
-      f27BSW   = (D27C.f[BSW ])[kbsw ];
-      f27BSE   = (D27C.f[BSE ])[kbs  ];//kbse
-      f27BNW   = (D27C.f[BNW ])[kbw  ];//kbnw
+      f27E    =  (D27C.f[DIR_P00   ])[kzero];//ke
+      f27W    =  (D27C.f[DIR_M00   ])[kw   ];
+      f27N    =  (D27C.f[DIR_0P0   ])[kzero];//kn
+      f27S    =  (D27C.f[DIR_0M0   ])[ks   ];
+      f27T    =  (D27C.f[DIR_00P   ])[kzero];//kt
+      f27B    =  (D27C.f[DIR_00M   ])[kb   ];
+      f27NE   =  (D27C.f[DIR_PP0  ])[kzero];//kne
+      f27SW   =  (D27C.f[DIR_MM0  ])[ksw  ];
+      f27SE   =  (D27C.f[DIR_PM0  ])[ks   ];//kse
+      f27NW   =  (D27C.f[DIR_MP0  ])[kw   ];//knw
+      f27TE   =  (D27C.f[DIR_P0P  ])[kzero];//kte
+      f27BW   =  (D27C.f[DIR_M0M  ])[kbw  ];
+      f27BE   =  (D27C.f[DIR_P0M  ])[kb   ];//kbe
+      f27TW   =  (D27C.f[DIR_M0P  ])[kw   ];//ktw
+      f27TN   =  (D27C.f[DIR_0PP  ])[kzero];//ktn
+      f27BS   =  (D27C.f[DIR_0MM  ])[kbs  ];
+      f27BN   =  (D27C.f[DIR_0PM  ])[kb   ];//kbn
+      f27TS   =  (D27C.f[DIR_0MP  ])[ks   ];//kts
+      f27ZERO =  (D27C.f[DIR_000])[kzero];//kzero
+      f27TNE   = (D27C.f[DIR_PPP ])[kzero];//ktne
+      f27TSW   = (D27C.f[DIR_MMP ])[ksw  ];//ktsw
+      f27TSE   = (D27C.f[DIR_PMP ])[ks   ];//ktse
+      f27TNW   = (D27C.f[DIR_MPP ])[kw   ];//ktnw
+      f27BNE   = (D27C.f[DIR_PPM ])[kb   ];//kbne
+      f27BSW   = (D27C.f[DIR_MMM ])[kbsw ];
+      f27BSE   = (D27C.f[DIR_PMM ])[kbs  ];//kbse
+      f27BNW   = (D27C.f[DIR_MPM ])[kbw  ];//kbnw
 
       Conc_C_SWB = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -45943,33 +45943,33 @@ extern "C" __global__ void scaleCFThS27(     real* DC,
       f_BSE  = fbseC[kbs];
       f_BNW  = fbnwC[kbw];
       //////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27C.f[E   ])[kzero];//ke
-      f27W    =  (D27C.f[W   ])[kw   ];
-      f27N    =  (D27C.f[N   ])[kzero];//kn
-      f27S    =  (D27C.f[S   ])[ks   ];
-      f27T    =  (D27C.f[T   ])[kzero];//kt
-      f27B    =  (D27C.f[B   ])[kb   ];
-      f27NE   =  (D27C.f[NE  ])[kzero];//kne
-      f27SW   =  (D27C.f[SW  ])[ksw  ];
-      f27SE   =  (D27C.f[SE  ])[ks   ];//kse
-      f27NW   =  (D27C.f[NW  ])[kw   ];//knw
-      f27TE   =  (D27C.f[TE  ])[kzero];//kte
-      f27BW   =  (D27C.f[BW  ])[kbw  ];
-      f27BE   =  (D27C.f[BE  ])[kb   ];//kbe
-      f27TW   =  (D27C.f[TW  ])[kw   ];//ktw
-      f27TN   =  (D27C.f[TN  ])[kzero];//ktn
-      f27BS   =  (D27C.f[BS  ])[kbs  ];
-      f27BN   =  (D27C.f[BN  ])[kb   ];//kbn
-      f27TS   =  (D27C.f[TS  ])[ks   ];//kts
-      f27ZERO =  (D27C.f[REST])[kzero];//kzero
-      f27TNE   = (D27C.f[TNE ])[kzero];//ktne
-      f27TSW   = (D27C.f[TSW ])[ksw  ];//ktsw
-      f27TSE   = (D27C.f[TSE ])[ks   ];//ktse
-      f27TNW   = (D27C.f[TNW ])[kw   ];//ktnw
-      f27BNE   = (D27C.f[BNE ])[kb   ];//kbne
-      f27BSW   = (D27C.f[BSW ])[kbsw ];
-      f27BSE   = (D27C.f[BSE ])[kbs  ];//kbse
-      f27BNW   = (D27C.f[BNW ])[kbw  ];//kbnw
+      f27E    =  (D27C.f[DIR_P00   ])[kzero];//ke
+      f27W    =  (D27C.f[DIR_M00   ])[kw   ];
+      f27N    =  (D27C.f[DIR_0P0   ])[kzero];//kn
+      f27S    =  (D27C.f[DIR_0M0   ])[ks   ];
+      f27T    =  (D27C.f[DIR_00P   ])[kzero];//kt
+      f27B    =  (D27C.f[DIR_00M   ])[kb   ];
+      f27NE   =  (D27C.f[DIR_PP0  ])[kzero];//kne
+      f27SW   =  (D27C.f[DIR_MM0  ])[ksw  ];
+      f27SE   =  (D27C.f[DIR_PM0  ])[ks   ];//kse
+      f27NW   =  (D27C.f[DIR_MP0  ])[kw   ];//knw
+      f27TE   =  (D27C.f[DIR_P0P  ])[kzero];//kte
+      f27BW   =  (D27C.f[DIR_M0M  ])[kbw  ];
+      f27BE   =  (D27C.f[DIR_P0M  ])[kb   ];//kbe
+      f27TW   =  (D27C.f[DIR_M0P  ])[kw   ];//ktw
+      f27TN   =  (D27C.f[DIR_0PP  ])[kzero];//ktn
+      f27BS   =  (D27C.f[DIR_0MM  ])[kbs  ];
+      f27BN   =  (D27C.f[DIR_0PM  ])[kb   ];//kbn
+      f27TS   =  (D27C.f[DIR_0MP  ])[ks   ];//kts
+      f27ZERO =  (D27C.f[DIR_000])[kzero];//kzero
+      f27TNE   = (D27C.f[DIR_PPP ])[kzero];//ktne
+      f27TSW   = (D27C.f[DIR_MMP ])[ksw  ];//ktsw
+      f27TSE   = (D27C.f[DIR_PMP ])[ks   ];//ktse
+      f27TNW   = (D27C.f[DIR_MPP ])[kw   ];//ktnw
+      f27BNE   = (D27C.f[DIR_PPM ])[kb   ];//kbne
+      f27BSW   = (D27C.f[DIR_MMM ])[kbsw ];
+      f27BSE   = (D27C.f[DIR_PMM ])[kbs  ];//kbse
+      f27BNW   = (D27C.f[DIR_MPM ])[kbw  ];//kbnw
 
       Conc_C_SWT = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -46030,33 +46030,33 @@ extern "C" __global__ void scaleCFThS27(     real* DC,
       f_BSE  = fbseC[kbs];
       f_BNW  = fbnwC[kbw];
       //////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27C.f[E   ])[kzero];//ke
-      f27W    =  (D27C.f[W   ])[kw   ];
-      f27N    =  (D27C.f[N   ])[kzero];//kn
-      f27S    =  (D27C.f[S   ])[ks   ];
-      f27T    =  (D27C.f[T   ])[kzero];//kt
-      f27B    =  (D27C.f[B   ])[kb   ];
-      f27NE   =  (D27C.f[NE  ])[kzero];//kne
-      f27SW   =  (D27C.f[SW  ])[ksw  ];
-      f27SE   =  (D27C.f[SE  ])[ks   ];//kse
-      f27NW   =  (D27C.f[NW  ])[kw   ];//knw
-      f27TE   =  (D27C.f[TE  ])[kzero];//kte
-      f27BW   =  (D27C.f[BW  ])[kbw  ];
-      f27BE   =  (D27C.f[BE  ])[kb   ];//kbe
-      f27TW   =  (D27C.f[TW  ])[kw   ];//ktw
-      f27TN   =  (D27C.f[TN  ])[kzero];//ktn
-      f27BS   =  (D27C.f[BS  ])[kbs  ];
-      f27BN   =  (D27C.f[BN  ])[kb   ];//kbn
-      f27TS   =  (D27C.f[TS  ])[ks   ];//kts
-      f27ZERO =  (D27C.f[REST])[kzero];//kzero
-      f27TNE   = (D27C.f[TNE ])[kzero];//ktne
-      f27TSW   = (D27C.f[TSW ])[ksw  ];//ktsw
-      f27TSE   = (D27C.f[TSE ])[ks   ];//ktse
-      f27TNW   = (D27C.f[TNW ])[kw   ];//ktnw
-      f27BNE   = (D27C.f[BNE ])[kb   ];//kbne
-      f27BSW   = (D27C.f[BSW ])[kbsw ];
-      f27BSE   = (D27C.f[BSE ])[kbs  ];//kbse
-      f27BNW   = (D27C.f[BNW ])[kbw  ];//kbnw
+      f27E    =  (D27C.f[DIR_P00   ])[kzero];//ke
+      f27W    =  (D27C.f[DIR_M00   ])[kw   ];
+      f27N    =  (D27C.f[DIR_0P0   ])[kzero];//kn
+      f27S    =  (D27C.f[DIR_0M0   ])[ks   ];
+      f27T    =  (D27C.f[DIR_00P   ])[kzero];//kt
+      f27B    =  (D27C.f[DIR_00M   ])[kb   ];
+      f27NE   =  (D27C.f[DIR_PP0  ])[kzero];//kne
+      f27SW   =  (D27C.f[DIR_MM0  ])[ksw  ];
+      f27SE   =  (D27C.f[DIR_PM0  ])[ks   ];//kse
+      f27NW   =  (D27C.f[DIR_MP0  ])[kw   ];//knw
+      f27TE   =  (D27C.f[DIR_P0P  ])[kzero];//kte
+      f27BW   =  (D27C.f[DIR_M0M  ])[kbw  ];
+      f27BE   =  (D27C.f[DIR_P0M  ])[kb   ];//kbe
+      f27TW   =  (D27C.f[DIR_M0P  ])[kw   ];//ktw
+      f27TN   =  (D27C.f[DIR_0PP  ])[kzero];//ktn
+      f27BS   =  (D27C.f[DIR_0MM  ])[kbs  ];
+      f27BN   =  (D27C.f[DIR_0PM  ])[kb   ];//kbn
+      f27TS   =  (D27C.f[DIR_0MP  ])[ks   ];//kts
+      f27ZERO =  (D27C.f[DIR_000])[kzero];//kzero
+      f27TNE   = (D27C.f[DIR_PPP ])[kzero];//ktne
+      f27TSW   = (D27C.f[DIR_MMP ])[ksw  ];//ktsw
+      f27TSE   = (D27C.f[DIR_PMP ])[ks   ];//ktse
+      f27TNW   = (D27C.f[DIR_MPP ])[kw   ];//ktnw
+      f27BNE   = (D27C.f[DIR_PPM ])[kb   ];//kbne
+      f27BSW   = (D27C.f[DIR_MMM ])[kbsw ];
+      f27BSE   = (D27C.f[DIR_PMM ])[kbs  ];//kbse
+      f27BNW   = (D27C.f[DIR_MPM ])[kbw  ];//kbnw
 
       Conc_C_SET = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -46117,33 +46117,33 @@ extern "C" __global__ void scaleCFThS27(     real* DC,
       f_BSE  = fbseC[kbs];
       f_BNW  = fbnwC[kbw];
       //////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27C.f[E   ])[kzero];//ke
-      f27W    =  (D27C.f[W   ])[kw   ];
-      f27N    =  (D27C.f[N   ])[kzero];//kn
-      f27S    =  (D27C.f[S   ])[ks   ];
-      f27T    =  (D27C.f[T   ])[kzero];//kt
-      f27B    =  (D27C.f[B   ])[kb   ];
-      f27NE   =  (D27C.f[NE  ])[kzero];//kne
-      f27SW   =  (D27C.f[SW  ])[ksw  ];
-      f27SE   =  (D27C.f[SE  ])[ks   ];//kse
-      f27NW   =  (D27C.f[NW  ])[kw   ];//knw
-      f27TE   =  (D27C.f[TE  ])[kzero];//kte
-      f27BW   =  (D27C.f[BW  ])[kbw  ];
-      f27BE   =  (D27C.f[BE  ])[kb   ];//kbe
-      f27TW   =  (D27C.f[TW  ])[kw   ];//ktw
-      f27TN   =  (D27C.f[TN  ])[kzero];//ktn
-      f27BS   =  (D27C.f[BS  ])[kbs  ];
-      f27BN   =  (D27C.f[BN  ])[kb   ];//kbn
-      f27TS   =  (D27C.f[TS  ])[ks   ];//kts
-      f27ZERO =  (D27C.f[REST])[kzero];//kzero
-      f27TNE   = (D27C.f[TNE ])[kzero];//ktne
-      f27TSW   = (D27C.f[TSW ])[ksw  ];//ktsw
-      f27TSE   = (D27C.f[TSE ])[ks   ];//ktse
-      f27TNW   = (D27C.f[TNW ])[kw   ];//ktnw
-      f27BNE   = (D27C.f[BNE ])[kb   ];//kbne
-      f27BSW   = (D27C.f[BSW ])[kbsw ];
-      f27BSE   = (D27C.f[BSE ])[kbs  ];//kbse
-      f27BNW   = (D27C.f[BNW ])[kbw  ];//kbnw
+      f27E    =  (D27C.f[DIR_P00   ])[kzero];//ke
+      f27W    =  (D27C.f[DIR_M00   ])[kw   ];
+      f27N    =  (D27C.f[DIR_0P0   ])[kzero];//kn
+      f27S    =  (D27C.f[DIR_0M0   ])[ks   ];
+      f27T    =  (D27C.f[DIR_00P   ])[kzero];//kt
+      f27B    =  (D27C.f[DIR_00M   ])[kb   ];
+      f27NE   =  (D27C.f[DIR_PP0  ])[kzero];//kne
+      f27SW   =  (D27C.f[DIR_MM0  ])[ksw  ];
+      f27SE   =  (D27C.f[DIR_PM0  ])[ks   ];//kse
+      f27NW   =  (D27C.f[DIR_MP0  ])[kw   ];//knw
+      f27TE   =  (D27C.f[DIR_P0P  ])[kzero];//kte
+      f27BW   =  (D27C.f[DIR_M0M  ])[kbw  ];
+      f27BE   =  (D27C.f[DIR_P0M  ])[kb   ];//kbe
+      f27TW   =  (D27C.f[DIR_M0P  ])[kw   ];//ktw
+      f27TN   =  (D27C.f[DIR_0PP  ])[kzero];//ktn
+      f27BS   =  (D27C.f[DIR_0MM  ])[kbs  ];
+      f27BN   =  (D27C.f[DIR_0PM  ])[kb   ];//kbn
+      f27TS   =  (D27C.f[DIR_0MP  ])[ks   ];//kts
+      f27ZERO =  (D27C.f[DIR_000])[kzero];//kzero
+      f27TNE   = (D27C.f[DIR_PPP ])[kzero];//ktne
+      f27TSW   = (D27C.f[DIR_MMP ])[ksw  ];//ktsw
+      f27TSE   = (D27C.f[DIR_PMP ])[ks   ];//ktse
+      f27TNW   = (D27C.f[DIR_MPP ])[kw   ];//ktnw
+      f27BNE   = (D27C.f[DIR_PPM ])[kb   ];//kbne
+      f27BSW   = (D27C.f[DIR_MMM ])[kbsw ];
+      f27BSE   = (D27C.f[DIR_PMM ])[kbs  ];//kbse
+      f27BNW   = (D27C.f[DIR_MPM ])[kbw  ];//kbnw
 
       Conc_C_SEB = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -46214,33 +46214,33 @@ extern "C" __global__ void scaleCFThS27(     real* DC,
       f_BSE  = fbseC[kbs];
       f_BNW  = fbnwC[kbw];
       //////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27C.f[E   ])[kzero];//ke
-      f27W    =  (D27C.f[W   ])[kw   ];
-      f27N    =  (D27C.f[N   ])[kzero];//kn
-      f27S    =  (D27C.f[S   ])[ks   ];
-      f27T    =  (D27C.f[T   ])[kzero];//kt
-      f27B    =  (D27C.f[B   ])[kb   ];
-      f27NE   =  (D27C.f[NE  ])[kzero];//kne
-      f27SW   =  (D27C.f[SW  ])[ksw  ];
-      f27SE   =  (D27C.f[SE  ])[ks   ];//kse
-      f27NW   =  (D27C.f[NW  ])[kw   ];//knw
-      f27TE   =  (D27C.f[TE  ])[kzero];//kte
-      f27BW   =  (D27C.f[BW  ])[kbw  ];
-      f27BE   =  (D27C.f[BE  ])[kb   ];//kbe
-      f27TW   =  (D27C.f[TW  ])[kw   ];//ktw
-      f27TN   =  (D27C.f[TN  ])[kzero];//ktn
-      f27BS   =  (D27C.f[BS  ])[kbs  ];
-      f27BN   =  (D27C.f[BN  ])[kb   ];//kbn
-      f27TS   =  (D27C.f[TS  ])[ks   ];//kts
-      f27ZERO =  (D27C.f[REST])[kzero];//kzero
-      f27TNE   = (D27C.f[TNE ])[kzero];//ktne
-      f27TSW   = (D27C.f[TSW ])[ksw  ];//ktsw
-      f27TSE   = (D27C.f[TSE ])[ks   ];//ktse
-      f27TNW   = (D27C.f[TNW ])[kw   ];//ktnw
-      f27BNE   = (D27C.f[BNE ])[kb   ];//kbne
-      f27BSW   = (D27C.f[BSW ])[kbsw ];
-      f27BSE   = (D27C.f[BSE ])[kbs  ];//kbse
-      f27BNW   = (D27C.f[BNW ])[kbw  ];//kbnw
+      f27E    =  (D27C.f[DIR_P00   ])[kzero];//ke
+      f27W    =  (D27C.f[DIR_M00   ])[kw   ];
+      f27N    =  (D27C.f[DIR_0P0   ])[kzero];//kn
+      f27S    =  (D27C.f[DIR_0M0   ])[ks   ];
+      f27T    =  (D27C.f[DIR_00P   ])[kzero];//kt
+      f27B    =  (D27C.f[DIR_00M   ])[kb   ];
+      f27NE   =  (D27C.f[DIR_PP0  ])[kzero];//kne
+      f27SW   =  (D27C.f[DIR_MM0  ])[ksw  ];
+      f27SE   =  (D27C.f[DIR_PM0  ])[ks   ];//kse
+      f27NW   =  (D27C.f[DIR_MP0  ])[kw   ];//knw
+      f27TE   =  (D27C.f[DIR_P0P  ])[kzero];//kte
+      f27BW   =  (D27C.f[DIR_M0M  ])[kbw  ];
+      f27BE   =  (D27C.f[DIR_P0M  ])[kb   ];//kbe
+      f27TW   =  (D27C.f[DIR_M0P  ])[kw   ];//ktw
+      f27TN   =  (D27C.f[DIR_0PP  ])[kzero];//ktn
+      f27BS   =  (D27C.f[DIR_0MM  ])[kbs  ];
+      f27BN   =  (D27C.f[DIR_0PM  ])[kb   ];//kbn
+      f27TS   =  (D27C.f[DIR_0MP  ])[ks   ];//kts
+      f27ZERO =  (D27C.f[DIR_000])[kzero];//kzero
+      f27TNE   = (D27C.f[DIR_PPP ])[kzero];//ktne
+      f27TSW   = (D27C.f[DIR_MMP ])[ksw  ];//ktsw
+      f27TSE   = (D27C.f[DIR_PMP ])[ks   ];//ktse
+      f27TNW   = (D27C.f[DIR_MPP ])[kw   ];//ktnw
+      f27BNE   = (D27C.f[DIR_PPM ])[kb   ];//kbne
+      f27BSW   = (D27C.f[DIR_MMM ])[kbsw ];
+      f27BSE   = (D27C.f[DIR_PMM ])[kbs  ];//kbse
+      f27BNW   = (D27C.f[DIR_MPM ])[kbw  ];//kbnw
 
       Conc_C_NWB = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -46301,33 +46301,33 @@ extern "C" __global__ void scaleCFThS27(     real* DC,
       f_BSE  = fbseC[kbs];
       f_BNW  = fbnwC[kbw];
       //////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27C.f[E   ])[kzero];//ke
-      f27W    =  (D27C.f[W   ])[kw   ];
-      f27N    =  (D27C.f[N   ])[kzero];//kn
-      f27S    =  (D27C.f[S   ])[ks   ];
-      f27T    =  (D27C.f[T   ])[kzero];//kt
-      f27B    =  (D27C.f[B   ])[kb   ];
-      f27NE   =  (D27C.f[NE  ])[kzero];//kne
-      f27SW   =  (D27C.f[SW  ])[ksw  ];
-      f27SE   =  (D27C.f[SE  ])[ks   ];//kse
-      f27NW   =  (D27C.f[NW  ])[kw   ];//knw
-      f27TE   =  (D27C.f[TE  ])[kzero];//kte
-      f27BW   =  (D27C.f[BW  ])[kbw  ];
-      f27BE   =  (D27C.f[BE  ])[kb   ];//kbe
-      f27TW   =  (D27C.f[TW  ])[kw   ];//ktw
-      f27TN   =  (D27C.f[TN  ])[kzero];//ktn
-      f27BS   =  (D27C.f[BS  ])[kbs  ];
-      f27BN   =  (D27C.f[BN  ])[kb   ];//kbn
-      f27TS   =  (D27C.f[TS  ])[ks   ];//kts
-      f27ZERO =  (D27C.f[REST])[kzero];//kzero
-      f27TNE   = (D27C.f[TNE ])[kzero];//ktne
-      f27TSW   = (D27C.f[TSW ])[ksw  ];//ktsw
-      f27TSE   = (D27C.f[TSE ])[ks   ];//ktse
-      f27TNW   = (D27C.f[TNW ])[kw   ];//ktnw
-      f27BNE   = (D27C.f[BNE ])[kb   ];//kbne
-      f27BSW   = (D27C.f[BSW ])[kbsw ];
-      f27BSE   = (D27C.f[BSE ])[kbs  ];//kbse
-      f27BNW   = (D27C.f[BNW ])[kbw  ];//kbnw
+      f27E    =  (D27C.f[DIR_P00   ])[kzero];//ke
+      f27W    =  (D27C.f[DIR_M00   ])[kw   ];
+      f27N    =  (D27C.f[DIR_0P0   ])[kzero];//kn
+      f27S    =  (D27C.f[DIR_0M0   ])[ks   ];
+      f27T    =  (D27C.f[DIR_00P   ])[kzero];//kt
+      f27B    =  (D27C.f[DIR_00M   ])[kb   ];
+      f27NE   =  (D27C.f[DIR_PP0  ])[kzero];//kne
+      f27SW   =  (D27C.f[DIR_MM0  ])[ksw  ];
+      f27SE   =  (D27C.f[DIR_PM0  ])[ks   ];//kse
+      f27NW   =  (D27C.f[DIR_MP0  ])[kw   ];//knw
+      f27TE   =  (D27C.f[DIR_P0P  ])[kzero];//kte
+      f27BW   =  (D27C.f[DIR_M0M  ])[kbw  ];
+      f27BE   =  (D27C.f[DIR_P0M  ])[kb   ];//kbe
+      f27TW   =  (D27C.f[DIR_M0P  ])[kw   ];//ktw
+      f27TN   =  (D27C.f[DIR_0PP  ])[kzero];//ktn
+      f27BS   =  (D27C.f[DIR_0MM  ])[kbs  ];
+      f27BN   =  (D27C.f[DIR_0PM  ])[kb   ];//kbn
+      f27TS   =  (D27C.f[DIR_0MP  ])[ks   ];//kts
+      f27ZERO =  (D27C.f[DIR_000])[kzero];//kzero
+      f27TNE   = (D27C.f[DIR_PPP ])[kzero];//ktne
+      f27TSW   = (D27C.f[DIR_MMP ])[ksw  ];//ktsw
+      f27TSE   = (D27C.f[DIR_PMP ])[ks   ];//ktse
+      f27TNW   = (D27C.f[DIR_MPP ])[kw   ];//ktnw
+      f27BNE   = (D27C.f[DIR_PPM ])[kb   ];//kbne
+      f27BSW   = (D27C.f[DIR_MMM ])[kbsw ];
+      f27BSE   = (D27C.f[DIR_PMM ])[kbs  ];//kbse
+      f27BNW   = (D27C.f[DIR_MPM ])[kbw  ];//kbnw
 
       Conc_C_NWT = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -46388,33 +46388,33 @@ extern "C" __global__ void scaleCFThS27(     real* DC,
       f_BSE  = fbseC[kbs];
       f_BNW  = fbnwC[kbw];
       //////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27C.f[E   ])[kzero];//ke
-      f27W    =  (D27C.f[W   ])[kw   ];
-      f27N    =  (D27C.f[N   ])[kzero];//kn
-      f27S    =  (D27C.f[S   ])[ks   ];
-      f27T    =  (D27C.f[T   ])[kzero];//kt
-      f27B    =  (D27C.f[B   ])[kb   ];
-      f27NE   =  (D27C.f[NE  ])[kzero];//kne
-      f27SW   =  (D27C.f[SW  ])[ksw  ];
-      f27SE   =  (D27C.f[SE  ])[ks   ];//kse
-      f27NW   =  (D27C.f[NW  ])[kw   ];//knw
-      f27TE   =  (D27C.f[TE  ])[kzero];//kte
-      f27BW   =  (D27C.f[BW  ])[kbw  ];
-      f27BE   =  (D27C.f[BE  ])[kb   ];//kbe
-      f27TW   =  (D27C.f[TW  ])[kw   ];//ktw
-      f27TN   =  (D27C.f[TN  ])[kzero];//ktn
-      f27BS   =  (D27C.f[BS  ])[kbs  ];
-      f27BN   =  (D27C.f[BN  ])[kb   ];//kbn
-      f27TS   =  (D27C.f[TS  ])[ks   ];//kts
-      f27ZERO =  (D27C.f[REST])[kzero];//kzero
-      f27TNE   = (D27C.f[TNE ])[kzero];//ktne
-      f27TSW   = (D27C.f[TSW ])[ksw  ];//ktsw
-      f27TSE   = (D27C.f[TSE ])[ks   ];//ktse
-      f27TNW   = (D27C.f[TNW ])[kw   ];//ktnw
-      f27BNE   = (D27C.f[BNE ])[kb   ];//kbne
-      f27BSW   = (D27C.f[BSW ])[kbsw ];
-      f27BSE   = (D27C.f[BSE ])[kbs  ];//kbse
-      f27BNW   = (D27C.f[BNW ])[kbw  ];//kbnw
+      f27E    =  (D27C.f[DIR_P00   ])[kzero];//ke
+      f27W    =  (D27C.f[DIR_M00   ])[kw   ];
+      f27N    =  (D27C.f[DIR_0P0   ])[kzero];//kn
+      f27S    =  (D27C.f[DIR_0M0   ])[ks   ];
+      f27T    =  (D27C.f[DIR_00P   ])[kzero];//kt
+      f27B    =  (D27C.f[DIR_00M   ])[kb   ];
+      f27NE   =  (D27C.f[DIR_PP0  ])[kzero];//kne
+      f27SW   =  (D27C.f[DIR_MM0  ])[ksw  ];
+      f27SE   =  (D27C.f[DIR_PM0  ])[ks   ];//kse
+      f27NW   =  (D27C.f[DIR_MP0  ])[kw   ];//knw
+      f27TE   =  (D27C.f[DIR_P0P  ])[kzero];//kte
+      f27BW   =  (D27C.f[DIR_M0M  ])[kbw  ];
+      f27BE   =  (D27C.f[DIR_P0M  ])[kb   ];//kbe
+      f27TW   =  (D27C.f[DIR_M0P  ])[kw   ];//ktw
+      f27TN   =  (D27C.f[DIR_0PP  ])[kzero];//ktn
+      f27BS   =  (D27C.f[DIR_0MM  ])[kbs  ];
+      f27BN   =  (D27C.f[DIR_0PM  ])[kb   ];//kbn
+      f27TS   =  (D27C.f[DIR_0MP  ])[ks   ];//kts
+      f27ZERO =  (D27C.f[DIR_000])[kzero];//kzero
+      f27TNE   = (D27C.f[DIR_PPP ])[kzero];//ktne
+      f27TSW   = (D27C.f[DIR_MMP ])[ksw  ];//ktsw
+      f27TSE   = (D27C.f[DIR_PMP ])[ks   ];//ktse
+      f27TNW   = (D27C.f[DIR_MPP ])[kw   ];//ktnw
+      f27BNE   = (D27C.f[DIR_PPM ])[kb   ];//kbne
+      f27BSW   = (D27C.f[DIR_MMM ])[kbsw ];
+      f27BSE   = (D27C.f[DIR_PMM ])[kbs  ];//kbse
+      f27BNW   = (D27C.f[DIR_MPM ])[kbw  ];//kbnw
 
       Conc_C_NET = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -46475,33 +46475,33 @@ extern "C" __global__ void scaleCFThS27(     real* DC,
       f_BSE  = fbseC[kbs];
       f_BNW  = fbnwC[kbw];
       ////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27C.f[E   ])[kzero];//ke
-      f27W    =  (D27C.f[W   ])[kw   ];
-      f27N    =  (D27C.f[N   ])[kzero];//kn
-      f27S    =  (D27C.f[S   ])[ks   ];
-      f27T    =  (D27C.f[T   ])[kzero];//kt
-      f27B    =  (D27C.f[B   ])[kb   ];
-      f27NE   =  (D27C.f[NE  ])[kzero];//kne
-      f27SW   =  (D27C.f[SW  ])[ksw  ];
-      f27SE   =  (D27C.f[SE  ])[ks   ];//kse
-      f27NW   =  (D27C.f[NW  ])[kw   ];//knw
-      f27TE   =  (D27C.f[TE  ])[kzero];//kte
-      f27BW   =  (D27C.f[BW  ])[kbw  ];
-      f27BE   =  (D27C.f[BE  ])[kb   ];//kbe
-      f27TW   =  (D27C.f[TW  ])[kw   ];//ktw
-      f27TN   =  (D27C.f[TN  ])[kzero];//ktn
-      f27BS   =  (D27C.f[BS  ])[kbs  ];
-      f27BN   =  (D27C.f[BN  ])[kb   ];//kbn
-      f27TS   =  (D27C.f[TS  ])[ks   ];//kts
-      f27ZERO =  (D27C.f[REST])[kzero];//kzero
-      f27TNE   = (D27C.f[TNE ])[kzero];//ktne
-      f27TSW   = (D27C.f[TSW ])[ksw  ];//ktsw
-      f27TSE   = (D27C.f[TSE ])[ks   ];//ktse
-      f27TNW   = (D27C.f[TNW ])[kw   ];//ktnw
-      f27BNE   = (D27C.f[BNE ])[kb   ];//kbne
-      f27BSW   = (D27C.f[BSW ])[kbsw ];
-      f27BSE   = (D27C.f[BSE ])[kbs  ];//kbse
-      f27BNW   = (D27C.f[BNW ])[kbw  ];//kbnw
+      f27E    =  (D27C.f[DIR_P00   ])[kzero];//ke
+      f27W    =  (D27C.f[DIR_M00   ])[kw   ];
+      f27N    =  (D27C.f[DIR_0P0   ])[kzero];//kn
+      f27S    =  (D27C.f[DIR_0M0   ])[ks   ];
+      f27T    =  (D27C.f[DIR_00P   ])[kzero];//kt
+      f27B    =  (D27C.f[DIR_00M   ])[kb   ];
+      f27NE   =  (D27C.f[DIR_PP0  ])[kzero];//kne
+      f27SW   =  (D27C.f[DIR_MM0  ])[ksw  ];
+      f27SE   =  (D27C.f[DIR_PM0  ])[ks   ];//kse
+      f27NW   =  (D27C.f[DIR_MP0  ])[kw   ];//knw
+      f27TE   =  (D27C.f[DIR_P0P  ])[kzero];//kte
+      f27BW   =  (D27C.f[DIR_M0M  ])[kbw  ];
+      f27BE   =  (D27C.f[DIR_P0M  ])[kb   ];//kbe
+      f27TW   =  (D27C.f[DIR_M0P  ])[kw   ];//ktw
+      f27TN   =  (D27C.f[DIR_0PP  ])[kzero];//ktn
+      f27BS   =  (D27C.f[DIR_0MM  ])[kbs  ];
+      f27BN   =  (D27C.f[DIR_0PM  ])[kb   ];//kbn
+      f27TS   =  (D27C.f[DIR_0MP  ])[ks   ];//kts
+      f27ZERO =  (D27C.f[DIR_000])[kzero];//kzero
+      f27TNE   = (D27C.f[DIR_PPP ])[kzero];//ktne
+      f27TSW   = (D27C.f[DIR_MMP ])[ksw  ];//ktsw
+      f27TSE   = (D27C.f[DIR_PMP ])[ks   ];//ktse
+      f27TNW   = (D27C.f[DIR_MPP ])[kw   ];//ktnw
+      f27BNE   = (D27C.f[DIR_PPM ])[kb   ];//kbne
+      f27BSW   = (D27C.f[DIR_MMM ])[kbsw ];
+      f27BSE   = (D27C.f[DIR_PMM ])[kbs  ];//kbse
+      f27BNW   = (D27C.f[DIR_MPM ])[kbw  ];//kbnw
 
       Conc_C_NEB = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -46619,33 +46619,33 @@ extern "C" __global__ void scaleCFThS27(     real* DC,
 
       cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
-      (D27F.f[REST])[kzero] =   c8o27* Conc_F*(c1o1-cu_sq);
-      (D27F.f[E   ])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
-      (D27F.f[W   ])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
-      (D27F.f[N   ])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
-      (D27F.f[S   ])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
-      (D27F.f[T   ])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
-      (D27F.f[B   ])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
-      (D27F.f[NE  ])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
-      (D27F.f[SW  ])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
-      (D27F.f[SE  ])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
-      (D27F.f[NW  ])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
-      (D27F.f[TE  ])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
-      (D27F.f[BW  ])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
-      (D27F.f[BE  ])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
-      (D27F.f[TW  ])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
-      (D27F.f[TN  ])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
-      (D27F.f[BS  ])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
-      (D27F.f[BN  ])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
-      (D27F.f[TS  ])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
-      (D27F.f[TNE ])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
-      (D27F.f[BSW ])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
-      (D27F.f[BNE ])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
-      (D27F.f[TSW ])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
-      (D27F.f[TSE ])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
-      (D27F.f[BNW ])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
-      (D27F.f[BSE ])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
-      (D27F.f[TNW ])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_000])[kzero] =   c8o27* Conc_F*(c1o1-cu_sq);
+      (D27F.f[DIR_P00   ])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
+      (D27F.f[DIR_M00   ])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
+      (D27F.f[DIR_0P0   ])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
+      (D27F.f[DIR_0M0   ])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
+      (D27F.f[DIR_00P   ])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
+      (D27F.f[DIR_00M   ])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
+      (D27F.f[DIR_PP0  ])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_MM0  ])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_PM0  ])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_MP0  ])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_P0P  ])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_M0M  ])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_P0M  ])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_M0P  ])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_0PP  ])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
+      (D27F.f[DIR_0MM  ])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0PM  ])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0MP  ])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PPP ])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MMM ])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PPM ])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MMP ])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PMP ])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MPM ])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PMM ])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MPP ])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
 
 
 
@@ -46697,33 +46697,33 @@ extern "C" __global__ void scaleCFThS27(     real* DC,
 
       cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
-      (D27F.f[REST])[kzero] =   c8o27* Conc_F*(c1o1-cu_sq);
-      (D27F.f[E   ])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
-      (D27F.f[W   ])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
-      (D27F.f[N   ])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
-      (D27F.f[S   ])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
-      (D27F.f[T   ])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
-      (D27F.f[B   ])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
-      (D27F.f[NE  ])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
-      (D27F.f[SW  ])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
-      (D27F.f[SE  ])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
-      (D27F.f[NW  ])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
-      (D27F.f[TE  ])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
-      (D27F.f[BW  ])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
-      (D27F.f[BE  ])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
-      (D27F.f[TW  ])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
-      (D27F.f[TN  ])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
-      (D27F.f[BS  ])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
-      (D27F.f[BN  ])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
-      (D27F.f[TS  ])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
-      (D27F.f[TNE ])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
-      (D27F.f[BSW ])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
-      (D27F.f[BNE ])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
-      (D27F.f[TSW ])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
-      (D27F.f[TSE ])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
-      (D27F.f[BNW ])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
-      (D27F.f[BSE ])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
-      (D27F.f[TNW ])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_000])[kzero] =   c8o27* Conc_F*(c1o1-cu_sq);
+      (D27F.f[DIR_P00   ])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
+      (D27F.f[DIR_M00   ])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
+      (D27F.f[DIR_0P0   ])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
+      (D27F.f[DIR_0M0   ])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
+      (D27F.f[DIR_00P   ])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
+      (D27F.f[DIR_00M   ])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
+      (D27F.f[DIR_PP0  ])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_MM0  ])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_PM0  ])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_MP0  ])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_P0P  ])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_M0M  ])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_P0M  ])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_M0P  ])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_0PP  ])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
+      (D27F.f[DIR_0MM  ])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0PM  ])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0MP  ])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PPP ])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MMM ])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PPM ])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MMP ])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PMP ])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MPM ])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PMM ])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MPP ])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
 
 
 
@@ -46775,33 +46775,33 @@ extern "C" __global__ void scaleCFThS27(     real* DC,
 
       cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
-      (D27F.f[REST])[kzero] =   c8o27* Conc_F*(c1o1-cu_sq);
-      (D27F.f[E   ])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
-      (D27F.f[W   ])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
-      (D27F.f[N   ])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
-      (D27F.f[S   ])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
-      (D27F.f[T   ])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
-      (D27F.f[B   ])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
-      (D27F.f[NE  ])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
-      (D27F.f[SW  ])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
-      (D27F.f[SE  ])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
-      (D27F.f[NW  ])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
-      (D27F.f[TE  ])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
-      (D27F.f[BW  ])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
-      (D27F.f[BE  ])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
-      (D27F.f[TW  ])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
-      (D27F.f[TN  ])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
-      (D27F.f[BS  ])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
-      (D27F.f[BN  ])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
-      (D27F.f[TS  ])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
-      (D27F.f[TNE ])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
-      (D27F.f[BSW ])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
-      (D27F.f[BNE ])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
-      (D27F.f[TSW ])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
-      (D27F.f[TSE ])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
-      (D27F.f[BNW ])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
-      (D27F.f[BSE ])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
-      (D27F.f[TNW ])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_000])[kzero] =   c8o27* Conc_F*(c1o1-cu_sq);
+      (D27F.f[DIR_P00   ])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
+      (D27F.f[DIR_M00   ])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
+      (D27F.f[DIR_0P0   ])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
+      (D27F.f[DIR_0M0   ])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
+      (D27F.f[DIR_00P   ])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
+      (D27F.f[DIR_00M   ])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
+      (D27F.f[DIR_PP0  ])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_MM0  ])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_PM0  ])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_MP0  ])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_P0P  ])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_M0M  ])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_P0M  ])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_M0P  ])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_0PP  ])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
+      (D27F.f[DIR_0MM  ])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0PM  ])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0MP  ])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PPP ])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MMM ])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PPM ])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MMP ])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PMP ])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MPM ])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PMM ])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MPP ])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
 
 
 
@@ -46853,33 +46853,33 @@ extern "C" __global__ void scaleCFThS27(     real* DC,
 
       cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
-      (D27F.f[REST])[kzero] =   c8o27* Conc_F*(c1o1-cu_sq);
-      (D27F.f[E   ])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
-      (D27F.f[W   ])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
-      (D27F.f[N   ])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
-      (D27F.f[S   ])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
-      (D27F.f[T   ])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
-      (D27F.f[B   ])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
-      (D27F.f[NE  ])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
-      (D27F.f[SW  ])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
-      (D27F.f[SE  ])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
-      (D27F.f[NW  ])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
-      (D27F.f[TE  ])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
-      (D27F.f[BW  ])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
-      (D27F.f[BE  ])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
-      (D27F.f[TW  ])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
-      (D27F.f[TN  ])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
-      (D27F.f[BS  ])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
-      (D27F.f[BN  ])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
-      (D27F.f[TS  ])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
-      (D27F.f[TNE ])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
-      (D27F.f[BSW ])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
-      (D27F.f[BNE ])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
-      (D27F.f[TSW ])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
-      (D27F.f[TSE ])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
-      (D27F.f[BNW ])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
-      (D27F.f[BSE ])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
-      (D27F.f[TNW ])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_000])[kzero] =   c8o27* Conc_F*(c1o1-cu_sq);
+      (D27F.f[DIR_P00   ])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
+      (D27F.f[DIR_M00   ])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
+      (D27F.f[DIR_0P0   ])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
+      (D27F.f[DIR_0M0   ])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
+      (D27F.f[DIR_00P   ])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
+      (D27F.f[DIR_00M   ])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
+      (D27F.f[DIR_PP0  ])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_MM0  ])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_PM0  ])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_MP0  ])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_P0P  ])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_M0M  ])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_P0M  ])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_M0P  ])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_0PP  ])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
+      (D27F.f[DIR_0MM  ])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0PM  ])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0MP  ])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PPP ])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MMM ])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PPM ])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MMP ])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PMP ])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MPM ])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PMM ])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MPP ])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
 
 
 
@@ -46941,33 +46941,33 @@ extern "C" __global__ void scaleCFThS27(     real* DC,
 
       cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
-      (D27F.f[REST])[kzero] =   c8o27* Conc_F*(c1o1-cu_sq);
-      (D27F.f[E   ])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
-      (D27F.f[W   ])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
-      (D27F.f[N   ])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
-      (D27F.f[S   ])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
-      (D27F.f[T   ])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
-      (D27F.f[B   ])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
-      (D27F.f[NE  ])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
-      (D27F.f[SW  ])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
-      (D27F.f[SE  ])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
-      (D27F.f[NW  ])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
-      (D27F.f[TE  ])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
-      (D27F.f[BW  ])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
-      (D27F.f[BE  ])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
-      (D27F.f[TW  ])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
-      (D27F.f[TN  ])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
-      (D27F.f[BS  ])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
-      (D27F.f[BN  ])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
-      (D27F.f[TS  ])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
-      (D27F.f[TNE ])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
-      (D27F.f[BSW ])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
-      (D27F.f[BNE ])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
-      (D27F.f[TSW ])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
-      (D27F.f[TSE ])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
-      (D27F.f[BNW ])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
-      (D27F.f[BSE ])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
-      (D27F.f[TNW ])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_000])[kzero] =   c8o27* Conc_F*(c1o1-cu_sq);
+      (D27F.f[DIR_P00   ])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
+      (D27F.f[DIR_M00   ])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
+      (D27F.f[DIR_0P0   ])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
+      (D27F.f[DIR_0M0   ])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
+      (D27F.f[DIR_00P   ])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
+      (D27F.f[DIR_00M   ])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
+      (D27F.f[DIR_PP0  ])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_MM0  ])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_PM0  ])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_MP0  ])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_P0P  ])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_M0M  ])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_P0M  ])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_M0P  ])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_0PP  ])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
+      (D27F.f[DIR_0MM  ])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0PM  ])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0MP  ])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PPP ])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MMM ])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PPM ])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MMP ])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PMP ])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MPM ])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PMM ])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MPP ])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
 
 
 
@@ -47019,33 +47019,33 @@ extern "C" __global__ void scaleCFThS27(     real* DC,
 
       cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
-      (D27F.f[REST])[kzero] =   c8o27* Conc_F*(c1o1-cu_sq);
-      (D27F.f[E   ])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
-      (D27F.f[W   ])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
-      (D27F.f[N   ])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
-      (D27F.f[S   ])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
-      (D27F.f[T   ])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
-      (D27F.f[B   ])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
-      (D27F.f[NE  ])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
-      (D27F.f[SW  ])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
-      (D27F.f[SE  ])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
-      (D27F.f[NW  ])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
-      (D27F.f[TE  ])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
-      (D27F.f[BW  ])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
-      (D27F.f[BE  ])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
-      (D27F.f[TW  ])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
-      (D27F.f[TN  ])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
-      (D27F.f[BS  ])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
-      (D27F.f[BN  ])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
-      (D27F.f[TS  ])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
-      (D27F.f[TNE ])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
-      (D27F.f[BSW ])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
-      (D27F.f[BNE ])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
-      (D27F.f[TSW ])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
-      (D27F.f[TSE ])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
-      (D27F.f[BNW ])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
-      (D27F.f[BSE ])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
-      (D27F.f[TNW ])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_000])[kzero] =   c8o27* Conc_F*(c1o1-cu_sq);
+      (D27F.f[DIR_P00   ])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
+      (D27F.f[DIR_M00   ])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
+      (D27F.f[DIR_0P0   ])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
+      (D27F.f[DIR_0M0   ])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
+      (D27F.f[DIR_00P   ])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
+      (D27F.f[DIR_00M   ])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
+      (D27F.f[DIR_PP0  ])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_MM0  ])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_PM0  ])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_MP0  ])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_P0P  ])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_M0M  ])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_P0M  ])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_M0P  ])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_0PP  ])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
+      (D27F.f[DIR_0MM  ])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0PM  ])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0MP  ])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PPP ])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MMM ])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PPM ])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MMP ])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PMP ])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MPM ])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PMM ])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MPP ])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
 
 
 
@@ -47097,33 +47097,33 @@ extern "C" __global__ void scaleCFThS27(     real* DC,
 
       cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
-      (D27F.f[REST])[kzero] =   c8o27* Conc_F*(c1o1-cu_sq);
-      (D27F.f[E   ])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
-      (D27F.f[W   ])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
-      (D27F.f[N   ])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
-      (D27F.f[S   ])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
-      (D27F.f[T   ])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
-      (D27F.f[B   ])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
-      (D27F.f[NE  ])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
-      (D27F.f[SW  ])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
-      (D27F.f[SE  ])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
-      (D27F.f[NW  ])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
-      (D27F.f[TE  ])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
-      (D27F.f[BW  ])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
-      (D27F.f[BE  ])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
-      (D27F.f[TW  ])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
-      (D27F.f[TN  ])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
-      (D27F.f[BS  ])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
-      (D27F.f[BN  ])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
-      (D27F.f[TS  ])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
-      (D27F.f[TNE ])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
-      (D27F.f[BSW ])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
-      (D27F.f[BNE ])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
-      (D27F.f[TSW ])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
-      (D27F.f[TSE ])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
-      (D27F.f[BNW ])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
-      (D27F.f[BSE ])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
-      (D27F.f[TNW ])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_000])[kzero] =   c8o27* Conc_F*(c1o1-cu_sq);
+      (D27F.f[DIR_P00   ])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
+      (D27F.f[DIR_M00   ])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
+      (D27F.f[DIR_0P0   ])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
+      (D27F.f[DIR_0M0   ])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
+      (D27F.f[DIR_00P   ])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
+      (D27F.f[DIR_00M   ])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
+      (D27F.f[DIR_PP0  ])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_MM0  ])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_PM0  ])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_MP0  ])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_P0P  ])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_M0M  ])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_P0M  ])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_M0P  ])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_0PP  ])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
+      (D27F.f[DIR_0MM  ])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0PM  ])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0MP  ])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PPP ])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MMM ])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PPM ])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MMP ])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PMP ])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MPM ])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PMM ])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MPP ])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
 
 
 
@@ -47175,33 +47175,33 @@ extern "C" __global__ void scaleCFThS27(     real* DC,
 
       cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
-      (D27F.f[REST])[kzero] =   c8o27* Conc_F*(c1o1-cu_sq);
-      (D27F.f[E   ])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
-      (D27F.f[W   ])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
-      (D27F.f[N   ])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
-      (D27F.f[S   ])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
-      (D27F.f[T   ])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
-      (D27F.f[B   ])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
-      (D27F.f[NE  ])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
-      (D27F.f[SW  ])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
-      (D27F.f[SE  ])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
-      (D27F.f[NW  ])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
-      (D27F.f[TE  ])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
-      (D27F.f[BW  ])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
-      (D27F.f[BE  ])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
-      (D27F.f[TW  ])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
-      (D27F.f[TN  ])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
-      (D27F.f[BS  ])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
-      (D27F.f[BN  ])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
-      (D27F.f[TS  ])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
-      (D27F.f[TNE ])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
-      (D27F.f[BSW ])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
-      (D27F.f[BNE ])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
-      (D27F.f[TSW ])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
-      (D27F.f[TSE ])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
-      (D27F.f[BNW ])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
-      (D27F.f[BSE ])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
-      (D27F.f[TNW ])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_000])[kzero] =   c8o27* Conc_F*(c1o1-cu_sq);
+      (D27F.f[DIR_P00   ])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_F*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
+      (D27F.f[DIR_M00   ])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_F*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
+      (D27F.f[DIR_0P0   ])[kzero] =   c2o27* (c3o1*(     My    )+Conc_F*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
+      (D27F.f[DIR_0M0   ])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_F*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
+      (D27F.f[DIR_00P   ])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_F*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
+      (D27F.f[DIR_00M   ])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_F*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
+      (D27F.f[DIR_PP0  ])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_F*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_MM0  ])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_F*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_PM0  ])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_F*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
+      (D27F.f[DIR_MP0  ])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_F*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
+      (D27F.f[DIR_P0P  ])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_F*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_M0M  ])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_F*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_P0M  ])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_F*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
+      (D27F.f[DIR_M0P  ])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_F*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
+      (D27F.f[DIR_0PP  ])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_F*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
+      (D27F.f[DIR_0MM  ])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_F*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0PM  ])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_F*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
+      (D27F.f[DIR_0MP  ])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_F*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PPP ])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MMM ])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PPM ])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_F*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MMP ])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_F*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_PMP ])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
+      (D27F.f[DIR_MPM ])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
+      (D27F.f[DIR_PMM ])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_F*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
+      (D27F.f[DIR_MPP ])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_F*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -47243,7 +47243,7 @@ extern "C" __global__ void scaleCFThS27(     real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleCFEff27(real* DC, 
+__global__ void scaleCFEff27(real* DC, 
                                         real* DF, 
                                         unsigned int* neighborCX,
                                         unsigned int* neighborCY,
@@ -47269,96 +47269,96 @@ extern "C" __global__ void scaleCFEff27(real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[E   *size_MatF];
-   fwF    = &DF[W   *size_MatF];
-   fnF    = &DF[N   *size_MatF];
-   fsF    = &DF[S   *size_MatF];
-   ftF    = &DF[T   *size_MatF];
-   fbF    = &DF[B   *size_MatF];
-   fneF   = &DF[NE  *size_MatF];
-   fswF   = &DF[SW  *size_MatF];
-   fseF   = &DF[SE  *size_MatF];
-   fnwF   = &DF[NW  *size_MatF];
-   fteF   = &DF[TE  *size_MatF];
-   fbwF   = &DF[BW  *size_MatF];
-   fbeF   = &DF[BE  *size_MatF];
-   ftwF   = &DF[TW  *size_MatF];
-   ftnF   = &DF[TN  *size_MatF];
-   fbsF   = &DF[BS  *size_MatF];
-   fbnF   = &DF[BN  *size_MatF];
-   ftsF   = &DF[TS  *size_MatF];
-   fzeroF = &DF[REST*size_MatF];
-   ftneF  = &DF[TNE *size_MatF];
-   ftswF  = &DF[TSW *size_MatF];
-   ftseF  = &DF[TSE *size_MatF];
-   ftnwF  = &DF[TNW *size_MatF];
-   fbneF  = &DF[BNE *size_MatF];
-   fbswF  = &DF[BSW *size_MatF];
-   fbseF  = &DF[BSE *size_MatF];
-   fbnwF  = &DF[BNW *size_MatF];
+   feF    = &DF[DIR_P00   *size_MatF];
+   fwF    = &DF[DIR_M00   *size_MatF];
+   fnF    = &DF[DIR_0P0   *size_MatF];
+   fsF    = &DF[DIR_0M0   *size_MatF];
+   ftF    = &DF[DIR_00P   *size_MatF];
+   fbF    = &DF[DIR_00M   *size_MatF];
+   fneF   = &DF[DIR_PP0  *size_MatF];
+   fswF   = &DF[DIR_MM0  *size_MatF];
+   fseF   = &DF[DIR_PM0  *size_MatF];
+   fnwF   = &DF[DIR_MP0  *size_MatF];
+   fteF   = &DF[DIR_P0P  *size_MatF];
+   fbwF   = &DF[DIR_M0M  *size_MatF];
+   fbeF   = &DF[DIR_P0M  *size_MatF];
+   ftwF   = &DF[DIR_M0P  *size_MatF];
+   ftnF   = &DF[DIR_0PP  *size_MatF];
+   fbsF   = &DF[DIR_0MM  *size_MatF];
+   fbnF   = &DF[DIR_0PM  *size_MatF];
+   ftsF   = &DF[DIR_0MP  *size_MatF];
+   fzeroF = &DF[DIR_000*size_MatF];
+   ftneF  = &DF[DIR_PPP *size_MatF];
+   ftswF  = &DF[DIR_MMP *size_MatF];
+   ftseF  = &DF[DIR_PMP *size_MatF];
+   ftnwF  = &DF[DIR_MPP *size_MatF];
+   fbneF  = &DF[DIR_PPM *size_MatF];
+   fbswF  = &DF[DIR_MMM *size_MatF];
+   fbseF  = &DF[DIR_PMM *size_MatF];
+   fbnwF  = &DF[DIR_MPM *size_MatF];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[E   *size_MatC];
-      fwC    = &DC[W   *size_MatC];
-      fnC    = &DC[N   *size_MatC];
-      fsC    = &DC[S   *size_MatC];
-      ftC    = &DC[T   *size_MatC];
-      fbC    = &DC[B   *size_MatC];
-      fneC   = &DC[NE  *size_MatC];
-      fswC   = &DC[SW  *size_MatC];
-      fseC   = &DC[SE  *size_MatC];
-      fnwC   = &DC[NW  *size_MatC];
-      fteC   = &DC[TE  *size_MatC];
-      fbwC   = &DC[BW  *size_MatC];
-      fbeC   = &DC[BE  *size_MatC];
-      ftwC   = &DC[TW  *size_MatC];
-      ftnC   = &DC[TN  *size_MatC];
-      fbsC   = &DC[BS  *size_MatC];
-      fbnC   = &DC[BN  *size_MatC];
-      ftsC   = &DC[TS  *size_MatC];
-      fzeroC = &DC[REST*size_MatC];
-      ftneC  = &DC[TNE *size_MatC];
-      ftswC  = &DC[TSW *size_MatC];
-      ftseC  = &DC[TSE *size_MatC];
-      ftnwC  = &DC[TNW *size_MatC];
-      fbneC  = &DC[BNE *size_MatC];
-      fbswC  = &DC[BSW *size_MatC];
-      fbseC  = &DC[BSE *size_MatC];
-      fbnwC  = &DC[BNW *size_MatC];
+      feC    = &DC[DIR_P00   *size_MatC];
+      fwC    = &DC[DIR_M00   *size_MatC];
+      fnC    = &DC[DIR_0P0   *size_MatC];
+      fsC    = &DC[DIR_0M0   *size_MatC];
+      ftC    = &DC[DIR_00P   *size_MatC];
+      fbC    = &DC[DIR_00M   *size_MatC];
+      fneC   = &DC[DIR_PP0  *size_MatC];
+      fswC   = &DC[DIR_MM0  *size_MatC];
+      fseC   = &DC[DIR_PM0  *size_MatC];
+      fnwC   = &DC[DIR_MP0  *size_MatC];
+      fteC   = &DC[DIR_P0P  *size_MatC];
+      fbwC   = &DC[DIR_M0M  *size_MatC];
+      fbeC   = &DC[DIR_P0M  *size_MatC];
+      ftwC   = &DC[DIR_M0P  *size_MatC];
+      ftnC   = &DC[DIR_0PP  *size_MatC];
+      fbsC   = &DC[DIR_0MM  *size_MatC];
+      fbnC   = &DC[DIR_0PM  *size_MatC];
+      ftsC   = &DC[DIR_0MP  *size_MatC];
+      fzeroC = &DC[DIR_000*size_MatC];
+      ftneC  = &DC[DIR_PPP *size_MatC];
+      ftswC  = &DC[DIR_MMP *size_MatC];
+      ftseC  = &DC[DIR_PMP *size_MatC];
+      ftnwC  = &DC[DIR_MPP *size_MatC];
+      fbneC  = &DC[DIR_PPM *size_MatC];
+      fbswC  = &DC[DIR_MMM *size_MatC];
+      fbseC  = &DC[DIR_PMM *size_MatC];
+      fbnwC  = &DC[DIR_MPM *size_MatC];
    } 
    else
    {
-      fwC    = &DC[E   *size_MatC];
-      feC    = &DC[W   *size_MatC];
-      fsC    = &DC[N   *size_MatC];
-      fnC    = &DC[S   *size_MatC];
-      fbC    = &DC[T   *size_MatC];
-      ftC    = &DC[B   *size_MatC];
-      fswC   = &DC[NE  *size_MatC];
-      fneC   = &DC[SW  *size_MatC];
-      fnwC   = &DC[SE  *size_MatC];
-      fseC   = &DC[NW  *size_MatC];
-      fbwC   = &DC[TE  *size_MatC];
-      fteC   = &DC[BW  *size_MatC];
-      ftwC   = &DC[BE  *size_MatC];
-      fbeC   = &DC[TW  *size_MatC];
-      fbsC   = &DC[TN  *size_MatC];
-      ftnC   = &DC[BS  *size_MatC];
-      ftsC   = &DC[BN  *size_MatC];
-      fbnC   = &DC[TS  *size_MatC];
-      fzeroC = &DC[REST*size_MatC];
-      fbswC  = &DC[TNE *size_MatC];
-      fbneC  = &DC[TSW *size_MatC];
-      fbnwC  = &DC[TSE *size_MatC];
-      fbseC  = &DC[TNW *size_MatC];
-      ftswC  = &DC[BNE *size_MatC];
-      ftneC  = &DC[BSW *size_MatC];
-      ftnwC  = &DC[BSE *size_MatC];
-      ftseC  = &DC[BNW *size_MatC];
+      fwC    = &DC[DIR_P00   *size_MatC];
+      feC    = &DC[DIR_M00   *size_MatC];
+      fsC    = &DC[DIR_0P0   *size_MatC];
+      fnC    = &DC[DIR_0M0   *size_MatC];
+      fbC    = &DC[DIR_00P   *size_MatC];
+      ftC    = &DC[DIR_00M   *size_MatC];
+      fswC   = &DC[DIR_PP0  *size_MatC];
+      fneC   = &DC[DIR_MM0  *size_MatC];
+      fnwC   = &DC[DIR_PM0  *size_MatC];
+      fseC   = &DC[DIR_MP0  *size_MatC];
+      fbwC   = &DC[DIR_P0P  *size_MatC];
+      fteC   = &DC[DIR_M0M  *size_MatC];
+      ftwC   = &DC[DIR_P0M  *size_MatC];
+      fbeC   = &DC[DIR_M0P  *size_MatC];
+      fbsC   = &DC[DIR_0PP  *size_MatC];
+      ftnC   = &DC[DIR_0MM  *size_MatC];
+      ftsC   = &DC[DIR_0PM  *size_MatC];
+      fbnC   = &DC[DIR_0MP  *size_MatC];
+      fzeroC = &DC[DIR_000*size_MatC];
+      fbswC  = &DC[DIR_PPP *size_MatC];
+      fbneC  = &DC[DIR_MMP *size_MatC];
+      fbnwC  = &DC[DIR_PMP *size_MatC];
+      fbseC  = &DC[DIR_MPP *size_MatC];
+      ftswC  = &DC[DIR_PPM *size_MatC];
+      ftneC  = &DC[DIR_MMM *size_MatC];
+      ftnwC  = &DC[DIR_PMM *size_MatC];
+      ftseC  = &DC[DIR_MPM *size_MatC];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -48953,7 +48953,7 @@ extern "C" __global__ void scaleCFEff27(real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleCF27(real* DC, 
+__global__ void scaleCF27(real* DC, 
                                      real* DF, 
                                      unsigned int* neighborCX,
                                      unsigned int* neighborCY,
@@ -48978,96 +48978,96 @@ extern "C" __global__ void scaleCF27(real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[E   *size_MatF];
-   fwF    = &DF[W   *size_MatF];
-   fnF    = &DF[N   *size_MatF];
-   fsF    = &DF[S   *size_MatF];
-   ftF    = &DF[T   *size_MatF];
-   fbF    = &DF[B   *size_MatF];
-   fneF   = &DF[NE  *size_MatF];
-   fswF   = &DF[SW  *size_MatF];
-   fseF   = &DF[SE  *size_MatF];
-   fnwF   = &DF[NW  *size_MatF];
-   fteF   = &DF[TE  *size_MatF];
-   fbwF   = &DF[BW  *size_MatF];
-   fbeF   = &DF[BE  *size_MatF];
-   ftwF   = &DF[TW  *size_MatF];
-   ftnF   = &DF[TN  *size_MatF];
-   fbsF   = &DF[BS  *size_MatF];
-   fbnF   = &DF[BN  *size_MatF];
-   ftsF   = &DF[TS  *size_MatF];
-   fzeroF = &DF[REST*size_MatF];
-   ftneF  = &DF[TNE *size_MatF];
-   ftswF  = &DF[TSW *size_MatF];
-   ftseF  = &DF[TSE *size_MatF];
-   ftnwF  = &DF[TNW *size_MatF];
-   fbneF  = &DF[BNE *size_MatF];
-   fbswF  = &DF[BSW *size_MatF];
-   fbseF  = &DF[BSE *size_MatF];
-   fbnwF  = &DF[BNW *size_MatF];
+   feF    = &DF[DIR_P00   *size_MatF];
+   fwF    = &DF[DIR_M00   *size_MatF];
+   fnF    = &DF[DIR_0P0   *size_MatF];
+   fsF    = &DF[DIR_0M0   *size_MatF];
+   ftF    = &DF[DIR_00P   *size_MatF];
+   fbF    = &DF[DIR_00M   *size_MatF];
+   fneF   = &DF[DIR_PP0  *size_MatF];
+   fswF   = &DF[DIR_MM0  *size_MatF];
+   fseF   = &DF[DIR_PM0  *size_MatF];
+   fnwF   = &DF[DIR_MP0  *size_MatF];
+   fteF   = &DF[DIR_P0P  *size_MatF];
+   fbwF   = &DF[DIR_M0M  *size_MatF];
+   fbeF   = &DF[DIR_P0M  *size_MatF];
+   ftwF   = &DF[DIR_M0P  *size_MatF];
+   ftnF   = &DF[DIR_0PP  *size_MatF];
+   fbsF   = &DF[DIR_0MM  *size_MatF];
+   fbnF   = &DF[DIR_0PM  *size_MatF];
+   ftsF   = &DF[DIR_0MP  *size_MatF];
+   fzeroF = &DF[DIR_000*size_MatF];
+   ftneF  = &DF[DIR_PPP *size_MatF];
+   ftswF  = &DF[DIR_MMP *size_MatF];
+   ftseF  = &DF[DIR_PMP *size_MatF];
+   ftnwF  = &DF[DIR_MPP *size_MatF];
+   fbneF  = &DF[DIR_PPM *size_MatF];
+   fbswF  = &DF[DIR_MMM *size_MatF];
+   fbseF  = &DF[DIR_PMM *size_MatF];
+   fbnwF  = &DF[DIR_MPM *size_MatF];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[E   *size_MatC];
-      fwC    = &DC[W   *size_MatC];
-      fnC    = &DC[N   *size_MatC];
-      fsC    = &DC[S   *size_MatC];
-      ftC    = &DC[T   *size_MatC];
-      fbC    = &DC[B   *size_MatC];
-      fneC   = &DC[NE  *size_MatC];
-      fswC   = &DC[SW  *size_MatC];
-      fseC   = &DC[SE  *size_MatC];
-      fnwC   = &DC[NW  *size_MatC];
-      fteC   = &DC[TE  *size_MatC];
-      fbwC   = &DC[BW  *size_MatC];
-      fbeC   = &DC[BE  *size_MatC];
-      ftwC   = &DC[TW  *size_MatC];
-      ftnC   = &DC[TN  *size_MatC];
-      fbsC   = &DC[BS  *size_MatC];
-      fbnC   = &DC[BN  *size_MatC];
-      ftsC   = &DC[TS  *size_MatC];
-      fzeroC = &DC[REST*size_MatC];
-      ftneC  = &DC[TNE *size_MatC];
-      ftswC  = &DC[TSW *size_MatC];
-      ftseC  = &DC[TSE *size_MatC];
-      ftnwC  = &DC[TNW *size_MatC];
-      fbneC  = &DC[BNE *size_MatC];
-      fbswC  = &DC[BSW *size_MatC];
-      fbseC  = &DC[BSE *size_MatC];
-      fbnwC  = &DC[BNW *size_MatC];
+      feC    = &DC[DIR_P00   *size_MatC];
+      fwC    = &DC[DIR_M00   *size_MatC];
+      fnC    = &DC[DIR_0P0   *size_MatC];
+      fsC    = &DC[DIR_0M0   *size_MatC];
+      ftC    = &DC[DIR_00P   *size_MatC];
+      fbC    = &DC[DIR_00M   *size_MatC];
+      fneC   = &DC[DIR_PP0  *size_MatC];
+      fswC   = &DC[DIR_MM0  *size_MatC];
+      fseC   = &DC[DIR_PM0  *size_MatC];
+      fnwC   = &DC[DIR_MP0  *size_MatC];
+      fteC   = &DC[DIR_P0P  *size_MatC];
+      fbwC   = &DC[DIR_M0M  *size_MatC];
+      fbeC   = &DC[DIR_P0M  *size_MatC];
+      ftwC   = &DC[DIR_M0P  *size_MatC];
+      ftnC   = &DC[DIR_0PP  *size_MatC];
+      fbsC   = &DC[DIR_0MM  *size_MatC];
+      fbnC   = &DC[DIR_0PM  *size_MatC];
+      ftsC   = &DC[DIR_0MP  *size_MatC];
+      fzeroC = &DC[DIR_000*size_MatC];
+      ftneC  = &DC[DIR_PPP *size_MatC];
+      ftswC  = &DC[DIR_MMP *size_MatC];
+      ftseC  = &DC[DIR_PMP *size_MatC];
+      ftnwC  = &DC[DIR_MPP *size_MatC];
+      fbneC  = &DC[DIR_PPM *size_MatC];
+      fbswC  = &DC[DIR_MMM *size_MatC];
+      fbseC  = &DC[DIR_PMM *size_MatC];
+      fbnwC  = &DC[DIR_MPM *size_MatC];
    } 
    else
    {
-      fwC    = &DC[E   *size_MatC];
-      feC    = &DC[W   *size_MatC];
-      fsC    = &DC[N   *size_MatC];
-      fnC    = &DC[S   *size_MatC];
-      fbC    = &DC[T   *size_MatC];
-      ftC    = &DC[B   *size_MatC];
-      fswC   = &DC[NE  *size_MatC];
-      fneC   = &DC[SW  *size_MatC];
-      fnwC   = &DC[SE  *size_MatC];
-      fseC   = &DC[NW  *size_MatC];
-      fbwC   = &DC[TE  *size_MatC];
-      fteC   = &DC[BW  *size_MatC];
-      ftwC   = &DC[BE  *size_MatC];
-      fbeC   = &DC[TW  *size_MatC];
-      fbsC   = &DC[TN  *size_MatC];
-      ftnC   = &DC[BS  *size_MatC];
-      ftsC   = &DC[BN  *size_MatC];
-      fbnC   = &DC[TS  *size_MatC];
-      fzeroC = &DC[REST*size_MatC];
-      fbswC  = &DC[TNE *size_MatC];
-      fbneC  = &DC[TSW *size_MatC];
-      fbnwC  = &DC[TSE *size_MatC];
-      fbseC  = &DC[TNW *size_MatC];
-      ftswC  = &DC[BNE *size_MatC];
-      ftneC  = &DC[BSW *size_MatC];
-      ftnwC  = &DC[BSE *size_MatC];
-      ftseC  = &DC[BNW *size_MatC];
+      fwC    = &DC[DIR_P00   *size_MatC];
+      feC    = &DC[DIR_M00   *size_MatC];
+      fsC    = &DC[DIR_0P0   *size_MatC];
+      fnC    = &DC[DIR_0M0   *size_MatC];
+      fbC    = &DC[DIR_00P   *size_MatC];
+      ftC    = &DC[DIR_00M   *size_MatC];
+      fswC   = &DC[DIR_PP0  *size_MatC];
+      fneC   = &DC[DIR_MM0  *size_MatC];
+      fnwC   = &DC[DIR_PM0  *size_MatC];
+      fseC   = &DC[DIR_MP0  *size_MatC];
+      fbwC   = &DC[DIR_P0P  *size_MatC];
+      fteC   = &DC[DIR_M0M  *size_MatC];
+      ftwC   = &DC[DIR_P0M  *size_MatC];
+      fbeC   = &DC[DIR_M0P  *size_MatC];
+      fbsC   = &DC[DIR_0PP  *size_MatC];
+      ftnC   = &DC[DIR_0MM  *size_MatC];
+      ftsC   = &DC[DIR_0PM  *size_MatC];
+      fbnC   = &DC[DIR_0MP  *size_MatC];
+      fzeroC = &DC[DIR_000*size_MatC];
+      fbswC  = &DC[DIR_PPP *size_MatC];
+      fbneC  = &DC[DIR_MMP *size_MatC];
+      fbnwC  = &DC[DIR_PMP *size_MatC];
+      fbseC  = &DC[DIR_MPP *size_MatC];
+      ftswC  = &DC[DIR_PPM *size_MatC];
+      ftneC  = &DC[DIR_MMM *size_MatC];
+      ftnwC  = &DC[DIR_PMM *size_MatC];
+      ftseC  = &DC[DIR_MPM *size_MatC];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/GPU/ScaleCF_F3_27.cu b/src/gpu/VirtualFluids_GPU/GPU/ScaleCF_F3_27.cu
index 14e36971c87df7d83ab899cb141979ae9707bad9..cb8bd2a322cc9176cd0aa31625ee386e1f62d63d 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/ScaleCF_F3_27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/ScaleCF_F3_27.cu
@@ -14,7 +14,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleCF_comp_D3Q27F3_2018(real* DC,
+__global__ void scaleCF_comp_D3Q27F3_2018(real* DC,
 													 real* DF,
 													 real* G6,
 													 unsigned int* neighborCX,
@@ -44,33 +44,33 @@ extern "C" __global__ void scaleCF_comp_D3Q27F3_2018(real* DC,
 		*f000dest, *fMMMdest, *fMMPdest, *fMPPdest, *fMPMdest, *fPPMdest, *fPPPdest, *fPMPdest, *fPMMdest;
 
 
-	fP00dest = &DF[E   *size_MatF];
-	fM00dest = &DF[W   *size_MatF];
-	f0P0dest = &DF[N   *size_MatF];
-	f0M0dest = &DF[S   *size_MatF];
-	f00Pdest = &DF[T   *size_MatF];
-	f00Mdest = &DF[B   *size_MatF];
-	fPP0dest = &DF[NE  *size_MatF];
-	fMM0dest = &DF[SW  *size_MatF];
-	fPM0dest = &DF[SE  *size_MatF];
-	fMP0dest = &DF[NW  *size_MatF];
-	fP0Pdest = &DF[TE  *size_MatF];
-	fM0Mdest = &DF[BW  *size_MatF];
-	fP0Mdest = &DF[BE  *size_MatF];
-	fM0Pdest = &DF[TW  *size_MatF];
-	f0PPdest = &DF[TN  *size_MatF];
-	f0MMdest = &DF[BS  *size_MatF];
-	f0PMdest = &DF[BN  *size_MatF];
-	f0MPdest = &DF[TS  *size_MatF];
-	f000dest = &DF[REST*size_MatF];
-	fMMMdest = &DF[BSW *size_MatF];
-	fMMPdest = &DF[TSW *size_MatF];
-	fMPPdest = &DF[TNW *size_MatF];
-	fMPMdest = &DF[BNW *size_MatF];
-	fPPMdest = &DF[BNE *size_MatF];
-	fPPPdest = &DF[TNE *size_MatF];
-	fPMPdest = &DF[TSE *size_MatF];
-	fPMMdest = &DF[BSE *size_MatF];
+	fP00dest = &DF[DIR_P00   *size_MatF];
+	fM00dest = &DF[DIR_M00   *size_MatF];
+	f0P0dest = &DF[DIR_0P0   *size_MatF];
+	f0M0dest = &DF[DIR_0M0   *size_MatF];
+	f00Pdest = &DF[DIR_00P   *size_MatF];
+	f00Mdest = &DF[DIR_00M   *size_MatF];
+	fPP0dest = &DF[DIR_PP0  *size_MatF];
+	fMM0dest = &DF[DIR_MM0  *size_MatF];
+	fPM0dest = &DF[DIR_PM0  *size_MatF];
+	fMP0dest = &DF[DIR_MP0  *size_MatF];
+	fP0Pdest = &DF[DIR_P0P  *size_MatF];
+	fM0Mdest = &DF[DIR_M0M  *size_MatF];
+	fP0Mdest = &DF[DIR_P0M  *size_MatF];
+	fM0Pdest = &DF[DIR_M0P  *size_MatF];
+	f0PPdest = &DF[DIR_0PP  *size_MatF];
+	f0MMdest = &DF[DIR_0MM  *size_MatF];
+	f0PMdest = &DF[DIR_0PM  *size_MatF];
+	f0MPdest = &DF[DIR_0MP  *size_MatF];
+	f000dest = &DF[DIR_000*size_MatF];
+	fMMMdest = &DF[DIR_MMM *size_MatF];
+	fMMPdest = &DF[DIR_MMP *size_MatF];
+	fMPPdest = &DF[DIR_MPP *size_MatF];
+	fMPMdest = &DF[DIR_MPM *size_MatF];
+	fPPMdest = &DF[DIR_PPM *size_MatF];
+	fPPPdest = &DF[DIR_PPP *size_MatF];
+	fPMPdest = &DF[DIR_PMP *size_MatF];
+	fPMMdest = &DF[DIR_PMM *size_MatF];
 
 	real
 		*fP00source, *fM00source, *f0P0source, *f0M0source, *f00Psource, *f00Msource, *fPP0source, *fMM0source, *fPM0source,
@@ -79,72 +79,72 @@ extern "C" __global__ void scaleCF_comp_D3Q27F3_2018(real* DC,
 
 	if (isEvenTimestep == true)
 	{
-		fP00source = &DC[E   *size_MatC];
-		fM00source = &DC[W   *size_MatC];
-		f0P0source = &DC[N   *size_MatC];
-		f0M0source = &DC[S   *size_MatC];
-		f00Psource = &DC[T   *size_MatC];
-		f00Msource = &DC[B   *size_MatC];
-		fPP0source = &DC[NE  *size_MatC];
-		fMM0source = &DC[SW  *size_MatC];
-		fPM0source = &DC[SE  *size_MatC];
-		fMP0source = &DC[NW  *size_MatC];
-		fP0Psource = &DC[TE  *size_MatC];
-		fM0Msource = &DC[BW  *size_MatC];
-		fP0Msource = &DC[BE  *size_MatC];
-		fM0Psource = &DC[TW  *size_MatC];
-		f0PPsource = &DC[TN  *size_MatC];
-		f0MMsource = &DC[BS  *size_MatC];
-		f0PMsource = &DC[BN  *size_MatC];
-		f0MPsource = &DC[TS  *size_MatC];
-		f000source = &DC[REST*size_MatC];
-		fMMMsource = &DC[BSW *size_MatC];
-		fMMPsource = &DC[TSW *size_MatC];
-		fMPPsource = &DC[TNW *size_MatC];
-		fMPMsource = &DC[BNW *size_MatC];
-		fPPMsource = &DC[BNE *size_MatC];
-		fPPPsource = &DC[TNE *size_MatC];
-		fPMPsource = &DC[TSE *size_MatC];
-		fPMMsource = &DC[BSE *size_MatC];
+		fP00source = &DC[DIR_P00   *size_MatC];
+		fM00source = &DC[DIR_M00   *size_MatC];
+		f0P0source = &DC[DIR_0P0   *size_MatC];
+		f0M0source = &DC[DIR_0M0   *size_MatC];
+		f00Psource = &DC[DIR_00P   *size_MatC];
+		f00Msource = &DC[DIR_00M   *size_MatC];
+		fPP0source = &DC[DIR_PP0  *size_MatC];
+		fMM0source = &DC[DIR_MM0  *size_MatC];
+		fPM0source = &DC[DIR_PM0  *size_MatC];
+		fMP0source = &DC[DIR_MP0  *size_MatC];
+		fP0Psource = &DC[DIR_P0P  *size_MatC];
+		fM0Msource = &DC[DIR_M0M  *size_MatC];
+		fP0Msource = &DC[DIR_P0M  *size_MatC];
+		fM0Psource = &DC[DIR_M0P  *size_MatC];
+		f0PPsource = &DC[DIR_0PP  *size_MatC];
+		f0MMsource = &DC[DIR_0MM  *size_MatC];
+		f0PMsource = &DC[DIR_0PM  *size_MatC];
+		f0MPsource = &DC[DIR_0MP  *size_MatC];
+		f000source = &DC[DIR_000*size_MatC];
+		fMMMsource = &DC[DIR_MMM *size_MatC];
+		fMMPsource = &DC[DIR_MMP *size_MatC];
+		fMPPsource = &DC[DIR_MPP *size_MatC];
+		fMPMsource = &DC[DIR_MPM *size_MatC];
+		fPPMsource = &DC[DIR_PPM *size_MatC];
+		fPPPsource = &DC[DIR_PPP *size_MatC];
+		fPMPsource = &DC[DIR_PMP *size_MatC];
+		fPMMsource = &DC[DIR_PMM *size_MatC];
 	}
 	else
 	{
-		fP00source = &DC[W   *size_MatC];
-		fM00source = &DC[E   *size_MatC];
-		f0P0source = &DC[S   *size_MatC];
-		f0M0source = &DC[N   *size_MatC];
-		f00Psource = &DC[B   *size_MatC];
-		f00Msource = &DC[T   *size_MatC];
-		fPP0source = &DC[SW  *size_MatC];
-		fMM0source = &DC[NE  *size_MatC];
-		fPM0source = &DC[NW  *size_MatC];
-		fMP0source = &DC[SE  *size_MatC];
-		fP0Psource = &DC[BW  *size_MatC];
-		fM0Msource = &DC[TE  *size_MatC];
-		fP0Msource = &DC[TW  *size_MatC];
-		fM0Psource = &DC[BE  *size_MatC];
-		f0PPsource = &DC[BS  *size_MatC];
-		f0MMsource = &DC[TN  *size_MatC];
-		f0PMsource = &DC[TS  *size_MatC];
-		f0MPsource = &DC[BN  *size_MatC];
-		f000source = &DC[REST*size_MatC];
-		fMMMsource = &DC[TNE *size_MatC];
-		fMMPsource = &DC[BNE *size_MatC];
-		fMPPsource = &DC[BSE *size_MatC];
-		fMPMsource = &DC[TSE *size_MatC];
-		fPPMsource = &DC[TSW *size_MatC];
-		fPPPsource = &DC[BSW *size_MatC];
-		fPMPsource = &DC[BNW *size_MatC];
-		fPMMsource = &DC[TNW *size_MatC];
+		fP00source = &DC[DIR_M00   *size_MatC];
+		fM00source = &DC[DIR_P00   *size_MatC];
+		f0P0source = &DC[DIR_0M0   *size_MatC];
+		f0M0source = &DC[DIR_0P0   *size_MatC];
+		f00Psource = &DC[DIR_00M   *size_MatC];
+		f00Msource = &DC[DIR_00P   *size_MatC];
+		fPP0source = &DC[DIR_MM0  *size_MatC];
+		fMM0source = &DC[DIR_PP0  *size_MatC];
+		fPM0source = &DC[DIR_MP0  *size_MatC];
+		fMP0source = &DC[DIR_PM0  *size_MatC];
+		fP0Psource = &DC[DIR_M0M  *size_MatC];
+		fM0Msource = &DC[DIR_P0P  *size_MatC];
+		fP0Msource = &DC[DIR_M0P  *size_MatC];
+		fM0Psource = &DC[DIR_P0M  *size_MatC];
+		f0PPsource = &DC[DIR_0MM  *size_MatC];
+		f0MMsource = &DC[DIR_0PP  *size_MatC];
+		f0PMsource = &DC[DIR_0MP  *size_MatC];
+		f0MPsource = &DC[DIR_0PM  *size_MatC];
+		f000source = &DC[DIR_000*size_MatC];
+		fMMMsource = &DC[DIR_PPP *size_MatC];
+		fMMPsource = &DC[DIR_PPM *size_MatC];
+		fMPPsource = &DC[DIR_PMM *size_MatC];
+		fMPMsource = &DC[DIR_PMP *size_MatC];
+		fPPMsource = &DC[DIR_MMP *size_MatC];
+		fPPPsource = &DC[DIR_MMM *size_MatC];
+		fPMPsource = &DC[DIR_MPM *size_MatC];
+		fPMMsource = &DC[DIR_MPP *size_MatC];
 	}
 
 	Distributions6 G;
-	G.g[E] = &G6[E   *size_MatF];
-	G.g[W] = &G6[W   *size_MatF];
-	G.g[N] = &G6[N   *size_MatF];
-	G.g[S] = &G6[S   *size_MatF];
-	G.g[T] = &G6[T   *size_MatF];
-	G.g[B] = &G6[B   *size_MatF];
+	G.g[DIR_P00] = &G6[DIR_P00   *size_MatF];
+	G.g[DIR_M00] = &G6[DIR_M00   *size_MatF];
+	G.g[DIR_0P0] = &G6[DIR_0P0   *size_MatF];
+	G.g[DIR_0M0] = &G6[DIR_0M0   *size_MatF];
+	G.g[DIR_00P] = &G6[DIR_00P   *size_MatF];
+	G.g[DIR_00M] = &G6[DIR_00M   *size_MatF];
 
 	////////////////////////////////////////////////////////////////////////////////
    const unsigned  ix = threadIdx.x;  // Globaler x-Index 
@@ -1187,12 +1187,12 @@ extern "C" __global__ void scaleCF_comp_D3Q27F3_2018(real* DC,
 	  ////////////////////////////////////////////////////////////////////////////////
 
 	  ////////////////////////////////////////////////////////////////////////////////////
-	  (G.g[E])[k000] = mgcbb;
-	  (G.g[W])[kM00] = mgabb;
-	  (G.g[N])[k000] = mgbcb;
-	  (G.g[S])[k0M0] = mgbab;
-	  (G.g[T])[k000] = mgbbc;
-	  (G.g[B])[k00M] = mgbba;
+	  (G.g[DIR_P00])[k000] = mgcbb;
+	  (G.g[DIR_M00])[kM00] = mgabb;
+	  (G.g[DIR_0P0])[k000] = mgbcb;
+	  (G.g[DIR_0M0])[k0M0] = mgbab;
+	  (G.g[DIR_00P])[k000] = mgbbc;
+	  (G.g[DIR_00M])[k00M] = mgbba;
 	  ////////////////////////////////////////////////////////////////////////////////////
 	  fP00dest[k000] = mfcbb;
 	  fM00dest[kM00] = mfabb;
@@ -1623,12 +1623,12 @@ extern "C" __global__ void scaleCF_comp_D3Q27F3_2018(real* DC,
 	  ////////////////////////////////////////////////////////////////////////////////////
 
 	  ////////////////////////////////////////////////////////////////////////////////////
-	  (G.g[E])[k000] = mgcbb;
-	  (G.g[W])[kM00] = mgabb;
-	  (G.g[N])[k000] = mgbcb;
-	  (G.g[S])[k0M0] = mgbab;
-	  (G.g[T])[k000] = mgbbc;
-	  (G.g[B])[k00M] = mgbba;
+	  (G.g[DIR_P00])[k000] = mgcbb;
+	  (G.g[DIR_M00])[kM00] = mgabb;
+	  (G.g[DIR_0P0])[k000] = mgbcb;
+	  (G.g[DIR_0M0])[k0M0] = mgbab;
+	  (G.g[DIR_00P])[k000] = mgbbc;
+	  (G.g[DIR_00M])[k00M] = mgbba;
 	  ////////////////////////////////////////////////////////////////////////////////////
 	  fP00dest[k000] = mfcbb;
 	  fM00dest[kM00] = mfabb;
@@ -2062,12 +2062,12 @@ extern "C" __global__ void scaleCF_comp_D3Q27F3_2018(real* DC,
 	  ////////////////////////////////////////////////////////////////////////////////
 
 	  ////////////////////////////////////////////////////////////////////////////////////
-	  (G.g[E])[k000] = mgcbb;
-	  (G.g[W])[kM00] = mgabb;
-	  (G.g[N])[k000] = mgbcb;
-	  (G.g[S])[k0M0] = mgbab;
-	  (G.g[T])[k000] = mgbbc;
-	  (G.g[B])[k00M] = mgbba;
+	  (G.g[DIR_P00])[k000] = mgcbb;
+	  (G.g[DIR_M00])[kM00] = mgabb;
+	  (G.g[DIR_0P0])[k000] = mgbcb;
+	  (G.g[DIR_0M0])[k0M0] = mgbab;
+	  (G.g[DIR_00P])[k000] = mgbbc;
+	  (G.g[DIR_00M])[k00M] = mgbba;
 	  ////////////////////////////////////////////////////////////////////////////////////
 	  fP00dest[k000] = mfcbb;
 	  fM00dest[kM00] = mfabb;
@@ -2501,12 +2501,12 @@ extern "C" __global__ void scaleCF_comp_D3Q27F3_2018(real* DC,
 	  ////////////////////////////////////////////////////////////////////////////////
 
 	  ////////////////////////////////////////////////////////////////////////////////////
-	  (G.g[E])[k000] = mgcbb;
-	  (G.g[W])[kM00] = mgabb;
-	  (G.g[N])[k000] = mgbcb;
-	  (G.g[S])[k0M0] = mgbab;
-	  (G.g[T])[k000] = mgbbc;
-	  (G.g[B])[k00M] = mgbba;
+	  (G.g[DIR_P00])[k000] = mgcbb;
+	  (G.g[DIR_M00])[kM00] = mgabb;
+	  (G.g[DIR_0P0])[k000] = mgbcb;
+	  (G.g[DIR_0M0])[k0M0] = mgbab;
+	  (G.g[DIR_00P])[k000] = mgbbc;
+	  (G.g[DIR_00M])[k00M] = mgbba;
 	  ////////////////////////////////////////////////////////////////////////////////////
 	  fP00dest[k000] = mfcbb;
 	  fM00dest[kM00] = mfabb;
@@ -2950,12 +2950,12 @@ extern "C" __global__ void scaleCF_comp_D3Q27F3_2018(real* DC,
 	  ////////////////////////////////////////////////////////////////////////////////
 
 	  ////////////////////////////////////////////////////////////////////////////////////
-	  (G.g[E])[k000] = mgcbb;
-	  (G.g[W])[kM00] = mgabb;
-	  (G.g[N])[k000] = mgbcb;
-	  (G.g[S])[k0M0] = mgbab;
-	  (G.g[T])[k000] = mgbbc;
-	  (G.g[B])[k00M] = mgbba;
+	  (G.g[DIR_P00])[k000] = mgcbb;
+	  (G.g[DIR_M00])[kM00] = mgabb;
+	  (G.g[DIR_0P0])[k000] = mgbcb;
+	  (G.g[DIR_0M0])[k0M0] = mgbab;
+	  (G.g[DIR_00P])[k000] = mgbbc;
+	  (G.g[DIR_00M])[k00M] = mgbba;
 	  ////////////////////////////////////////////////////////////////////////////////////
 	  fP00dest[k000] = mfcbb;
 	  fM00dest[kM00] = mfabb;
@@ -3389,12 +3389,12 @@ extern "C" __global__ void scaleCF_comp_D3Q27F3_2018(real* DC,
 	  ////////////////////////////////////////////////////////////////////////////////
 
 	  ////////////////////////////////////////////////////////////////////////////////////
-	  (G.g[E])[k000] = mgcbb;
-	  (G.g[W])[kM00] = mgabb;
-	  (G.g[N])[k000] = mgbcb;
-	  (G.g[S])[k0M0] = mgbab;
-	  (G.g[T])[k000] = mgbbc;
-	  (G.g[B])[k00M] = mgbba;
+	  (G.g[DIR_P00])[k000] = mgcbb;
+	  (G.g[DIR_M00])[kM00] = mgabb;
+	  (G.g[DIR_0P0])[k000] = mgbcb;
+	  (G.g[DIR_0M0])[k0M0] = mgbab;
+	  (G.g[DIR_00P])[k000] = mgbbc;
+	  (G.g[DIR_00M])[k00M] = mgbba;
 	  ////////////////////////////////////////////////////////////////////////////////////
 	  fP00dest[k000] = mfcbb;
 	  fM00dest[kM00] = mfabb;
@@ -3828,12 +3828,12 @@ extern "C" __global__ void scaleCF_comp_D3Q27F3_2018(real* DC,
 	  ////////////////////////////////////////////////////////////////////////////////
 
 	  ////////////////////////////////////////////////////////////////////////////////////
-	  (G.g[E])[k000] = mgcbb;
-	  (G.g[W])[kM00] = mgabb;
-	  (G.g[N])[k000] = mgbcb;
-	  (G.g[S])[k0M0] = mgbab;
-	  (G.g[T])[k000] = mgbbc;
-	  (G.g[B])[k00M] = mgbba;
+	  (G.g[DIR_P00])[k000] = mgcbb;
+	  (G.g[DIR_M00])[kM00] = mgabb;
+	  (G.g[DIR_0P0])[k000] = mgbcb;
+	  (G.g[DIR_0M0])[k0M0] = mgbab;
+	  (G.g[DIR_00P])[k000] = mgbbc;
+	  (G.g[DIR_00M])[k00M] = mgbba;
 	  ////////////////////////////////////////////////////////////////////////////////////
 	  fP00dest[k000] = mfcbb;
 	  fM00dest[kM00] = mfabb;
@@ -4267,12 +4267,12 @@ extern "C" __global__ void scaleCF_comp_D3Q27F3_2018(real* DC,
 	  ////////////////////////////////////////////////////////////////////////////////
 
 	  ////////////////////////////////////////////////////////////////////////////////////
-	  (G.g[E])[k000] = mgcbb;
-	  (G.g[W])[kM00] = mgabb;
-	  (G.g[N])[k000] = mgbcb;
-	  (G.g[S])[k0M0] = mgbab;
-	  (G.g[T])[k000] = mgbbc;
-	  (G.g[B])[k00M] = mgbba;
+	  (G.g[DIR_P00])[k000] = mgcbb;
+	  (G.g[DIR_M00])[kM00] = mgabb;
+	  (G.g[DIR_0P0])[k000] = mgbcb;
+	  (G.g[DIR_0M0])[k0M0] = mgbab;
+	  (G.g[DIR_00P])[k000] = mgbbc;
+	  (G.g[DIR_00M])[k00M] = mgbba;
 	  ////////////////////////////////////////////////////////////////////////////////////
 	  fP00dest[k000] = mfcbb;
 	  fM00dest[kM00] = mfabb;
@@ -4361,7 +4361,7 @@ extern "C" __global__ void scaleCF_comp_D3Q27F3_2018(real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleCF_comp_D3Q27F3( real* DC,
+__global__ void scaleCF_comp_D3Q27F3( real* DC,
 												 real* DF,
 												 real* G6,
 												 unsigned int* neighborCX,
@@ -4391,33 +4391,33 @@ extern "C" __global__ void scaleCF_comp_D3Q27F3( real* DC,
 		*f000dest, *fMMMdest, *fMMPdest, *fMPPdest, *fMPMdest, *fPPMdest, *fPPPdest, *fPMPdest, *fPMMdest;
 
 
-	fP00dest = &DF[E   *size_MatF];
-	fM00dest = &DF[W   *size_MatF];
-	f0P0dest = &DF[N   *size_MatF];
-	f0M0dest = &DF[S   *size_MatF];
-	f00Pdest = &DF[T   *size_MatF];
-	f00Mdest = &DF[B   *size_MatF];
-	fPP0dest = &DF[NE  *size_MatF];
-	fMM0dest = &DF[SW  *size_MatF];
-	fPM0dest = &DF[SE  *size_MatF];
-	fMP0dest = &DF[NW  *size_MatF];
-	fP0Pdest = &DF[TE  *size_MatF];
-	fM0Mdest = &DF[BW  *size_MatF];
-	fP0Mdest = &DF[BE  *size_MatF];
-	fM0Pdest = &DF[TW  *size_MatF];
-	f0PPdest = &DF[TN  *size_MatF];
-	f0MMdest = &DF[BS  *size_MatF];
-	f0PMdest = &DF[BN  *size_MatF];
-	f0MPdest = &DF[TS  *size_MatF];
-	f000dest = &DF[REST*size_MatF];
-	fMMMdest = &DF[BSW *size_MatF];
-	fMMPdest = &DF[TSW *size_MatF];
-	fMPPdest = &DF[TNW *size_MatF];
-	fMPMdest = &DF[BNW *size_MatF];
-	fPPMdest = &DF[BNE *size_MatF];
-	fPPPdest = &DF[TNE *size_MatF];
-	fPMPdest = &DF[TSE *size_MatF];
-	fPMMdest = &DF[BSE *size_MatF];
+	fP00dest = &DF[DIR_P00   *size_MatF];
+	fM00dest = &DF[DIR_M00   *size_MatF];
+	f0P0dest = &DF[DIR_0P0   *size_MatF];
+	f0M0dest = &DF[DIR_0M0   *size_MatF];
+	f00Pdest = &DF[DIR_00P   *size_MatF];
+	f00Mdest = &DF[DIR_00M   *size_MatF];
+	fPP0dest = &DF[DIR_PP0  *size_MatF];
+	fMM0dest = &DF[DIR_MM0  *size_MatF];
+	fPM0dest = &DF[DIR_PM0  *size_MatF];
+	fMP0dest = &DF[DIR_MP0  *size_MatF];
+	fP0Pdest = &DF[DIR_P0P  *size_MatF];
+	fM0Mdest = &DF[DIR_M0M  *size_MatF];
+	fP0Mdest = &DF[DIR_P0M  *size_MatF];
+	fM0Pdest = &DF[DIR_M0P  *size_MatF];
+	f0PPdest = &DF[DIR_0PP  *size_MatF];
+	f0MMdest = &DF[DIR_0MM  *size_MatF];
+	f0PMdest = &DF[DIR_0PM  *size_MatF];
+	f0MPdest = &DF[DIR_0MP  *size_MatF];
+	f000dest = &DF[DIR_000*size_MatF];
+	fMMMdest = &DF[DIR_MMM *size_MatF];
+	fMMPdest = &DF[DIR_MMP *size_MatF];
+	fMPPdest = &DF[DIR_MPP *size_MatF];
+	fMPMdest = &DF[DIR_MPM *size_MatF];
+	fPPMdest = &DF[DIR_PPM *size_MatF];
+	fPPPdest = &DF[DIR_PPP *size_MatF];
+	fPMPdest = &DF[DIR_PMP *size_MatF];
+	fPMMdest = &DF[DIR_PMM *size_MatF];
 
 	real
 		*fP00source, *fM00source, *f0P0source, *f0M0source, *f00Psource, *f00Msource, *fPP0source, *fMM0source, *fPM0source,
@@ -4426,72 +4426,72 @@ extern "C" __global__ void scaleCF_comp_D3Q27F3( real* DC,
 
 	if (isEvenTimestep == true)
 	{
-		fP00source = &DC[E   *size_MatC];
-		fM00source = &DC[W   *size_MatC];
-		f0P0source = &DC[N   *size_MatC];
-		f0M0source = &DC[S   *size_MatC];
-		f00Psource = &DC[T   *size_MatC];
-		f00Msource = &DC[B   *size_MatC];
-		fPP0source = &DC[NE  *size_MatC];
-		fMM0source = &DC[SW  *size_MatC];
-		fPM0source = &DC[SE  *size_MatC];
-		fMP0source = &DC[NW  *size_MatC];
-		fP0Psource = &DC[TE  *size_MatC];
-		fM0Msource = &DC[BW  *size_MatC];
-		fP0Msource = &DC[BE  *size_MatC];
-		fM0Psource = &DC[TW  *size_MatC];
-		f0PPsource = &DC[TN  *size_MatC];
-		f0MMsource = &DC[BS  *size_MatC];
-		f0PMsource = &DC[BN  *size_MatC];
-		f0MPsource = &DC[TS  *size_MatC];
-		f000source = &DC[REST*size_MatC];
-		fMMMsource = &DC[BSW *size_MatC];
-		fMMPsource = &DC[TSW *size_MatC];
-		fMPPsource = &DC[TNW *size_MatC];
-		fMPMsource = &DC[BNW *size_MatC];
-		fPPMsource = &DC[BNE *size_MatC];
-		fPPPsource = &DC[TNE *size_MatC];
-		fPMPsource = &DC[TSE *size_MatC];
-		fPMMsource = &DC[BSE *size_MatC];
+		fP00source = &DC[DIR_P00   *size_MatC];
+		fM00source = &DC[DIR_M00   *size_MatC];
+		f0P0source = &DC[DIR_0P0   *size_MatC];
+		f0M0source = &DC[DIR_0M0   *size_MatC];
+		f00Psource = &DC[DIR_00P   *size_MatC];
+		f00Msource = &DC[DIR_00M   *size_MatC];
+		fPP0source = &DC[DIR_PP0  *size_MatC];
+		fMM0source = &DC[DIR_MM0  *size_MatC];
+		fPM0source = &DC[DIR_PM0  *size_MatC];
+		fMP0source = &DC[DIR_MP0  *size_MatC];
+		fP0Psource = &DC[DIR_P0P  *size_MatC];
+		fM0Msource = &DC[DIR_M0M  *size_MatC];
+		fP0Msource = &DC[DIR_P0M  *size_MatC];
+		fM0Psource = &DC[DIR_M0P  *size_MatC];
+		f0PPsource = &DC[DIR_0PP  *size_MatC];
+		f0MMsource = &DC[DIR_0MM  *size_MatC];
+		f0PMsource = &DC[DIR_0PM  *size_MatC];
+		f0MPsource = &DC[DIR_0MP  *size_MatC];
+		f000source = &DC[DIR_000*size_MatC];
+		fMMMsource = &DC[DIR_MMM *size_MatC];
+		fMMPsource = &DC[DIR_MMP *size_MatC];
+		fMPPsource = &DC[DIR_MPP *size_MatC];
+		fMPMsource = &DC[DIR_MPM *size_MatC];
+		fPPMsource = &DC[DIR_PPM *size_MatC];
+		fPPPsource = &DC[DIR_PPP *size_MatC];
+		fPMPsource = &DC[DIR_PMP *size_MatC];
+		fPMMsource = &DC[DIR_PMM *size_MatC];
 	}
 	else
 	{
-		fP00source = &DC[W   *size_MatC];
-		fM00source = &DC[E   *size_MatC];
-		f0P0source = &DC[S   *size_MatC];
-		f0M0source = &DC[N   *size_MatC];
-		f00Psource = &DC[B   *size_MatC];
-		f00Msource = &DC[T   *size_MatC];
-		fPP0source = &DC[SW  *size_MatC];
-		fMM0source = &DC[NE  *size_MatC];
-		fPM0source = &DC[NW  *size_MatC];
-		fMP0source = &DC[SE  *size_MatC];
-		fP0Psource = &DC[BW  *size_MatC];
-		fM0Msource = &DC[TE  *size_MatC];
-		fP0Msource = &DC[TW  *size_MatC];
-		fM0Psource = &DC[BE  *size_MatC];
-		f0PPsource = &DC[BS  *size_MatC];
-		f0MMsource = &DC[TN  *size_MatC];
-		f0PMsource = &DC[TS  *size_MatC];
-		f0MPsource = &DC[BN  *size_MatC];
-		f000source = &DC[REST*size_MatC];
-		fMMMsource = &DC[TNE *size_MatC];
-		fMMPsource = &DC[BNE *size_MatC];
-		fMPPsource = &DC[BSE *size_MatC];
-		fMPMsource = &DC[TSE *size_MatC];
-		fPPMsource = &DC[TSW *size_MatC];
-		fPPPsource = &DC[BSW *size_MatC];
-		fPMPsource = &DC[BNW *size_MatC];
-		fPMMsource = &DC[TNW *size_MatC];
+		fP00source = &DC[DIR_M00   *size_MatC];
+		fM00source = &DC[DIR_P00   *size_MatC];
+		f0P0source = &DC[DIR_0M0   *size_MatC];
+		f0M0source = &DC[DIR_0P0   *size_MatC];
+		f00Psource = &DC[DIR_00M   *size_MatC];
+		f00Msource = &DC[DIR_00P   *size_MatC];
+		fPP0source = &DC[DIR_MM0  *size_MatC];
+		fMM0source = &DC[DIR_PP0  *size_MatC];
+		fPM0source = &DC[DIR_MP0  *size_MatC];
+		fMP0source = &DC[DIR_PM0  *size_MatC];
+		fP0Psource = &DC[DIR_M0M  *size_MatC];
+		fM0Msource = &DC[DIR_P0P  *size_MatC];
+		fP0Msource = &DC[DIR_M0P  *size_MatC];
+		fM0Psource = &DC[DIR_P0M  *size_MatC];
+		f0PPsource = &DC[DIR_0MM  *size_MatC];
+		f0MMsource = &DC[DIR_0PP  *size_MatC];
+		f0PMsource = &DC[DIR_0MP  *size_MatC];
+		f0MPsource = &DC[DIR_0PM  *size_MatC];
+		f000source = &DC[DIR_000*size_MatC];
+		fMMMsource = &DC[DIR_PPP *size_MatC];
+		fMMPsource = &DC[DIR_PPM *size_MatC];
+		fMPPsource = &DC[DIR_PMM *size_MatC];
+		fMPMsource = &DC[DIR_PMP *size_MatC];
+		fPPMsource = &DC[DIR_MMP *size_MatC];
+		fPPPsource = &DC[DIR_MMM *size_MatC];
+		fPMPsource = &DC[DIR_MPM *size_MatC];
+		fPMMsource = &DC[DIR_MPP *size_MatC];
 	}
 
 	Distributions6 G;
-	G.g[E] = &G6[E   *size_MatF];
-	G.g[W] = &G6[W   *size_MatF];
-	G.g[N] = &G6[N   *size_MatF];
-	G.g[S] = &G6[S   *size_MatF];
-	G.g[T] = &G6[T   *size_MatF];
-	G.g[B] = &G6[B   *size_MatF];
+	G.g[DIR_P00] = &G6[DIR_P00   *size_MatF];
+	G.g[DIR_M00] = &G6[DIR_M00   *size_MatF];
+	G.g[DIR_0P0] = &G6[DIR_0P0   *size_MatF];
+	G.g[DIR_0M0] = &G6[DIR_0M0   *size_MatF];
+	G.g[DIR_00P] = &G6[DIR_00P   *size_MatF];
+	G.g[DIR_00M] = &G6[DIR_00M   *size_MatF];
 
 	////////////////////////////////////////////////////////////////////////////////
    const unsigned  ix = threadIdx.x;  // Globaler x-Index 
@@ -5522,12 +5522,12 @@ extern "C" __global__ void scaleCF_comp_D3Q27F3( real* DC,
 	  ////////////////////////////////////////////////////////////////////////////////
 
 	  ////////////////////////////////////////////////////////////////////////////////////
-	  (G.g[E])[k000] = mgcbb;
-	  (G.g[W])[kM00] = mgabb;
-	  (G.g[N])[k000] = mgbcb;
-	  (G.g[S])[k0M0] = mgbab;
-	  (G.g[T])[k000] = mgbbc;
-	  (G.g[B])[k00M] = mgbba;
+	  (G.g[DIR_P00])[k000] = mgcbb;
+	  (G.g[DIR_M00])[kM00] = mgabb;
+	  (G.g[DIR_0P0])[k000] = mgbcb;
+	  (G.g[DIR_0M0])[k0M0] = mgbab;
+	  (G.g[DIR_00P])[k000] = mgbbc;
+	  (G.g[DIR_00M])[k00M] = mgbba;
 	  ////////////////////////////////////////////////////////////////////////////////////
 	  fP00dest[k000] = mfcbb;
 	  fM00dest[kM00] = mfabb;
@@ -5949,12 +5949,12 @@ extern "C" __global__ void scaleCF_comp_D3Q27F3( real* DC,
 	  ////////////////////////////////////////////////////////////////////////////////////
 
 	  ////////////////////////////////////////////////////////////////////////////////////
-	  (G.g[E])[k000] = mgcbb;
-	  (G.g[W])[kM00] = mgabb;
-	  (G.g[N])[k000] = mgbcb;
-	  (G.g[S])[k0M0] = mgbab;
-	  (G.g[T])[k000] = mgbbc;
-	  (G.g[B])[k00M] = mgbba;
+	  (G.g[DIR_P00])[k000] = mgcbb;
+	  (G.g[DIR_M00])[kM00] = mgabb;
+	  (G.g[DIR_0P0])[k000] = mgbcb;
+	  (G.g[DIR_0M0])[k0M0] = mgbab;
+	  (G.g[DIR_00P])[k000] = mgbbc;
+	  (G.g[DIR_00M])[k00M] = mgbba;
 	  ////////////////////////////////////////////////////////////////////////////////////
 	  fP00dest[k000] = mfcbb;
 	  fM00dest[kM00] = mfabb;
@@ -6379,12 +6379,12 @@ extern "C" __global__ void scaleCF_comp_D3Q27F3( real* DC,
 	  ////////////////////////////////////////////////////////////////////////////////
 
 	  ////////////////////////////////////////////////////////////////////////////////////
-	  (G.g[E])[k000] = mgcbb;
-	  (G.g[W])[kM00] = mgabb;
-	  (G.g[N])[k000] = mgbcb;
-	  (G.g[S])[k0M0] = mgbab;
-	  (G.g[T])[k000] = mgbbc;
-	  (G.g[B])[k00M] = mgbba;
+	  (G.g[DIR_P00])[k000] = mgcbb;
+	  (G.g[DIR_M00])[kM00] = mgabb;
+	  (G.g[DIR_0P0])[k000] = mgbcb;
+	  (G.g[DIR_0M0])[k0M0] = mgbab;
+	  (G.g[DIR_00P])[k000] = mgbbc;
+	  (G.g[DIR_00M])[k00M] = mgbba;
 	  ////////////////////////////////////////////////////////////////////////////////////
 	  fP00dest[k000] = mfcbb;
 	  fM00dest[kM00] = mfabb;
@@ -6809,12 +6809,12 @@ extern "C" __global__ void scaleCF_comp_D3Q27F3( real* DC,
 	  ////////////////////////////////////////////////////////////////////////////////
 
 	  ////////////////////////////////////////////////////////////////////////////////////
-	  (G.g[E])[k000] = mgcbb;
-	  (G.g[W])[kM00] = mgabb;
-	  (G.g[N])[k000] = mgbcb;
-	  (G.g[S])[k0M0] = mgbab;
-	  (G.g[T])[k000] = mgbbc;
-	  (G.g[B])[k00M] = mgbba;
+	  (G.g[DIR_P00])[k000] = mgcbb;
+	  (G.g[DIR_M00])[kM00] = mgabb;
+	  (G.g[DIR_0P0])[k000] = mgbcb;
+	  (G.g[DIR_0M0])[k0M0] = mgbab;
+	  (G.g[DIR_00P])[k000] = mgbbc;
+	  (G.g[DIR_00M])[k00M] = mgbba;
 	  ////////////////////////////////////////////////////////////////////////////////////
 	  fP00dest[k000] = mfcbb;
 	  fM00dest[kM00] = mfabb;
@@ -7249,12 +7249,12 @@ extern "C" __global__ void scaleCF_comp_D3Q27F3( real* DC,
 	  ////////////////////////////////////////////////////////////////////////////////
 
 	  ////////////////////////////////////////////////////////////////////////////////////
-	  (G.g[E])[k000] = mgcbb;
-	  (G.g[W])[kM00] = mgabb;
-	  (G.g[N])[k000] = mgbcb;
-	  (G.g[S])[k0M0] = mgbab;
-	  (G.g[T])[k000] = mgbbc;
-	  (G.g[B])[k00M] = mgbba;
+	  (G.g[DIR_P00])[k000] = mgcbb;
+	  (G.g[DIR_M00])[kM00] = mgabb;
+	  (G.g[DIR_0P0])[k000] = mgbcb;
+	  (G.g[DIR_0M0])[k0M0] = mgbab;
+	  (G.g[DIR_00P])[k000] = mgbbc;
+	  (G.g[DIR_00M])[k00M] = mgbba;
 	  ////////////////////////////////////////////////////////////////////////////////////
 	  fP00dest[k000] = mfcbb;
 	  fM00dest[kM00] = mfabb;
@@ -7679,12 +7679,12 @@ extern "C" __global__ void scaleCF_comp_D3Q27F3( real* DC,
 	  ////////////////////////////////////////////////////////////////////////////////
 
 	  ////////////////////////////////////////////////////////////////////////////////////
-	  (G.g[E])[k000] = mgcbb;
-	  (G.g[W])[kM00] = mgabb;
-	  (G.g[N])[k000] = mgbcb;
-	  (G.g[S])[k0M0] = mgbab;
-	  (G.g[T])[k000] = mgbbc;
-	  (G.g[B])[k00M] = mgbba;
+	  (G.g[DIR_P00])[k000] = mgcbb;
+	  (G.g[DIR_M00])[kM00] = mgabb;
+	  (G.g[DIR_0P0])[k000] = mgbcb;
+	  (G.g[DIR_0M0])[k0M0] = mgbab;
+	  (G.g[DIR_00P])[k000] = mgbbc;
+	  (G.g[DIR_00M])[k00M] = mgbba;
 	  ////////////////////////////////////////////////////////////////////////////////////
 	  fP00dest[k000] = mfcbb;
 	  fM00dest[kM00] = mfabb;
@@ -8109,12 +8109,12 @@ extern "C" __global__ void scaleCF_comp_D3Q27F3( real* DC,
 	  ////////////////////////////////////////////////////////////////////////////////
 
 	  ////////////////////////////////////////////////////////////////////////////////////
-	  (G.g[E])[k000] = mgcbb;
-	  (G.g[W])[kM00] = mgabb;
-	  (G.g[N])[k000] = mgbcb;
-	  (G.g[S])[k0M0] = mgbab;
-	  (G.g[T])[k000] = mgbbc;
-	  (G.g[B])[k00M] = mgbba;
+	  (G.g[DIR_P00])[k000] = mgcbb;
+	  (G.g[DIR_M00])[kM00] = mgabb;
+	  (G.g[DIR_0P0])[k000] = mgbcb;
+	  (G.g[DIR_0M0])[k0M0] = mgbab;
+	  (G.g[DIR_00P])[k000] = mgbbc;
+	  (G.g[DIR_00M])[k00M] = mgbba;
 	  ////////////////////////////////////////////////////////////////////////////////////
 	  fP00dest[k000] = mfcbb;
 	  fM00dest[kM00] = mfabb;
@@ -8539,12 +8539,12 @@ extern "C" __global__ void scaleCF_comp_D3Q27F3( real* DC,
 	  ////////////////////////////////////////////////////////////////////////////////
 
 	  ////////////////////////////////////////////////////////////////////////////////////
-	  (G.g[E])[k000] = mgcbb;
-	  (G.g[W])[kM00] = mgabb;
-	  (G.g[N])[k000] = mgbcb;
-	  (G.g[S])[k0M0] = mgbab;
-	  (G.g[T])[k000] = mgbbc;
-	  (G.g[B])[k00M] = mgbba;
+	  (G.g[DIR_P00])[k000] = mgcbb;
+	  (G.g[DIR_M00])[kM00] = mgabb;
+	  (G.g[DIR_0P0])[k000] = mgbcb;
+	  (G.g[DIR_0M0])[k0M0] = mgbab;
+	  (G.g[DIR_00P])[k000] = mgbbc;
+	  (G.g[DIR_00M])[k00M] = mgbba;
 	  ////////////////////////////////////////////////////////////////////////////////////
 	  fP00dest[k000] = mfcbb;
 	  fM00dest[kM00] = mfabb;
diff --git a/src/gpu/VirtualFluids_GPU/GPU/ScaleFC27.cu b/src/gpu/VirtualFluids_GPU/GPU/ScaleFC27.cu
index 773a5829d53be7966008758e1271c4754348287c..3ae909e4c4350b4d6cc4003b49a235bcd7d6828c 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/ScaleFC27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/ScaleFC27.cu
@@ -14,7 +14,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleFC_0817_comp_27( real* DC, 
+__global__ void scaleFC_0817_comp_27( real* DC, 
 												 real* DF, 
 												 unsigned int* neighborCX,
 												 unsigned int* neighborCY,
@@ -43,33 +43,33 @@ extern "C" __global__ void scaleFC_0817_comp_27( real* DC,
 	   *f000source, *fMMMsource, *fMMPsource, *fMPPsource, *fMPMsource, *fPPMsource, *fPPPsource, *fPMPsource, *fPMMsource;
 
 
-   fP00source = &DF[E   *size_MatF];
-   fM00source = &DF[W   *size_MatF];
-   f0P0source = &DF[N   *size_MatF];
-   f0M0source = &DF[S   *size_MatF];
-   f00Psource = &DF[T   *size_MatF];
-   f00Msource = &DF[B   *size_MatF];
-   fPP0source = &DF[NE  *size_MatF];
-   fMM0source = &DF[SW  *size_MatF];
-   fPM0source = &DF[SE  *size_MatF];
-   fMP0source = &DF[NW  *size_MatF];
-   fP0Psource = &DF[TE  *size_MatF];
-   fM0Msource = &DF[BW  *size_MatF];
-   fP0Msource = &DF[BE  *size_MatF];
-   fM0Psource = &DF[TW  *size_MatF];
-   f0PPsource = &DF[TN  *size_MatF];
-   f0MMsource = &DF[BS  *size_MatF];
-   f0PMsource = &DF[BN  *size_MatF];
-   f0MPsource = &DF[TS  *size_MatF];
-   f000source = &DF[REST*size_MatF];
-   fMMMsource = &DF[BSW *size_MatF];
-   fMMPsource = &DF[TSW *size_MatF];
-   fMPPsource = &DF[TNW *size_MatF];
-   fMPMsource = &DF[BNW *size_MatF];
-   fPPMsource = &DF[BNE *size_MatF];
-   fPPPsource = &DF[TNE *size_MatF];
-   fPMPsource = &DF[TSE *size_MatF];
-   fPMMsource = &DF[BSE *size_MatF];
+   fP00source = &DF[DIR_P00   *size_MatF];
+   fM00source = &DF[DIR_M00   *size_MatF];
+   f0P0source = &DF[DIR_0P0   *size_MatF];
+   f0M0source = &DF[DIR_0M0   *size_MatF];
+   f00Psource = &DF[DIR_00P   *size_MatF];
+   f00Msource = &DF[DIR_00M   *size_MatF];
+   fPP0source = &DF[DIR_PP0  *size_MatF];
+   fMM0source = &DF[DIR_MM0  *size_MatF];
+   fPM0source = &DF[DIR_PM0  *size_MatF];
+   fMP0source = &DF[DIR_MP0  *size_MatF];
+   fP0Psource = &DF[DIR_P0P  *size_MatF];
+   fM0Msource = &DF[DIR_M0M  *size_MatF];
+   fP0Msource = &DF[DIR_P0M  *size_MatF];
+   fM0Psource = &DF[DIR_M0P  *size_MatF];
+   f0PPsource = &DF[DIR_0PP  *size_MatF];
+   f0MMsource = &DF[DIR_0MM  *size_MatF];
+   f0PMsource = &DF[DIR_0PM  *size_MatF];
+   f0MPsource = &DF[DIR_0MP  *size_MatF];
+   f000source = &DF[DIR_000*size_MatF];
+   fMMMsource = &DF[DIR_MMM *size_MatF];
+   fMMPsource = &DF[DIR_MMP *size_MatF];
+   fMPPsource = &DF[DIR_MPP *size_MatF];
+   fMPMsource = &DF[DIR_MPM *size_MatF];
+   fPPMsource = &DF[DIR_PPM *size_MatF];
+   fPPPsource = &DF[DIR_PPP *size_MatF];
+   fPMPsource = &DF[DIR_PMP *size_MatF];
+   fPMMsource = &DF[DIR_PMM *size_MatF];
 
    real
 	   *fP00dest, *fM00dest, *f0P0dest, *f0M0dest, *f00Pdest, *f00Mdest, *fPP0dest, *fMM0dest, *fPM0dest,
@@ -78,63 +78,63 @@ extern "C" __global__ void scaleFC_0817_comp_27( real* DC,
 
    if (isEvenTimestep==true)
    {
-	   fP00dest = &DC[E   *size_MatC];
-	   fM00dest = &DC[W   *size_MatC];
-	   f0P0dest = &DC[N   *size_MatC];
-	   f0M0dest = &DC[S   *size_MatC];
-	   f00Pdest = &DC[T   *size_MatC];
-	   f00Mdest = &DC[B   *size_MatC];
-	   fPP0dest = &DC[NE  *size_MatC];
-	   fMM0dest = &DC[SW  *size_MatC];
-	   fPM0dest = &DC[SE  *size_MatC];
-	   fMP0dest = &DC[NW  *size_MatC];
-	   fP0Pdest = &DC[TE  *size_MatC];
-	   fM0Mdest = &DC[BW  *size_MatC];
-	   fP0Mdest = &DC[BE  *size_MatC];
-	   fM0Pdest = &DC[TW  *size_MatC];
-	   f0PPdest = &DC[TN  *size_MatC];
-	   f0MMdest = &DC[BS  *size_MatC];
-	   f0PMdest = &DC[BN  *size_MatC];
-	   f0MPdest = &DC[TS  *size_MatC];
-	   f000dest = &DC[REST*size_MatC];
-	   fMMMdest = &DC[BSW *size_MatC];
-	   fMMPdest = &DC[TSW *size_MatC];
-	   fMPPdest = &DC[TNW *size_MatC];
-	   fMPMdest = &DC[BNW *size_MatC];
-	   fPPMdest = &DC[BNE *size_MatC];
-	   fPPPdest = &DC[TNE *size_MatC];
-	   fPMPdest = &DC[TSE *size_MatC];
-	   fPMMdest = &DC[BSE *size_MatC];
+	   fP00dest = &DC[DIR_P00   *size_MatC];
+	   fM00dest = &DC[DIR_M00   *size_MatC];
+	   f0P0dest = &DC[DIR_0P0   *size_MatC];
+	   f0M0dest = &DC[DIR_0M0   *size_MatC];
+	   f00Pdest = &DC[DIR_00P   *size_MatC];
+	   f00Mdest = &DC[DIR_00M   *size_MatC];
+	   fPP0dest = &DC[DIR_PP0  *size_MatC];
+	   fMM0dest = &DC[DIR_MM0  *size_MatC];
+	   fPM0dest = &DC[DIR_PM0  *size_MatC];
+	   fMP0dest = &DC[DIR_MP0  *size_MatC];
+	   fP0Pdest = &DC[DIR_P0P  *size_MatC];
+	   fM0Mdest = &DC[DIR_M0M  *size_MatC];
+	   fP0Mdest = &DC[DIR_P0M  *size_MatC];
+	   fM0Pdest = &DC[DIR_M0P  *size_MatC];
+	   f0PPdest = &DC[DIR_0PP  *size_MatC];
+	   f0MMdest = &DC[DIR_0MM  *size_MatC];
+	   f0PMdest = &DC[DIR_0PM  *size_MatC];
+	   f0MPdest = &DC[DIR_0MP  *size_MatC];
+	   f000dest = &DC[DIR_000*size_MatC];
+	   fMMMdest = &DC[DIR_MMM *size_MatC];
+	   fMMPdest = &DC[DIR_MMP *size_MatC];
+	   fMPPdest = &DC[DIR_MPP *size_MatC];
+	   fMPMdest = &DC[DIR_MPM *size_MatC];
+	   fPPMdest = &DC[DIR_PPM *size_MatC];
+	   fPPPdest = &DC[DIR_PPP *size_MatC];
+	   fPMPdest = &DC[DIR_PMP *size_MatC];
+	   fPMMdest = &DC[DIR_PMM *size_MatC];
    } 
    else
    {
-	   fP00dest = &DC[W   *size_MatC];
-	   fM00dest = &DC[E   *size_MatC];
-	   f0P0dest = &DC[S   *size_MatC];
-	   f0M0dest = &DC[N   *size_MatC];
-	   f00Pdest = &DC[B   *size_MatC];
-	   f00Mdest = &DC[T   *size_MatC];
-	   fPP0dest = &DC[SW  *size_MatC];
-	   fMM0dest = &DC[NE  *size_MatC];
-	   fPM0dest = &DC[NW  *size_MatC];
-	   fMP0dest = &DC[SE  *size_MatC];
-	   fP0Pdest = &DC[BW  *size_MatC];
-	   fM0Mdest = &DC[TE  *size_MatC];
-	   fP0Mdest = &DC[TW  *size_MatC];
-	   fM0Pdest = &DC[BE  *size_MatC];
-	   f0PPdest = &DC[BS  *size_MatC];
-	   f0MMdest = &DC[TN  *size_MatC];
-	   f0PMdest = &DC[TS  *size_MatC];
-	   f0MPdest = &DC[BN  *size_MatC];
-	   f000dest = &DC[REST*size_MatC];
-	   fMMMdest = &DC[TNE *size_MatC];
-	   fMMPdest = &DC[BNE *size_MatC];
-	   fMPPdest = &DC[BSE *size_MatC];
-	   fMPMdest = &DC[TSE *size_MatC];
-	   fPPMdest = &DC[TSW *size_MatC];
-	   fPPPdest = &DC[BSW *size_MatC];
-	   fPMPdest = &DC[BNW *size_MatC];
-	   fPMMdest = &DC[TNW *size_MatC];
+	   fP00dest = &DC[DIR_M00   *size_MatC];
+	   fM00dest = &DC[DIR_P00   *size_MatC];
+	   f0P0dest = &DC[DIR_0M0   *size_MatC];
+	   f0M0dest = &DC[DIR_0P0   *size_MatC];
+	   f00Pdest = &DC[DIR_00M   *size_MatC];
+	   f00Mdest = &DC[DIR_00P   *size_MatC];
+	   fPP0dest = &DC[DIR_MM0  *size_MatC];
+	   fMM0dest = &DC[DIR_PP0  *size_MatC];
+	   fPM0dest = &DC[DIR_MP0  *size_MatC];
+	   fMP0dest = &DC[DIR_PM0  *size_MatC];
+	   fP0Pdest = &DC[DIR_M0M  *size_MatC];
+	   fM0Mdest = &DC[DIR_P0P  *size_MatC];
+	   fP0Mdest = &DC[DIR_M0P  *size_MatC];
+	   fM0Pdest = &DC[DIR_P0M  *size_MatC];
+	   f0PPdest = &DC[DIR_0MM  *size_MatC];
+	   f0MMdest = &DC[DIR_0PP  *size_MatC];
+	   f0PMdest = &DC[DIR_0MP  *size_MatC];
+	   f0MPdest = &DC[DIR_0PM  *size_MatC];
+	   f000dest = &DC[DIR_000*size_MatC];
+	   fMMMdest = &DC[DIR_PPP *size_MatC];
+	   fMMPdest = &DC[DIR_PPM *size_MatC];
+	   fMPPdest = &DC[DIR_PMM *size_MatC];
+	   fMPMdest = &DC[DIR_PMP *size_MatC];
+	   fPPMdest = &DC[DIR_MMP *size_MatC];
+	   fPPPdest = &DC[DIR_MMM *size_MatC];
+	   fPMPdest = &DC[DIR_MPM *size_MatC];
+	   fPMMdest = &DC[DIR_MPP *size_MatC];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  ix = threadIdx.x;  // Globaler x-Index 
@@ -1210,7 +1210,7 @@ extern "C" __global__ void scaleFC_0817_comp_27( real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleFC_AA2016_comp_27(real* DC, 
+__global__ void scaleFC_AA2016_comp_27(real* DC, 
 												  real* DF, 
 												  unsigned int* neighborCX,
 												  unsigned int* neighborCY,
@@ -1236,96 +1236,96 @@ extern "C" __global__ void scaleFC_AA2016_comp_27(real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[E   *size_MatF];
-   fwF    = &DF[W   *size_MatF];
-   fnF    = &DF[N   *size_MatF];
-   fsF    = &DF[S   *size_MatF];
-   ftF    = &DF[T   *size_MatF];
-   fbF    = &DF[B   *size_MatF];
-   fneF   = &DF[NE  *size_MatF];
-   fswF   = &DF[SW  *size_MatF];
-   fseF   = &DF[SE  *size_MatF];
-   fnwF   = &DF[NW  *size_MatF];
-   fteF   = &DF[TE  *size_MatF];
-   fbwF   = &DF[BW  *size_MatF];
-   fbeF   = &DF[BE  *size_MatF];
-   ftwF   = &DF[TW  *size_MatF];
-   ftnF   = &DF[TN  *size_MatF];
-   fbsF   = &DF[BS  *size_MatF];
-   fbnF   = &DF[BN  *size_MatF];
-   ftsF   = &DF[TS  *size_MatF];
-   fzeroF = &DF[REST*size_MatF];
-   ftneF  = &DF[TNE *size_MatF];
-   ftswF  = &DF[TSW *size_MatF];
-   ftseF  = &DF[TSE *size_MatF];
-   ftnwF  = &DF[TNW *size_MatF];
-   fbneF  = &DF[BNE *size_MatF];
-   fbswF  = &DF[BSW *size_MatF];
-   fbseF  = &DF[BSE *size_MatF];
-   fbnwF  = &DF[BNW *size_MatF];
+   feF    = &DF[DIR_P00   *size_MatF];
+   fwF    = &DF[DIR_M00   *size_MatF];
+   fnF    = &DF[DIR_0P0   *size_MatF];
+   fsF    = &DF[DIR_0M0   *size_MatF];
+   ftF    = &DF[DIR_00P   *size_MatF];
+   fbF    = &DF[DIR_00M   *size_MatF];
+   fneF   = &DF[DIR_PP0  *size_MatF];
+   fswF   = &DF[DIR_MM0  *size_MatF];
+   fseF   = &DF[DIR_PM0  *size_MatF];
+   fnwF   = &DF[DIR_MP0  *size_MatF];
+   fteF   = &DF[DIR_P0P  *size_MatF];
+   fbwF   = &DF[DIR_M0M  *size_MatF];
+   fbeF   = &DF[DIR_P0M  *size_MatF];
+   ftwF   = &DF[DIR_M0P  *size_MatF];
+   ftnF   = &DF[DIR_0PP  *size_MatF];
+   fbsF   = &DF[DIR_0MM  *size_MatF];
+   fbnF   = &DF[DIR_0PM  *size_MatF];
+   ftsF   = &DF[DIR_0MP  *size_MatF];
+   fzeroF = &DF[DIR_000*size_MatF];
+   ftneF  = &DF[DIR_PPP *size_MatF];
+   ftswF  = &DF[DIR_MMP *size_MatF];
+   ftseF  = &DF[DIR_PMP *size_MatF];
+   ftnwF  = &DF[DIR_MPP *size_MatF];
+   fbneF  = &DF[DIR_PPM *size_MatF];
+   fbswF  = &DF[DIR_MMM *size_MatF];
+   fbseF  = &DF[DIR_PMM *size_MatF];
+   fbnwF  = &DF[DIR_MPM *size_MatF];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[E   *size_MatC];
-      fwC    = &DC[W   *size_MatC];
-      fnC    = &DC[N   *size_MatC];
-      fsC    = &DC[S   *size_MatC];
-      ftC    = &DC[T   *size_MatC];
-      fbC    = &DC[B   *size_MatC];
-      fneC   = &DC[NE  *size_MatC];
-      fswC   = &DC[SW  *size_MatC];
-      fseC   = &DC[SE  *size_MatC];
-      fnwC   = &DC[NW  *size_MatC];
-      fteC   = &DC[TE  *size_MatC];
-      fbwC   = &DC[BW  *size_MatC];
-      fbeC   = &DC[BE  *size_MatC];
-      ftwC   = &DC[TW  *size_MatC];
-      ftnC   = &DC[TN  *size_MatC];
-      fbsC   = &DC[BS  *size_MatC];
-      fbnC   = &DC[BN  *size_MatC];
-      ftsC   = &DC[TS  *size_MatC];
-      fzeroC = &DC[REST*size_MatC];
-      ftneC  = &DC[TNE *size_MatC];
-      ftswC  = &DC[TSW *size_MatC];
-      ftseC  = &DC[TSE *size_MatC];
-      ftnwC  = &DC[TNW *size_MatC];
-      fbneC  = &DC[BNE *size_MatC];
-      fbswC  = &DC[BSW *size_MatC];
-      fbseC  = &DC[BSE *size_MatC];
-      fbnwC  = &DC[BNW *size_MatC];
+      feC    = &DC[DIR_P00   *size_MatC];
+      fwC    = &DC[DIR_M00   *size_MatC];
+      fnC    = &DC[DIR_0P0   *size_MatC];
+      fsC    = &DC[DIR_0M0   *size_MatC];
+      ftC    = &DC[DIR_00P   *size_MatC];
+      fbC    = &DC[DIR_00M   *size_MatC];
+      fneC   = &DC[DIR_PP0  *size_MatC];
+      fswC   = &DC[DIR_MM0  *size_MatC];
+      fseC   = &DC[DIR_PM0  *size_MatC];
+      fnwC   = &DC[DIR_MP0  *size_MatC];
+      fteC   = &DC[DIR_P0P  *size_MatC];
+      fbwC   = &DC[DIR_M0M  *size_MatC];
+      fbeC   = &DC[DIR_P0M  *size_MatC];
+      ftwC   = &DC[DIR_M0P  *size_MatC];
+      ftnC   = &DC[DIR_0PP  *size_MatC];
+      fbsC   = &DC[DIR_0MM  *size_MatC];
+      fbnC   = &DC[DIR_0PM  *size_MatC];
+      ftsC   = &DC[DIR_0MP  *size_MatC];
+      fzeroC = &DC[DIR_000*size_MatC];
+      ftneC  = &DC[DIR_PPP *size_MatC];
+      ftswC  = &DC[DIR_MMP *size_MatC];
+      ftseC  = &DC[DIR_PMP *size_MatC];
+      ftnwC  = &DC[DIR_MPP *size_MatC];
+      fbneC  = &DC[DIR_PPM *size_MatC];
+      fbswC  = &DC[DIR_MMM *size_MatC];
+      fbseC  = &DC[DIR_PMM *size_MatC];
+      fbnwC  = &DC[DIR_MPM *size_MatC];
    } 
    else
    {
-      fwC    = &DC[E   *size_MatC];
-      feC    = &DC[W   *size_MatC];
-      fsC    = &DC[N   *size_MatC];
-      fnC    = &DC[S   *size_MatC];
-      fbC    = &DC[T   *size_MatC];
-      ftC    = &DC[B   *size_MatC];
-      fswC   = &DC[NE  *size_MatC];
-      fneC   = &DC[SW  *size_MatC];
-      fnwC   = &DC[SE  *size_MatC];
-      fseC   = &DC[NW  *size_MatC];
-      fbwC   = &DC[TE  *size_MatC];
-      fteC   = &DC[BW  *size_MatC];
-      ftwC   = &DC[BE  *size_MatC];
-      fbeC   = &DC[TW  *size_MatC];
-      fbsC   = &DC[TN  *size_MatC];
-      ftnC   = &DC[BS  *size_MatC];
-      ftsC   = &DC[BN  *size_MatC];
-      fbnC   = &DC[TS  *size_MatC];
-      fzeroC = &DC[REST*size_MatC];
-      fbswC  = &DC[TNE *size_MatC];
-      fbneC  = &DC[TSW *size_MatC];
-      fbnwC  = &DC[TSE *size_MatC];
-      fbseC  = &DC[TNW *size_MatC];
-      ftswC  = &DC[BNE *size_MatC];
-      ftneC  = &DC[BSW *size_MatC];
-      ftnwC  = &DC[BSE *size_MatC];
-      ftseC  = &DC[BNW *size_MatC];
+      fwC    = &DC[DIR_P00   *size_MatC];
+      feC    = &DC[DIR_M00   *size_MatC];
+      fsC    = &DC[DIR_0P0   *size_MatC];
+      fnC    = &DC[DIR_0M0   *size_MatC];
+      fbC    = &DC[DIR_00P   *size_MatC];
+      ftC    = &DC[DIR_00M   *size_MatC];
+      fswC   = &DC[DIR_PP0  *size_MatC];
+      fneC   = &DC[DIR_MM0  *size_MatC];
+      fnwC   = &DC[DIR_PM0  *size_MatC];
+      fseC   = &DC[DIR_MP0  *size_MatC];
+      fbwC   = &DC[DIR_P0P  *size_MatC];
+      fteC   = &DC[DIR_M0M  *size_MatC];
+      ftwC   = &DC[DIR_P0M  *size_MatC];
+      fbeC   = &DC[DIR_M0P  *size_MatC];
+      fbsC   = &DC[DIR_0PP  *size_MatC];
+      ftnC   = &DC[DIR_0MM  *size_MatC];
+      ftsC   = &DC[DIR_0PM  *size_MatC];
+      fbnC   = &DC[DIR_0MP  *size_MatC];
+      fzeroC = &DC[DIR_000*size_MatC];
+      fbswC  = &DC[DIR_PPP *size_MatC];
+      fbneC  = &DC[DIR_MMP *size_MatC];
+      fbnwC  = &DC[DIR_PMP *size_MatC];
+      fbseC  = &DC[DIR_MPP *size_MatC];
+      ftswC  = &DC[DIR_PPM *size_MatC];
+      ftneC  = &DC[DIR_MMM *size_MatC];
+      ftnwC  = &DC[DIR_PMM *size_MatC];
+      ftseC  = &DC[DIR_MPM *size_MatC];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  ix = threadIdx.x;  // Globaler x-Index 
@@ -5399,7 +5399,7 @@ extern "C" __global__ void scaleFC_AA2016_comp_27(real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleFC_RhoSq_3rdMom_comp_27(real* DC, 
+__global__ void scaleFC_RhoSq_3rdMom_comp_27(real* DC, 
 														real* DF, 
 														unsigned int* neighborCX,
 														unsigned int* neighborCY,
@@ -5425,96 +5425,96 @@ extern "C" __global__ void scaleFC_RhoSq_3rdMom_comp_27(real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[E   *size_MatF];
-   fwF    = &DF[W   *size_MatF];
-   fnF    = &DF[N   *size_MatF];
-   fsF    = &DF[S   *size_MatF];
-   ftF    = &DF[T   *size_MatF];
-   fbF    = &DF[B   *size_MatF];
-   fneF   = &DF[NE  *size_MatF];
-   fswF   = &DF[SW  *size_MatF];
-   fseF   = &DF[SE  *size_MatF];
-   fnwF   = &DF[NW  *size_MatF];
-   fteF   = &DF[TE  *size_MatF];
-   fbwF   = &DF[BW  *size_MatF];
-   fbeF   = &DF[BE  *size_MatF];
-   ftwF   = &DF[TW  *size_MatF];
-   ftnF   = &DF[TN  *size_MatF];
-   fbsF   = &DF[BS  *size_MatF];
-   fbnF   = &DF[BN  *size_MatF];
-   ftsF   = &DF[TS  *size_MatF];
-   fzeroF = &DF[REST*size_MatF];
-   ftneF  = &DF[TNE *size_MatF];
-   ftswF  = &DF[TSW *size_MatF];
-   ftseF  = &DF[TSE *size_MatF];
-   ftnwF  = &DF[TNW *size_MatF];
-   fbneF  = &DF[BNE *size_MatF];
-   fbswF  = &DF[BSW *size_MatF];
-   fbseF  = &DF[BSE *size_MatF];
-   fbnwF  = &DF[BNW *size_MatF];
+   feF    = &DF[DIR_P00   *size_MatF];
+   fwF    = &DF[DIR_M00   *size_MatF];
+   fnF    = &DF[DIR_0P0   *size_MatF];
+   fsF    = &DF[DIR_0M0   *size_MatF];
+   ftF    = &DF[DIR_00P   *size_MatF];
+   fbF    = &DF[DIR_00M   *size_MatF];
+   fneF   = &DF[DIR_PP0  *size_MatF];
+   fswF   = &DF[DIR_MM0  *size_MatF];
+   fseF   = &DF[DIR_PM0  *size_MatF];
+   fnwF   = &DF[DIR_MP0  *size_MatF];
+   fteF   = &DF[DIR_P0P  *size_MatF];
+   fbwF   = &DF[DIR_M0M  *size_MatF];
+   fbeF   = &DF[DIR_P0M  *size_MatF];
+   ftwF   = &DF[DIR_M0P  *size_MatF];
+   ftnF   = &DF[DIR_0PP  *size_MatF];
+   fbsF   = &DF[DIR_0MM  *size_MatF];
+   fbnF   = &DF[DIR_0PM  *size_MatF];
+   ftsF   = &DF[DIR_0MP  *size_MatF];
+   fzeroF = &DF[DIR_000*size_MatF];
+   ftneF  = &DF[DIR_PPP *size_MatF];
+   ftswF  = &DF[DIR_MMP *size_MatF];
+   ftseF  = &DF[DIR_PMP *size_MatF];
+   ftnwF  = &DF[DIR_MPP *size_MatF];
+   fbneF  = &DF[DIR_PPM *size_MatF];
+   fbswF  = &DF[DIR_MMM *size_MatF];
+   fbseF  = &DF[DIR_PMM *size_MatF];
+   fbnwF  = &DF[DIR_MPM *size_MatF];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[E   *size_MatC];
-      fwC    = &DC[W   *size_MatC];
-      fnC    = &DC[N   *size_MatC];
-      fsC    = &DC[S   *size_MatC];
-      ftC    = &DC[T   *size_MatC];
-      fbC    = &DC[B   *size_MatC];
-      fneC   = &DC[NE  *size_MatC];
-      fswC   = &DC[SW  *size_MatC];
-      fseC   = &DC[SE  *size_MatC];
-      fnwC   = &DC[NW  *size_MatC];
-      fteC   = &DC[TE  *size_MatC];
-      fbwC   = &DC[BW  *size_MatC];
-      fbeC   = &DC[BE  *size_MatC];
-      ftwC   = &DC[TW  *size_MatC];
-      ftnC   = &DC[TN  *size_MatC];
-      fbsC   = &DC[BS  *size_MatC];
-      fbnC   = &DC[BN  *size_MatC];
-      ftsC   = &DC[TS  *size_MatC];
-      fzeroC = &DC[REST*size_MatC];
-      ftneC  = &DC[TNE *size_MatC];
-      ftswC  = &DC[TSW *size_MatC];
-      ftseC  = &DC[TSE *size_MatC];
-      ftnwC  = &DC[TNW *size_MatC];
-      fbneC  = &DC[BNE *size_MatC];
-      fbswC  = &DC[BSW *size_MatC];
-      fbseC  = &DC[BSE *size_MatC];
-      fbnwC  = &DC[BNW *size_MatC];
+      feC    = &DC[DIR_P00   *size_MatC];
+      fwC    = &DC[DIR_M00   *size_MatC];
+      fnC    = &DC[DIR_0P0   *size_MatC];
+      fsC    = &DC[DIR_0M0   *size_MatC];
+      ftC    = &DC[DIR_00P   *size_MatC];
+      fbC    = &DC[DIR_00M   *size_MatC];
+      fneC   = &DC[DIR_PP0  *size_MatC];
+      fswC   = &DC[DIR_MM0  *size_MatC];
+      fseC   = &DC[DIR_PM0  *size_MatC];
+      fnwC   = &DC[DIR_MP0  *size_MatC];
+      fteC   = &DC[DIR_P0P  *size_MatC];
+      fbwC   = &DC[DIR_M0M  *size_MatC];
+      fbeC   = &DC[DIR_P0M  *size_MatC];
+      ftwC   = &DC[DIR_M0P  *size_MatC];
+      ftnC   = &DC[DIR_0PP  *size_MatC];
+      fbsC   = &DC[DIR_0MM  *size_MatC];
+      fbnC   = &DC[DIR_0PM  *size_MatC];
+      ftsC   = &DC[DIR_0MP  *size_MatC];
+      fzeroC = &DC[DIR_000*size_MatC];
+      ftneC  = &DC[DIR_PPP *size_MatC];
+      ftswC  = &DC[DIR_MMP *size_MatC];
+      ftseC  = &DC[DIR_PMP *size_MatC];
+      ftnwC  = &DC[DIR_MPP *size_MatC];
+      fbneC  = &DC[DIR_PPM *size_MatC];
+      fbswC  = &DC[DIR_MMM *size_MatC];
+      fbseC  = &DC[DIR_PMM *size_MatC];
+      fbnwC  = &DC[DIR_MPM *size_MatC];
    } 
    else
    {
-      fwC    = &DC[E   *size_MatC];
-      feC    = &DC[W   *size_MatC];
-      fsC    = &DC[N   *size_MatC];
-      fnC    = &DC[S   *size_MatC];
-      fbC    = &DC[T   *size_MatC];
-      ftC    = &DC[B   *size_MatC];
-      fswC   = &DC[NE  *size_MatC];
-      fneC   = &DC[SW  *size_MatC];
-      fnwC   = &DC[SE  *size_MatC];
-      fseC   = &DC[NW  *size_MatC];
-      fbwC   = &DC[TE  *size_MatC];
-      fteC   = &DC[BW  *size_MatC];
-      ftwC   = &DC[BE  *size_MatC];
-      fbeC   = &DC[TW  *size_MatC];
-      fbsC   = &DC[TN  *size_MatC];
-      ftnC   = &DC[BS  *size_MatC];
-      ftsC   = &DC[BN  *size_MatC];
-      fbnC   = &DC[TS  *size_MatC];
-      fzeroC = &DC[REST*size_MatC];
-      fbswC  = &DC[TNE *size_MatC];
-      fbneC  = &DC[TSW *size_MatC];
-      fbnwC  = &DC[TSE *size_MatC];
-      fbseC  = &DC[TNW *size_MatC];
-      ftswC  = &DC[BNE *size_MatC];
-      ftneC  = &DC[BSW *size_MatC];
-      ftnwC  = &DC[BSE *size_MatC];
-      ftseC  = &DC[BNW *size_MatC];
+      fwC    = &DC[DIR_P00   *size_MatC];
+      feC    = &DC[DIR_M00   *size_MatC];
+      fsC    = &DC[DIR_0P0   *size_MatC];
+      fnC    = &DC[DIR_0M0   *size_MatC];
+      fbC    = &DC[DIR_00P   *size_MatC];
+      ftC    = &DC[DIR_00M   *size_MatC];
+      fswC   = &DC[DIR_PP0  *size_MatC];
+      fneC   = &DC[DIR_MM0  *size_MatC];
+      fnwC   = &DC[DIR_PM0  *size_MatC];
+      fseC   = &DC[DIR_MP0  *size_MatC];
+      fbwC   = &DC[DIR_P0P  *size_MatC];
+      fteC   = &DC[DIR_M0M  *size_MatC];
+      ftwC   = &DC[DIR_P0M  *size_MatC];
+      fbeC   = &DC[DIR_M0P  *size_MatC];
+      fbsC   = &DC[DIR_0PP  *size_MatC];
+      ftnC   = &DC[DIR_0MM  *size_MatC];
+      ftsC   = &DC[DIR_0PM  *size_MatC];
+      fbnC   = &DC[DIR_0MP  *size_MatC];
+      fzeroC = &DC[DIR_000*size_MatC];
+      fbswC  = &DC[DIR_PPP *size_MatC];
+      fbneC  = &DC[DIR_MMP *size_MatC];
+      fbnwC  = &DC[DIR_PMP *size_MatC];
+      fbseC  = &DC[DIR_MPP *size_MatC];
+      ftswC  = &DC[DIR_PPM *size_MatC];
+      ftneC  = &DC[DIR_MMM *size_MatC];
+      ftnwC  = &DC[DIR_PMM *size_MatC];
+      ftseC  = &DC[DIR_MPM *size_MatC];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  ix = threadIdx.x;  // Globaler x-Index 
@@ -9597,93 +9597,93 @@ __device__ void scaleFC_RhoSq_comp_27_Calculation(real *DC, real *DF, unsigned i
     real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF,
         *fbnF, *ftsF, *fzeroF, *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-    feF    = &DF[E * size_MatF];
-    fwF    = &DF[W * size_MatF];
-    fnF    = &DF[N * size_MatF];
-    fsF    = &DF[S * size_MatF];
-    ftF    = &DF[T * size_MatF];
-    fbF    = &DF[B * size_MatF];
-    fneF   = &DF[NE * size_MatF];
-    fswF   = &DF[SW * size_MatF];
-    fseF   = &DF[SE * size_MatF];
-    fnwF   = &DF[NW * size_MatF];
-    fteF   = &DF[TE * size_MatF];
-    fbwF   = &DF[BW * size_MatF];
-    fbeF   = &DF[BE * size_MatF];
-    ftwF   = &DF[TW * size_MatF];
-    ftnF   = &DF[TN * size_MatF];
-    fbsF   = &DF[BS * size_MatF];
-    fbnF   = &DF[BN * size_MatF];
-    ftsF   = &DF[TS * size_MatF];
-    fzeroF = &DF[REST * size_MatF];
-    ftneF  = &DF[TNE * size_MatF];
-    ftswF  = &DF[TSW * size_MatF];
-    ftseF  = &DF[TSE * size_MatF];
-    ftnwF  = &DF[TNW * size_MatF];
-    fbneF  = &DF[BNE * size_MatF];
-    fbswF  = &DF[BSW * size_MatF];
-    fbseF  = &DF[BSE * size_MatF];
-    fbnwF  = &DF[BNW * size_MatF];
+    feF    = &DF[DIR_P00 * size_MatF];
+    fwF    = &DF[DIR_M00 * size_MatF];
+    fnF    = &DF[DIR_0P0 * size_MatF];
+    fsF    = &DF[DIR_0M0 * size_MatF];
+    ftF    = &DF[DIR_00P * size_MatF];
+    fbF    = &DF[DIR_00M * size_MatF];
+    fneF   = &DF[DIR_PP0 * size_MatF];
+    fswF   = &DF[DIR_MM0 * size_MatF];
+    fseF   = &DF[DIR_PM0 * size_MatF];
+    fnwF   = &DF[DIR_MP0 * size_MatF];
+    fteF   = &DF[DIR_P0P * size_MatF];
+    fbwF   = &DF[DIR_M0M * size_MatF];
+    fbeF   = &DF[DIR_P0M * size_MatF];
+    ftwF   = &DF[DIR_M0P * size_MatF];
+    ftnF   = &DF[DIR_0PP * size_MatF];
+    fbsF   = &DF[DIR_0MM * size_MatF];
+    fbnF   = &DF[DIR_0PM * size_MatF];
+    ftsF   = &DF[DIR_0MP * size_MatF];
+    fzeroF = &DF[DIR_000 * size_MatF];
+    ftneF  = &DF[DIR_PPP * size_MatF];
+    ftswF  = &DF[DIR_MMP * size_MatF];
+    ftseF  = &DF[DIR_PMP * size_MatF];
+    ftnwF  = &DF[DIR_MPP * size_MatF];
+    fbneF  = &DF[DIR_PPM * size_MatF];
+    fbswF  = &DF[DIR_MMM * size_MatF];
+    fbseF  = &DF[DIR_PMM * size_MatF];
+    fbnwF  = &DF[DIR_MPM * size_MatF];
 
     real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC,
         *fbnC, *ftsC, *fzeroC, *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
     if (isEvenTimestep == true) {
-        feC    = &DC[E * size_MatC];
-        fwC    = &DC[W * size_MatC];
-        fnC    = &DC[N * size_MatC];
-        fsC    = &DC[S * size_MatC];
-        ftC    = &DC[T * size_MatC];
-        fbC    = &DC[B * size_MatC];
-        fneC   = &DC[NE * size_MatC];
-        fswC   = &DC[SW * size_MatC];
-        fseC   = &DC[SE * size_MatC];
-        fnwC   = &DC[NW * size_MatC];
-        fteC   = &DC[TE * size_MatC];
-        fbwC   = &DC[BW * size_MatC];
-        fbeC   = &DC[BE * size_MatC];
-        ftwC   = &DC[TW * size_MatC];
-        ftnC   = &DC[TN * size_MatC];
-        fbsC   = &DC[BS * size_MatC];
-        fbnC   = &DC[BN * size_MatC];
-        ftsC   = &DC[TS * size_MatC];
-        fzeroC = &DC[REST * size_MatC];
-        ftneC  = &DC[TNE * size_MatC];
-        ftswC  = &DC[TSW * size_MatC];
-        ftseC  = &DC[TSE * size_MatC];
-        ftnwC  = &DC[TNW * size_MatC];
-        fbneC  = &DC[BNE * size_MatC];
-        fbswC  = &DC[BSW * size_MatC];
-        fbseC  = &DC[BSE * size_MatC];
-        fbnwC  = &DC[BNW * size_MatC];
+        feC    = &DC[DIR_P00 * size_MatC];
+        fwC    = &DC[DIR_M00 * size_MatC];
+        fnC    = &DC[DIR_0P0 * size_MatC];
+        fsC    = &DC[DIR_0M0 * size_MatC];
+        ftC    = &DC[DIR_00P * size_MatC];
+        fbC    = &DC[DIR_00M * size_MatC];
+        fneC   = &DC[DIR_PP0 * size_MatC];
+        fswC   = &DC[DIR_MM0 * size_MatC];
+        fseC   = &DC[DIR_PM0 * size_MatC];
+        fnwC   = &DC[DIR_MP0 * size_MatC];
+        fteC   = &DC[DIR_P0P * size_MatC];
+        fbwC   = &DC[DIR_M0M * size_MatC];
+        fbeC   = &DC[DIR_P0M * size_MatC];
+        ftwC   = &DC[DIR_M0P * size_MatC];
+        ftnC   = &DC[DIR_0PP * size_MatC];
+        fbsC   = &DC[DIR_0MM * size_MatC];
+        fbnC   = &DC[DIR_0PM * size_MatC];
+        ftsC   = &DC[DIR_0MP * size_MatC];
+        fzeroC = &DC[DIR_000 * size_MatC];
+        ftneC  = &DC[DIR_PPP * size_MatC];
+        ftswC  = &DC[DIR_MMP * size_MatC];
+        ftseC  = &DC[DIR_PMP * size_MatC];
+        ftnwC  = &DC[DIR_MPP * size_MatC];
+        fbneC  = &DC[DIR_PPM * size_MatC];
+        fbswC  = &DC[DIR_MMM * size_MatC];
+        fbseC  = &DC[DIR_PMM * size_MatC];
+        fbnwC  = &DC[DIR_MPM * size_MatC];
     } else {
-        fwC    = &DC[E * size_MatC];
-        feC    = &DC[W * size_MatC];
-        fsC    = &DC[N * size_MatC];
-        fnC    = &DC[S * size_MatC];
-        fbC    = &DC[T * size_MatC];
-        ftC    = &DC[B * size_MatC];
-        fswC   = &DC[NE * size_MatC];
-        fneC   = &DC[SW * size_MatC];
-        fnwC   = &DC[SE * size_MatC];
-        fseC   = &DC[NW * size_MatC];
-        fbwC   = &DC[TE * size_MatC];
-        fteC   = &DC[BW * size_MatC];
-        ftwC   = &DC[BE * size_MatC];
-        fbeC   = &DC[TW * size_MatC];
-        fbsC   = &DC[TN * size_MatC];
-        ftnC   = &DC[BS * size_MatC];
-        ftsC   = &DC[BN * size_MatC];
-        fbnC   = &DC[TS * size_MatC];
-        fzeroC = &DC[REST * size_MatC];
-        fbswC  = &DC[TNE * size_MatC];
-        fbneC  = &DC[TSW * size_MatC];
-        fbnwC  = &DC[TSE * size_MatC];
-        fbseC  = &DC[TNW * size_MatC];
-        ftswC  = &DC[BNE * size_MatC];
-        ftneC  = &DC[BSW * size_MatC];
-        ftnwC  = &DC[BSE * size_MatC];
-        ftseC  = &DC[BNW * size_MatC];
+        fwC    = &DC[DIR_P00 * size_MatC];
+        feC    = &DC[DIR_M00 * size_MatC];
+        fsC    = &DC[DIR_0P0 * size_MatC];
+        fnC    = &DC[DIR_0M0 * size_MatC];
+        fbC    = &DC[DIR_00P * size_MatC];
+        ftC    = &DC[DIR_00M * size_MatC];
+        fswC   = &DC[DIR_PP0 * size_MatC];
+        fneC   = &DC[DIR_MM0 * size_MatC];
+        fnwC   = &DC[DIR_PM0 * size_MatC];
+        fseC   = &DC[DIR_MP0 * size_MatC];
+        fbwC   = &DC[DIR_P0P * size_MatC];
+        fteC   = &DC[DIR_M0M * size_MatC];
+        ftwC   = &DC[DIR_P0M * size_MatC];
+        fbeC   = &DC[DIR_M0P * size_MatC];
+        fbsC   = &DC[DIR_0PP * size_MatC];
+        ftnC   = &DC[DIR_0MM * size_MatC];
+        ftsC   = &DC[DIR_0PM * size_MatC];
+        fbnC   = &DC[DIR_0MP * size_MatC];
+        fzeroC = &DC[DIR_000 * size_MatC];
+        fbswC  = &DC[DIR_PPP * size_MatC];
+        fbneC  = &DC[DIR_MMP * size_MatC];
+        fbnwC  = &DC[DIR_PMP * size_MatC];
+        fbseC  = &DC[DIR_MPP * size_MatC];
+        ftswC  = &DC[DIR_PPM * size_MatC];
+        ftneC  = &DC[DIR_MMM * size_MatC];
+        ftnwC  = &DC[DIR_PMM * size_MatC];
+        ftseC  = &DC[DIR_MPM * size_MatC];
     }
 
     ////////////////////////////////////////////////////////////////////////////////
@@ -11056,7 +11056,7 @@ __device__ void scaleFC_RhoSq_comp_27_Calculation(real *DC, real *DF, unsigned i
     }
 }
 
-extern "C" __global__ void scaleFC_RhoSq_comp_27(real* DC, 
+__global__ void scaleFC_RhoSq_comp_27(real* DC, 
 												 real* DF, 
 												 unsigned int* neighborCX,
 												 unsigned int* neighborCY,
@@ -11149,7 +11149,7 @@ extern "C" __global__ void scaleFC_RhoSq_comp_27(real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleFC_staggered_time_comp_27(   real* DC, 
+__global__ void scaleFC_staggered_time_comp_27(   real* DC, 
 															 real* DF, 
 															 unsigned int* neighborCX,
 															 unsigned int* neighborCY,
@@ -11175,96 +11175,96 @@ extern "C" __global__ void scaleFC_staggered_time_comp_27(   real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[E   *size_MatF];
-   fwF    = &DF[W   *size_MatF];
-   fnF    = &DF[N   *size_MatF];
-   fsF    = &DF[S   *size_MatF];
-   ftF    = &DF[T   *size_MatF];
-   fbF    = &DF[B   *size_MatF];
-   fneF   = &DF[NE  *size_MatF];
-   fswF   = &DF[SW  *size_MatF];
-   fseF   = &DF[SE  *size_MatF];
-   fnwF   = &DF[NW  *size_MatF];
-   fteF   = &DF[TE  *size_MatF];
-   fbwF   = &DF[BW  *size_MatF];
-   fbeF   = &DF[BE  *size_MatF];
-   ftwF   = &DF[TW  *size_MatF];
-   ftnF   = &DF[TN  *size_MatF];
-   fbsF   = &DF[BS  *size_MatF];
-   fbnF   = &DF[BN  *size_MatF];
-   ftsF   = &DF[TS  *size_MatF];
-   fzeroF = &DF[REST*size_MatF];
-   ftneF  = &DF[TNE *size_MatF];
-   ftswF  = &DF[TSW *size_MatF];
-   ftseF  = &DF[TSE *size_MatF];
-   ftnwF  = &DF[TNW *size_MatF];
-   fbneF  = &DF[BNE *size_MatF];
-   fbswF  = &DF[BSW *size_MatF];
-   fbseF  = &DF[BSE *size_MatF];
-   fbnwF  = &DF[BNW *size_MatF];
+   feF    = &DF[DIR_P00   *size_MatF];
+   fwF    = &DF[DIR_M00   *size_MatF];
+   fnF    = &DF[DIR_0P0   *size_MatF];
+   fsF    = &DF[DIR_0M0   *size_MatF];
+   ftF    = &DF[DIR_00P   *size_MatF];
+   fbF    = &DF[DIR_00M   *size_MatF];
+   fneF   = &DF[DIR_PP0  *size_MatF];
+   fswF   = &DF[DIR_MM0  *size_MatF];
+   fseF   = &DF[DIR_PM0  *size_MatF];
+   fnwF   = &DF[DIR_MP0  *size_MatF];
+   fteF   = &DF[DIR_P0P  *size_MatF];
+   fbwF   = &DF[DIR_M0M  *size_MatF];
+   fbeF   = &DF[DIR_P0M  *size_MatF];
+   ftwF   = &DF[DIR_M0P  *size_MatF];
+   ftnF   = &DF[DIR_0PP  *size_MatF];
+   fbsF   = &DF[DIR_0MM  *size_MatF];
+   fbnF   = &DF[DIR_0PM  *size_MatF];
+   ftsF   = &DF[DIR_0MP  *size_MatF];
+   fzeroF = &DF[DIR_000*size_MatF];
+   ftneF  = &DF[DIR_PPP *size_MatF];
+   ftswF  = &DF[DIR_MMP *size_MatF];
+   ftseF  = &DF[DIR_PMP *size_MatF];
+   ftnwF  = &DF[DIR_MPP *size_MatF];
+   fbneF  = &DF[DIR_PPM *size_MatF];
+   fbswF  = &DF[DIR_MMM *size_MatF];
+   fbseF  = &DF[DIR_PMM *size_MatF];
+   fbnwF  = &DF[DIR_MPM *size_MatF];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[E   *size_MatC];
-      fwC    = &DC[W   *size_MatC];
-      fnC    = &DC[N   *size_MatC];
-      fsC    = &DC[S   *size_MatC];
-      ftC    = &DC[T   *size_MatC];
-      fbC    = &DC[B   *size_MatC];
-      fneC   = &DC[NE  *size_MatC];
-      fswC   = &DC[SW  *size_MatC];
-      fseC   = &DC[SE  *size_MatC];
-      fnwC   = &DC[NW  *size_MatC];
-      fteC   = &DC[TE  *size_MatC];
-      fbwC   = &DC[BW  *size_MatC];
-      fbeC   = &DC[BE  *size_MatC];
-      ftwC   = &DC[TW  *size_MatC];
-      ftnC   = &DC[TN  *size_MatC];
-      fbsC   = &DC[BS  *size_MatC];
-      fbnC   = &DC[BN  *size_MatC];
-      ftsC   = &DC[TS  *size_MatC];
-      fzeroC = &DC[REST*size_MatC];
-      ftneC  = &DC[TNE *size_MatC];
-      ftswC  = &DC[TSW *size_MatC];
-      ftseC  = &DC[TSE *size_MatC];
-      ftnwC  = &DC[TNW *size_MatC];
-      fbneC  = &DC[BNE *size_MatC];
-      fbswC  = &DC[BSW *size_MatC];
-      fbseC  = &DC[BSE *size_MatC];
-      fbnwC  = &DC[BNW *size_MatC];
+      feC    = &DC[DIR_P00   *size_MatC];
+      fwC    = &DC[DIR_M00   *size_MatC];
+      fnC    = &DC[DIR_0P0   *size_MatC];
+      fsC    = &DC[DIR_0M0   *size_MatC];
+      ftC    = &DC[DIR_00P   *size_MatC];
+      fbC    = &DC[DIR_00M   *size_MatC];
+      fneC   = &DC[DIR_PP0  *size_MatC];
+      fswC   = &DC[DIR_MM0  *size_MatC];
+      fseC   = &DC[DIR_PM0  *size_MatC];
+      fnwC   = &DC[DIR_MP0  *size_MatC];
+      fteC   = &DC[DIR_P0P  *size_MatC];
+      fbwC   = &DC[DIR_M0M  *size_MatC];
+      fbeC   = &DC[DIR_P0M  *size_MatC];
+      ftwC   = &DC[DIR_M0P  *size_MatC];
+      ftnC   = &DC[DIR_0PP  *size_MatC];
+      fbsC   = &DC[DIR_0MM  *size_MatC];
+      fbnC   = &DC[DIR_0PM  *size_MatC];
+      ftsC   = &DC[DIR_0MP  *size_MatC];
+      fzeroC = &DC[DIR_000*size_MatC];
+      ftneC  = &DC[DIR_PPP *size_MatC];
+      ftswC  = &DC[DIR_MMP *size_MatC];
+      ftseC  = &DC[DIR_PMP *size_MatC];
+      ftnwC  = &DC[DIR_MPP *size_MatC];
+      fbneC  = &DC[DIR_PPM *size_MatC];
+      fbswC  = &DC[DIR_MMM *size_MatC];
+      fbseC  = &DC[DIR_PMM *size_MatC];
+      fbnwC  = &DC[DIR_MPM *size_MatC];
    } 
    else
    {
-      fwC    = &DC[E   *size_MatC];
-      feC    = &DC[W   *size_MatC];
-      fsC    = &DC[N   *size_MatC];
-      fnC    = &DC[S   *size_MatC];
-      fbC    = &DC[T   *size_MatC];
-      ftC    = &DC[B   *size_MatC];
-      fswC   = &DC[NE  *size_MatC];
-      fneC   = &DC[SW  *size_MatC];
-      fnwC   = &DC[SE  *size_MatC];
-      fseC   = &DC[NW  *size_MatC];
-      fbwC   = &DC[TE  *size_MatC];
-      fteC   = &DC[BW  *size_MatC];
-      ftwC   = &DC[BE  *size_MatC];
-      fbeC   = &DC[TW  *size_MatC];
-      fbsC   = &DC[TN  *size_MatC];
-      ftnC   = &DC[BS  *size_MatC];
-      ftsC   = &DC[BN  *size_MatC];
-      fbnC   = &DC[TS  *size_MatC];
-      fzeroC = &DC[REST*size_MatC];
-      fbswC  = &DC[TNE *size_MatC];
-      fbneC  = &DC[TSW *size_MatC];
-      fbnwC  = &DC[TSE *size_MatC];
-      fbseC  = &DC[TNW *size_MatC];
-      ftswC  = &DC[BNE *size_MatC];
-      ftneC  = &DC[BSW *size_MatC];
-      ftnwC  = &DC[BSE *size_MatC];
-      ftseC  = &DC[BNW *size_MatC];
+      fwC    = &DC[DIR_P00   *size_MatC];
+      feC    = &DC[DIR_M00   *size_MatC];
+      fsC    = &DC[DIR_0P0   *size_MatC];
+      fnC    = &DC[DIR_0M0   *size_MatC];
+      fbC    = &DC[DIR_00P   *size_MatC];
+      ftC    = &DC[DIR_00M   *size_MatC];
+      fswC   = &DC[DIR_PP0  *size_MatC];
+      fneC   = &DC[DIR_MM0  *size_MatC];
+      fnwC   = &DC[DIR_PM0  *size_MatC];
+      fseC   = &DC[DIR_MP0  *size_MatC];
+      fbwC   = &DC[DIR_P0P  *size_MatC];
+      fteC   = &DC[DIR_M0M  *size_MatC];
+      ftwC   = &DC[DIR_P0M  *size_MatC];
+      fbeC   = &DC[DIR_M0P  *size_MatC];
+      fbsC   = &DC[DIR_0PP  *size_MatC];
+      ftnC   = &DC[DIR_0MM  *size_MatC];
+      ftsC   = &DC[DIR_0PM  *size_MatC];
+      fbnC   = &DC[DIR_0MP  *size_MatC];
+      fzeroC = &DC[DIR_000*size_MatC];
+      fbswC  = &DC[DIR_PPP *size_MatC];
+      fbneC  = &DC[DIR_MMP *size_MatC];
+      fbnwC  = &DC[DIR_PMP *size_MatC];
+      fbseC  = &DC[DIR_MPP *size_MatC];
+      ftswC  = &DC[DIR_PPM *size_MatC];
+      ftneC  = &DC[DIR_MMM *size_MatC];
+      ftnwC  = &DC[DIR_PMM *size_MatC];
+      ftseC  = &DC[DIR_MPM *size_MatC];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  ix = threadIdx.x;  // Globaler x-Index 
@@ -11759,63 +11759,63 @@ extern "C" __global__ void scaleFC_staggered_time_comp_27(   real* DC,
 	  ////pointertausch
 	  // if (isEvenTimestep==false)
 	  // {
-		 // feC    = &DC[E   *size_MatC];
-		 // fwC    = &DC[W   *size_MatC];
-		 // fnC    = &DC[N   *size_MatC];
-		 // fsC    = &DC[S   *size_MatC];
-		 // ftC    = &DC[T   *size_MatC];
-		 // fbC    = &DC[B   *size_MatC];
-		 // fneC   = &DC[NE  *size_MatC];
-		 // fswC   = &DC[SW  *size_MatC];
-		 // fseC   = &DC[SE  *size_MatC];
-		 // fnwC   = &DC[NW  *size_MatC];
-		 // fteC   = &DC[TE  *size_MatC];
-		 // fbwC   = &DC[BW  *size_MatC];
-		 // fbeC   = &DC[BE  *size_MatC];
-		 // ftwC   = &DC[TW  *size_MatC];
-		 // ftnC   = &DC[TN  *size_MatC];
-		 // fbsC   = &DC[BS  *size_MatC];
-		 // fbnC   = &DC[BN  *size_MatC];
-		 // ftsC   = &DC[TS  *size_MatC];
-		 // fzeroC = &DC[REST*size_MatC];
-		 // ftneC  = &DC[TNE *size_MatC];
-		 // ftswC  = &DC[TSW *size_MatC];
-		 // ftseC  = &DC[TSE *size_MatC];
-		 // ftnwC  = &DC[TNW *size_MatC];
-		 // fbneC  = &DC[BNE *size_MatC];
-		 // fbswC  = &DC[BSW *size_MatC];
-		 // fbseC  = &DC[BSE *size_MatC];
-		 // fbnwC  = &DC[BNW *size_MatC];
+		 // feC    = &DC[DIR_P00   *size_MatC];
+		 // fwC    = &DC[DIR_M00   *size_MatC];
+		 // fnC    = &DC[DIR_0P0   *size_MatC];
+		 // fsC    = &DC[DIR_0M0   *size_MatC];
+		 // ftC    = &DC[DIR_00P   *size_MatC];
+		 // fbC    = &DC[DIR_00M   *size_MatC];
+		 // fneC   = &DC[DIR_PP0  *size_MatC];
+		 // fswC   = &DC[DIR_MM0  *size_MatC];
+		 // fseC   = &DC[DIR_PM0  *size_MatC];
+		 // fnwC   = &DC[DIR_MP0  *size_MatC];
+		 // fteC   = &DC[DIR_P0P  *size_MatC];
+		 // fbwC   = &DC[DIR_M0M  *size_MatC];
+		 // fbeC   = &DC[DIR_P0M  *size_MatC];
+		 // ftwC   = &DC[DIR_M0P  *size_MatC];
+		 // ftnC   = &DC[DIR_0PP  *size_MatC];
+		 // fbsC   = &DC[DIR_0MM  *size_MatC];
+		 // fbnC   = &DC[DIR_0PM  *size_MatC];
+		 // ftsC   = &DC[DIR_0MP  *size_MatC];
+		 // fzeroC = &DC[DIR_000*size_MatC];
+		 // ftneC  = &DC[DIR_PPP *size_MatC];
+		 // ftswC  = &DC[DIR_MMP *size_MatC];
+		 // ftseC  = &DC[DIR_PMP *size_MatC];
+		 // ftnwC  = &DC[DIR_MPP *size_MatC];
+		 // fbneC  = &DC[DIR_PPM *size_MatC];
+		 // fbswC  = &DC[DIR_MMM *size_MatC];
+		 // fbseC  = &DC[DIR_PMM *size_MatC];
+		 // fbnwC  = &DC[DIR_MPM *size_MatC];
 	  // } 
 	  // else
 	  // {
-		 // fwC    = &DC[E   *size_MatC];
-		 // feC    = &DC[W   *size_MatC];
-		 // fsC    = &DC[N   *size_MatC];
-		 // fnC    = &DC[S   *size_MatC];
-		 // fbC    = &DC[T   *size_MatC];
-		 // ftC    = &DC[B   *size_MatC];
-		 // fswC   = &DC[NE  *size_MatC];
-		 // fneC   = &DC[SW  *size_MatC];
-		 // fnwC   = &DC[SE  *size_MatC];
-		 // fseC   = &DC[NW  *size_MatC];
-		 // fbwC   = &DC[TE  *size_MatC];
-		 // fteC   = &DC[BW  *size_MatC];
-		 // ftwC   = &DC[BE  *size_MatC];
-		 // fbeC   = &DC[TW  *size_MatC];
-		 // fbsC   = &DC[TN  *size_MatC];
-		 // ftnC   = &DC[BS  *size_MatC];
-		 // ftsC   = &DC[BN  *size_MatC];
-		 // fbnC   = &DC[TS  *size_MatC];
-		 // fzeroC = &DC[REST*size_MatC];
-		 // fbswC  = &DC[TNE *size_MatC];
-		 // fbneC  = &DC[TSW *size_MatC];
-		 // fbnwC  = &DC[TSE *size_MatC];
-		 // fbseC  = &DC[TNW *size_MatC];
-		 // ftswC  = &DC[BNE *size_MatC];
-		 // ftneC  = &DC[BSW *size_MatC];
-		 // ftnwC  = &DC[BSE *size_MatC];
-		 // ftseC  = &DC[BNW *size_MatC];
+		 // fwC    = &DC[DIR_P00   *size_MatC];
+		 // feC    = &DC[DIR_M00   *size_MatC];
+		 // fsC    = &DC[DIR_0P0   *size_MatC];
+		 // fnC    = &DC[DIR_0M0   *size_MatC];
+		 // fbC    = &DC[DIR_00P   *size_MatC];
+		 // ftC    = &DC[DIR_00M   *size_MatC];
+		 // fswC   = &DC[DIR_PP0  *size_MatC];
+		 // fneC   = &DC[DIR_MM0  *size_MatC];
+		 // fnwC   = &DC[DIR_PM0  *size_MatC];
+		 // fseC   = &DC[DIR_MP0  *size_MatC];
+		 // fbwC   = &DC[DIR_P0P  *size_MatC];
+		 // fteC   = &DC[DIR_M0M  *size_MatC];
+		 // ftwC   = &DC[DIR_P0M  *size_MatC];
+		 // fbeC   = &DC[DIR_M0P  *size_MatC];
+		 // fbsC   = &DC[DIR_0PP  *size_MatC];
+		 // ftnC   = &DC[DIR_0MM  *size_MatC];
+		 // ftsC   = &DC[DIR_0PM  *size_MatC];
+		 // fbnC   = &DC[DIR_0MP  *size_MatC];
+		 // fzeroC = &DC[DIR_000*size_MatC];
+		 // fbswC  = &DC[DIR_PPP *size_MatC];
+		 // fbneC  = &DC[DIR_MMP *size_MatC];
+		 // fbnwC  = &DC[DIR_PMP *size_MatC];
+		 // fbseC  = &DC[DIR_MPP *size_MatC];
+		 // ftswC  = &DC[DIR_PPM *size_MatC];
+		 // ftneC  = &DC[DIR_MMM *size_MatC];
+		 // ftnwC  = &DC[DIR_PMM *size_MatC];
+		 // ftseC  = &DC[DIR_MPM *size_MatC];
 	  // }
 
  	 // real rho_tmp;
@@ -13270,7 +13270,7 @@ extern "C" __global__ void scaleFC_staggered_time_comp_27(   real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleFC_Fix_comp_27(  real* DC, 
+__global__ void scaleFC_Fix_comp_27(  real* DC, 
 												 real* DF, 
 												 unsigned int* neighborCX,
 												 unsigned int* neighborCY,
@@ -13296,96 +13296,96 @@ extern "C" __global__ void scaleFC_Fix_comp_27(  real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[E   *size_MatF];
-   fwF    = &DF[W   *size_MatF];
-   fnF    = &DF[N   *size_MatF];
-   fsF    = &DF[S   *size_MatF];
-   ftF    = &DF[T   *size_MatF];
-   fbF    = &DF[B   *size_MatF];
-   fneF   = &DF[NE  *size_MatF];
-   fswF   = &DF[SW  *size_MatF];
-   fseF   = &DF[SE  *size_MatF];
-   fnwF   = &DF[NW  *size_MatF];
-   fteF   = &DF[TE  *size_MatF];
-   fbwF   = &DF[BW  *size_MatF];
-   fbeF   = &DF[BE  *size_MatF];
-   ftwF   = &DF[TW  *size_MatF];
-   ftnF   = &DF[TN  *size_MatF];
-   fbsF   = &DF[BS  *size_MatF];
-   fbnF   = &DF[BN  *size_MatF];
-   ftsF   = &DF[TS  *size_MatF];
-   fzeroF = &DF[REST*size_MatF];
-   ftneF  = &DF[TNE *size_MatF];
-   ftswF  = &DF[TSW *size_MatF];
-   ftseF  = &DF[TSE *size_MatF];
-   ftnwF  = &DF[TNW *size_MatF];
-   fbneF  = &DF[BNE *size_MatF];
-   fbswF  = &DF[BSW *size_MatF];
-   fbseF  = &DF[BSE *size_MatF];
-   fbnwF  = &DF[BNW *size_MatF];
+   feF    = &DF[DIR_P00   *size_MatF];
+   fwF    = &DF[DIR_M00   *size_MatF];
+   fnF    = &DF[DIR_0P0   *size_MatF];
+   fsF    = &DF[DIR_0M0   *size_MatF];
+   ftF    = &DF[DIR_00P   *size_MatF];
+   fbF    = &DF[DIR_00M   *size_MatF];
+   fneF   = &DF[DIR_PP0  *size_MatF];
+   fswF   = &DF[DIR_MM0  *size_MatF];
+   fseF   = &DF[DIR_PM0  *size_MatF];
+   fnwF   = &DF[DIR_MP0  *size_MatF];
+   fteF   = &DF[DIR_P0P  *size_MatF];
+   fbwF   = &DF[DIR_M0M  *size_MatF];
+   fbeF   = &DF[DIR_P0M  *size_MatF];
+   ftwF   = &DF[DIR_M0P  *size_MatF];
+   ftnF   = &DF[DIR_0PP  *size_MatF];
+   fbsF   = &DF[DIR_0MM  *size_MatF];
+   fbnF   = &DF[DIR_0PM  *size_MatF];
+   ftsF   = &DF[DIR_0MP  *size_MatF];
+   fzeroF = &DF[DIR_000*size_MatF];
+   ftneF  = &DF[DIR_PPP *size_MatF];
+   ftswF  = &DF[DIR_MMP *size_MatF];
+   ftseF  = &DF[DIR_PMP *size_MatF];
+   ftnwF  = &DF[DIR_MPP *size_MatF];
+   fbneF  = &DF[DIR_PPM *size_MatF];
+   fbswF  = &DF[DIR_MMM *size_MatF];
+   fbseF  = &DF[DIR_PMM *size_MatF];
+   fbnwF  = &DF[DIR_MPM *size_MatF];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[E   *size_MatC];
-      fwC    = &DC[W   *size_MatC];
-      fnC    = &DC[N   *size_MatC];
-      fsC    = &DC[S   *size_MatC];
-      ftC    = &DC[T   *size_MatC];
-      fbC    = &DC[B   *size_MatC];
-      fneC   = &DC[NE  *size_MatC];
-      fswC   = &DC[SW  *size_MatC];
-      fseC   = &DC[SE  *size_MatC];
-      fnwC   = &DC[NW  *size_MatC];
-      fteC   = &DC[TE  *size_MatC];
-      fbwC   = &DC[BW  *size_MatC];
-      fbeC   = &DC[BE  *size_MatC];
-      ftwC   = &DC[TW  *size_MatC];
-      ftnC   = &DC[TN  *size_MatC];
-      fbsC   = &DC[BS  *size_MatC];
-      fbnC   = &DC[BN  *size_MatC];
-      ftsC   = &DC[TS  *size_MatC];
-      fzeroC = &DC[REST*size_MatC];
-      ftneC  = &DC[TNE *size_MatC];
-      ftswC  = &DC[TSW *size_MatC];
-      ftseC  = &DC[TSE *size_MatC];
-      ftnwC  = &DC[TNW *size_MatC];
-      fbneC  = &DC[BNE *size_MatC];
-      fbswC  = &DC[BSW *size_MatC];
-      fbseC  = &DC[BSE *size_MatC];
-      fbnwC  = &DC[BNW *size_MatC];
+      feC    = &DC[DIR_P00   *size_MatC];
+      fwC    = &DC[DIR_M00   *size_MatC];
+      fnC    = &DC[DIR_0P0   *size_MatC];
+      fsC    = &DC[DIR_0M0   *size_MatC];
+      ftC    = &DC[DIR_00P   *size_MatC];
+      fbC    = &DC[DIR_00M   *size_MatC];
+      fneC   = &DC[DIR_PP0  *size_MatC];
+      fswC   = &DC[DIR_MM0  *size_MatC];
+      fseC   = &DC[DIR_PM0  *size_MatC];
+      fnwC   = &DC[DIR_MP0  *size_MatC];
+      fteC   = &DC[DIR_P0P  *size_MatC];
+      fbwC   = &DC[DIR_M0M  *size_MatC];
+      fbeC   = &DC[DIR_P0M  *size_MatC];
+      ftwC   = &DC[DIR_M0P  *size_MatC];
+      ftnC   = &DC[DIR_0PP  *size_MatC];
+      fbsC   = &DC[DIR_0MM  *size_MatC];
+      fbnC   = &DC[DIR_0PM  *size_MatC];
+      ftsC   = &DC[DIR_0MP  *size_MatC];
+      fzeroC = &DC[DIR_000*size_MatC];
+      ftneC  = &DC[DIR_PPP *size_MatC];
+      ftswC  = &DC[DIR_MMP *size_MatC];
+      ftseC  = &DC[DIR_PMP *size_MatC];
+      ftnwC  = &DC[DIR_MPP *size_MatC];
+      fbneC  = &DC[DIR_PPM *size_MatC];
+      fbswC  = &DC[DIR_MMM *size_MatC];
+      fbseC  = &DC[DIR_PMM *size_MatC];
+      fbnwC  = &DC[DIR_MPM *size_MatC];
    } 
    else
    {
-      fwC    = &DC[E   *size_MatC];
-      feC    = &DC[W   *size_MatC];
-      fsC    = &DC[N   *size_MatC];
-      fnC    = &DC[S   *size_MatC];
-      fbC    = &DC[T   *size_MatC];
-      ftC    = &DC[B   *size_MatC];
-      fswC   = &DC[NE  *size_MatC];
-      fneC   = &DC[SW  *size_MatC];
-      fnwC   = &DC[SE  *size_MatC];
-      fseC   = &DC[NW  *size_MatC];
-      fbwC   = &DC[TE  *size_MatC];
-      fteC   = &DC[BW  *size_MatC];
-      ftwC   = &DC[BE  *size_MatC];
-      fbeC   = &DC[TW  *size_MatC];
-      fbsC   = &DC[TN  *size_MatC];
-      ftnC   = &DC[BS  *size_MatC];
-      ftsC   = &DC[BN  *size_MatC];
-      fbnC   = &DC[TS  *size_MatC];
-      fzeroC = &DC[REST*size_MatC];
-      fbswC  = &DC[TNE *size_MatC];
-      fbneC  = &DC[TSW *size_MatC];
-      fbnwC  = &DC[TSE *size_MatC];
-      fbseC  = &DC[TNW *size_MatC];
-      ftswC  = &DC[BNE *size_MatC];
-      ftneC  = &DC[BSW *size_MatC];
-      ftnwC  = &DC[BSE *size_MatC];
-      ftseC  = &DC[BNW *size_MatC];
+      fwC    = &DC[DIR_P00   *size_MatC];
+      feC    = &DC[DIR_M00   *size_MatC];
+      fsC    = &DC[DIR_0P0   *size_MatC];
+      fnC    = &DC[DIR_0M0   *size_MatC];
+      fbC    = &DC[DIR_00P   *size_MatC];
+      ftC    = &DC[DIR_00M   *size_MatC];
+      fswC   = &DC[DIR_PP0  *size_MatC];
+      fneC   = &DC[DIR_MM0  *size_MatC];
+      fnwC   = &DC[DIR_PM0  *size_MatC];
+      fseC   = &DC[DIR_MP0  *size_MatC];
+      fbwC   = &DC[DIR_P0P  *size_MatC];
+      fteC   = &DC[DIR_M0M  *size_MatC];
+      ftwC   = &DC[DIR_P0M  *size_MatC];
+      fbeC   = &DC[DIR_M0P  *size_MatC];
+      fbsC   = &DC[DIR_0PP  *size_MatC];
+      ftnC   = &DC[DIR_0MM  *size_MatC];
+      ftsC   = &DC[DIR_0PM  *size_MatC];
+      fbnC   = &DC[DIR_0MP  *size_MatC];
+      fzeroC = &DC[DIR_000*size_MatC];
+      fbswC  = &DC[DIR_PPP *size_MatC];
+      fbneC  = &DC[DIR_MMP *size_MatC];
+      fbnwC  = &DC[DIR_PMP *size_MatC];
+      fbseC  = &DC[DIR_MPP *size_MatC];
+      ftswC  = &DC[DIR_PPM *size_MatC];
+      ftneC  = &DC[DIR_MMM *size_MatC];
+      ftnwC  = &DC[DIR_PMM *size_MatC];
+      ftseC  = &DC[DIR_MPM *size_MatC];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  ix = threadIdx.x;  // Globaler x-Index 
@@ -15130,7 +15130,7 @@ extern "C" __global__ void scaleFC_Fix_comp_27(  real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleFC_NSPress_27(   real* DC, 
+__global__ void scaleFC_NSPress_27(   real* DC, 
 												 real* DF, 
 												 unsigned int* neighborCX,
 												 unsigned int* neighborCY,
@@ -15156,96 +15156,96 @@ extern "C" __global__ void scaleFC_NSPress_27(   real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[E   *size_MatF];
-   fwF    = &DF[W   *size_MatF];
-   fnF    = &DF[N   *size_MatF];
-   fsF    = &DF[S   *size_MatF];
-   ftF    = &DF[T   *size_MatF];
-   fbF    = &DF[B   *size_MatF];
-   fneF   = &DF[NE  *size_MatF];
-   fswF   = &DF[SW  *size_MatF];
-   fseF   = &DF[SE  *size_MatF];
-   fnwF   = &DF[NW  *size_MatF];
-   fteF   = &DF[TE  *size_MatF];
-   fbwF   = &DF[BW  *size_MatF];
-   fbeF   = &DF[BE  *size_MatF];
-   ftwF   = &DF[TW  *size_MatF];
-   ftnF   = &DF[TN  *size_MatF];
-   fbsF   = &DF[BS  *size_MatF];
-   fbnF   = &DF[BN  *size_MatF];
-   ftsF   = &DF[TS  *size_MatF];
-   fzeroF = &DF[REST*size_MatF];
-   ftneF  = &DF[TNE *size_MatF];
-   ftswF  = &DF[TSW *size_MatF];
-   ftseF  = &DF[TSE *size_MatF];
-   ftnwF  = &DF[TNW *size_MatF];
-   fbneF  = &DF[BNE *size_MatF];
-   fbswF  = &DF[BSW *size_MatF];
-   fbseF  = &DF[BSE *size_MatF];
-   fbnwF  = &DF[BNW *size_MatF];
+   feF    = &DF[DIR_P00   *size_MatF];
+   fwF    = &DF[DIR_M00   *size_MatF];
+   fnF    = &DF[DIR_0P0   *size_MatF];
+   fsF    = &DF[DIR_0M0   *size_MatF];
+   ftF    = &DF[DIR_00P   *size_MatF];
+   fbF    = &DF[DIR_00M   *size_MatF];
+   fneF   = &DF[DIR_PP0  *size_MatF];
+   fswF   = &DF[DIR_MM0  *size_MatF];
+   fseF   = &DF[DIR_PM0  *size_MatF];
+   fnwF   = &DF[DIR_MP0  *size_MatF];
+   fteF   = &DF[DIR_P0P  *size_MatF];
+   fbwF   = &DF[DIR_M0M  *size_MatF];
+   fbeF   = &DF[DIR_P0M  *size_MatF];
+   ftwF   = &DF[DIR_M0P  *size_MatF];
+   ftnF   = &DF[DIR_0PP  *size_MatF];
+   fbsF   = &DF[DIR_0MM  *size_MatF];
+   fbnF   = &DF[DIR_0PM  *size_MatF];
+   ftsF   = &DF[DIR_0MP  *size_MatF];
+   fzeroF = &DF[DIR_000*size_MatF];
+   ftneF  = &DF[DIR_PPP *size_MatF];
+   ftswF  = &DF[DIR_MMP *size_MatF];
+   ftseF  = &DF[DIR_PMP *size_MatF];
+   ftnwF  = &DF[DIR_MPP *size_MatF];
+   fbneF  = &DF[DIR_PPM *size_MatF];
+   fbswF  = &DF[DIR_MMM *size_MatF];
+   fbseF  = &DF[DIR_PMM *size_MatF];
+   fbnwF  = &DF[DIR_MPM *size_MatF];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[E   *size_MatC];
-      fwC    = &DC[W   *size_MatC];
-      fnC    = &DC[N   *size_MatC];
-      fsC    = &DC[S   *size_MatC];
-      ftC    = &DC[T   *size_MatC];
-      fbC    = &DC[B   *size_MatC];
-      fneC   = &DC[NE  *size_MatC];
-      fswC   = &DC[SW  *size_MatC];
-      fseC   = &DC[SE  *size_MatC];
-      fnwC   = &DC[NW  *size_MatC];
-      fteC   = &DC[TE  *size_MatC];
-      fbwC   = &DC[BW  *size_MatC];
-      fbeC   = &DC[BE  *size_MatC];
-      ftwC   = &DC[TW  *size_MatC];
-      ftnC   = &DC[TN  *size_MatC];
-      fbsC   = &DC[BS  *size_MatC];
-      fbnC   = &DC[BN  *size_MatC];
-      ftsC   = &DC[TS  *size_MatC];
-      fzeroC = &DC[REST*size_MatC];
-      ftneC  = &DC[TNE *size_MatC];
-      ftswC  = &DC[TSW *size_MatC];
-      ftseC  = &DC[TSE *size_MatC];
-      ftnwC  = &DC[TNW *size_MatC];
-      fbneC  = &DC[BNE *size_MatC];
-      fbswC  = &DC[BSW *size_MatC];
-      fbseC  = &DC[BSE *size_MatC];
-      fbnwC  = &DC[BNW *size_MatC];
+      feC    = &DC[DIR_P00   *size_MatC];
+      fwC    = &DC[DIR_M00   *size_MatC];
+      fnC    = &DC[DIR_0P0   *size_MatC];
+      fsC    = &DC[DIR_0M0   *size_MatC];
+      ftC    = &DC[DIR_00P   *size_MatC];
+      fbC    = &DC[DIR_00M   *size_MatC];
+      fneC   = &DC[DIR_PP0  *size_MatC];
+      fswC   = &DC[DIR_MM0  *size_MatC];
+      fseC   = &DC[DIR_PM0  *size_MatC];
+      fnwC   = &DC[DIR_MP0  *size_MatC];
+      fteC   = &DC[DIR_P0P  *size_MatC];
+      fbwC   = &DC[DIR_M0M  *size_MatC];
+      fbeC   = &DC[DIR_P0M  *size_MatC];
+      ftwC   = &DC[DIR_M0P  *size_MatC];
+      ftnC   = &DC[DIR_0PP  *size_MatC];
+      fbsC   = &DC[DIR_0MM  *size_MatC];
+      fbnC   = &DC[DIR_0PM  *size_MatC];
+      ftsC   = &DC[DIR_0MP  *size_MatC];
+      fzeroC = &DC[DIR_000*size_MatC];
+      ftneC  = &DC[DIR_PPP *size_MatC];
+      ftswC  = &DC[DIR_MMP *size_MatC];
+      ftseC  = &DC[DIR_PMP *size_MatC];
+      ftnwC  = &DC[DIR_MPP *size_MatC];
+      fbneC  = &DC[DIR_PPM *size_MatC];
+      fbswC  = &DC[DIR_MMM *size_MatC];
+      fbseC  = &DC[DIR_PMM *size_MatC];
+      fbnwC  = &DC[DIR_MPM *size_MatC];
    } 
    else
    {
-      fwC    = &DC[E   *size_MatC];
-      feC    = &DC[W   *size_MatC];
-      fsC    = &DC[N   *size_MatC];
-      fnC    = &DC[S   *size_MatC];
-      fbC    = &DC[T   *size_MatC];
-      ftC    = &DC[B   *size_MatC];
-      fswC   = &DC[NE  *size_MatC];
-      fneC   = &DC[SW  *size_MatC];
-      fnwC   = &DC[SE  *size_MatC];
-      fseC   = &DC[NW  *size_MatC];
-      fbwC   = &DC[TE  *size_MatC];
-      fteC   = &DC[BW  *size_MatC];
-      ftwC   = &DC[BE  *size_MatC];
-      fbeC   = &DC[TW  *size_MatC];
-      fbsC   = &DC[TN  *size_MatC];
-      ftnC   = &DC[BS  *size_MatC];
-      ftsC   = &DC[BN  *size_MatC];
-      fbnC   = &DC[TS  *size_MatC];
-      fzeroC = &DC[REST*size_MatC];
-      fbswC  = &DC[TNE *size_MatC];
-      fbneC  = &DC[TSW *size_MatC];
-      fbnwC  = &DC[TSE *size_MatC];
-      fbseC  = &DC[TNW *size_MatC];
-      ftswC  = &DC[BNE *size_MatC];
-      ftneC  = &DC[BSW *size_MatC];
-      ftnwC  = &DC[BSE *size_MatC];
-      ftseC  = &DC[BNW *size_MatC];
+      fwC    = &DC[DIR_P00   *size_MatC];
+      feC    = &DC[DIR_M00   *size_MatC];
+      fsC    = &DC[DIR_0P0   *size_MatC];
+      fnC    = &DC[DIR_0M0   *size_MatC];
+      fbC    = &DC[DIR_00P   *size_MatC];
+      ftC    = &DC[DIR_00M   *size_MatC];
+      fswC   = &DC[DIR_PP0  *size_MatC];
+      fneC   = &DC[DIR_MM0  *size_MatC];
+      fnwC   = &DC[DIR_PM0  *size_MatC];
+      fseC   = &DC[DIR_MP0  *size_MatC];
+      fbwC   = &DC[DIR_P0P  *size_MatC];
+      fteC   = &DC[DIR_M0M  *size_MatC];
+      ftwC   = &DC[DIR_P0M  *size_MatC];
+      fbeC   = &DC[DIR_M0P  *size_MatC];
+      fbsC   = &DC[DIR_0PP  *size_MatC];
+      ftnC   = &DC[DIR_0MM  *size_MatC];
+      ftsC   = &DC[DIR_0PM  *size_MatC];
+      fbnC   = &DC[DIR_0MP  *size_MatC];
+      fzeroC = &DC[DIR_000*size_MatC];
+      fbswC  = &DC[DIR_PPP *size_MatC];
+      fbneC  = &DC[DIR_MMP *size_MatC];
+      fbnwC  = &DC[DIR_PMP *size_MatC];
+      fbseC  = &DC[DIR_MPP *size_MatC];
+      ftswC  = &DC[DIR_PPM *size_MatC];
+      ftneC  = &DC[DIR_MMM *size_MatC];
+      ftnwC  = &DC[DIR_PMM *size_MatC];
+      ftseC  = &DC[DIR_MPM *size_MatC];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  ix = threadIdx.x;  // Globaler x-Index 
@@ -16336,7 +16336,7 @@ extern "C" __global__ void scaleFC_NSPress_27(   real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleFC_Fix_27(   real* DC, 
+__global__ void scaleFC_Fix_27(   real* DC, 
                                              real* DF, 
                                              unsigned int* neighborCX,
                                              unsigned int* neighborCY,
@@ -16362,96 +16362,96 @@ extern "C" __global__ void scaleFC_Fix_27(   real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[E   *size_MatF];
-   fwF    = &DF[W   *size_MatF];
-   fnF    = &DF[N   *size_MatF];
-   fsF    = &DF[S   *size_MatF];
-   ftF    = &DF[T   *size_MatF];
-   fbF    = &DF[B   *size_MatF];
-   fneF   = &DF[NE  *size_MatF];
-   fswF   = &DF[SW  *size_MatF];
-   fseF   = &DF[SE  *size_MatF];
-   fnwF   = &DF[NW  *size_MatF];
-   fteF   = &DF[TE  *size_MatF];
-   fbwF   = &DF[BW  *size_MatF];
-   fbeF   = &DF[BE  *size_MatF];
-   ftwF   = &DF[TW  *size_MatF];
-   ftnF   = &DF[TN  *size_MatF];
-   fbsF   = &DF[BS  *size_MatF];
-   fbnF   = &DF[BN  *size_MatF];
-   ftsF   = &DF[TS  *size_MatF];
-   fzeroF = &DF[REST*size_MatF];
-   ftneF  = &DF[TNE *size_MatF];
-   ftswF  = &DF[TSW *size_MatF];
-   ftseF  = &DF[TSE *size_MatF];
-   ftnwF  = &DF[TNW *size_MatF];
-   fbneF  = &DF[BNE *size_MatF];
-   fbswF  = &DF[BSW *size_MatF];
-   fbseF  = &DF[BSE *size_MatF];
-   fbnwF  = &DF[BNW *size_MatF];
+   feF    = &DF[DIR_P00   *size_MatF];
+   fwF    = &DF[DIR_M00   *size_MatF];
+   fnF    = &DF[DIR_0P0   *size_MatF];
+   fsF    = &DF[DIR_0M0   *size_MatF];
+   ftF    = &DF[DIR_00P   *size_MatF];
+   fbF    = &DF[DIR_00M   *size_MatF];
+   fneF   = &DF[DIR_PP0  *size_MatF];
+   fswF   = &DF[DIR_MM0  *size_MatF];
+   fseF   = &DF[DIR_PM0  *size_MatF];
+   fnwF   = &DF[DIR_MP0  *size_MatF];
+   fteF   = &DF[DIR_P0P  *size_MatF];
+   fbwF   = &DF[DIR_M0M  *size_MatF];
+   fbeF   = &DF[DIR_P0M  *size_MatF];
+   ftwF   = &DF[DIR_M0P  *size_MatF];
+   ftnF   = &DF[DIR_0PP  *size_MatF];
+   fbsF   = &DF[DIR_0MM  *size_MatF];
+   fbnF   = &DF[DIR_0PM  *size_MatF];
+   ftsF   = &DF[DIR_0MP  *size_MatF];
+   fzeroF = &DF[DIR_000*size_MatF];
+   ftneF  = &DF[DIR_PPP *size_MatF];
+   ftswF  = &DF[DIR_MMP *size_MatF];
+   ftseF  = &DF[DIR_PMP *size_MatF];
+   ftnwF  = &DF[DIR_MPP *size_MatF];
+   fbneF  = &DF[DIR_PPM *size_MatF];
+   fbswF  = &DF[DIR_MMM *size_MatF];
+   fbseF  = &DF[DIR_PMM *size_MatF];
+   fbnwF  = &DF[DIR_MPM *size_MatF];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[E   *size_MatC];
-      fwC    = &DC[W   *size_MatC];
-      fnC    = &DC[N   *size_MatC];
-      fsC    = &DC[S   *size_MatC];
-      ftC    = &DC[T   *size_MatC];
-      fbC    = &DC[B   *size_MatC];
-      fneC   = &DC[NE  *size_MatC];
-      fswC   = &DC[SW  *size_MatC];
-      fseC   = &DC[SE  *size_MatC];
-      fnwC   = &DC[NW  *size_MatC];
-      fteC   = &DC[TE  *size_MatC];
-      fbwC   = &DC[BW  *size_MatC];
-      fbeC   = &DC[BE  *size_MatC];
-      ftwC   = &DC[TW  *size_MatC];
-      ftnC   = &DC[TN  *size_MatC];
-      fbsC   = &DC[BS  *size_MatC];
-      fbnC   = &DC[BN  *size_MatC];
-      ftsC   = &DC[TS  *size_MatC];
-      fzeroC = &DC[REST*size_MatC];
-      ftneC  = &DC[TNE *size_MatC];
-      ftswC  = &DC[TSW *size_MatC];
-      ftseC  = &DC[TSE *size_MatC];
-      ftnwC  = &DC[TNW *size_MatC];
-      fbneC  = &DC[BNE *size_MatC];
-      fbswC  = &DC[BSW *size_MatC];
-      fbseC  = &DC[BSE *size_MatC];
-      fbnwC  = &DC[BNW *size_MatC];
+      feC    = &DC[DIR_P00   *size_MatC];
+      fwC    = &DC[DIR_M00   *size_MatC];
+      fnC    = &DC[DIR_0P0   *size_MatC];
+      fsC    = &DC[DIR_0M0   *size_MatC];
+      ftC    = &DC[DIR_00P   *size_MatC];
+      fbC    = &DC[DIR_00M   *size_MatC];
+      fneC   = &DC[DIR_PP0  *size_MatC];
+      fswC   = &DC[DIR_MM0  *size_MatC];
+      fseC   = &DC[DIR_PM0  *size_MatC];
+      fnwC   = &DC[DIR_MP0  *size_MatC];
+      fteC   = &DC[DIR_P0P  *size_MatC];
+      fbwC   = &DC[DIR_M0M  *size_MatC];
+      fbeC   = &DC[DIR_P0M  *size_MatC];
+      ftwC   = &DC[DIR_M0P  *size_MatC];
+      ftnC   = &DC[DIR_0PP  *size_MatC];
+      fbsC   = &DC[DIR_0MM  *size_MatC];
+      fbnC   = &DC[DIR_0PM  *size_MatC];
+      ftsC   = &DC[DIR_0MP  *size_MatC];
+      fzeroC = &DC[DIR_000*size_MatC];
+      ftneC  = &DC[DIR_PPP *size_MatC];
+      ftswC  = &DC[DIR_MMP *size_MatC];
+      ftseC  = &DC[DIR_PMP *size_MatC];
+      ftnwC  = &DC[DIR_MPP *size_MatC];
+      fbneC  = &DC[DIR_PPM *size_MatC];
+      fbswC  = &DC[DIR_MMM *size_MatC];
+      fbseC  = &DC[DIR_PMM *size_MatC];
+      fbnwC  = &DC[DIR_MPM *size_MatC];
    } 
    else
    {
-      fwC    = &DC[E   *size_MatC];
-      feC    = &DC[W   *size_MatC];
-      fsC    = &DC[N   *size_MatC];
-      fnC    = &DC[S   *size_MatC];
-      fbC    = &DC[T   *size_MatC];
-      ftC    = &DC[B   *size_MatC];
-      fswC   = &DC[NE  *size_MatC];
-      fneC   = &DC[SW  *size_MatC];
-      fnwC   = &DC[SE  *size_MatC];
-      fseC   = &DC[NW  *size_MatC];
-      fbwC   = &DC[TE  *size_MatC];
-      fteC   = &DC[BW  *size_MatC];
-      ftwC   = &DC[BE  *size_MatC];
-      fbeC   = &DC[TW  *size_MatC];
-      fbsC   = &DC[TN  *size_MatC];
-      ftnC   = &DC[BS  *size_MatC];
-      ftsC   = &DC[BN  *size_MatC];
-      fbnC   = &DC[TS  *size_MatC];
-      fzeroC = &DC[REST*size_MatC];
-      fbswC  = &DC[TNE *size_MatC];
-      fbneC  = &DC[TSW *size_MatC];
-      fbnwC  = &DC[TSE *size_MatC];
-      fbseC  = &DC[TNW *size_MatC];
-      ftswC  = &DC[BNE *size_MatC];
-      ftneC  = &DC[BSW *size_MatC];
-      ftnwC  = &DC[BSE *size_MatC];
-      ftseC  = &DC[BNW *size_MatC];
+      fwC    = &DC[DIR_P00   *size_MatC];
+      feC    = &DC[DIR_M00   *size_MatC];
+      fsC    = &DC[DIR_0P0   *size_MatC];
+      fnC    = &DC[DIR_0M0   *size_MatC];
+      fbC    = &DC[DIR_00P   *size_MatC];
+      ftC    = &DC[DIR_00M   *size_MatC];
+      fswC   = &DC[DIR_PP0  *size_MatC];
+      fneC   = &DC[DIR_MM0  *size_MatC];
+      fnwC   = &DC[DIR_PM0  *size_MatC];
+      fseC   = &DC[DIR_MP0  *size_MatC];
+      fbwC   = &DC[DIR_P0P  *size_MatC];
+      fteC   = &DC[DIR_M0M  *size_MatC];
+      ftwC   = &DC[DIR_P0M  *size_MatC];
+      fbeC   = &DC[DIR_M0P  *size_MatC];
+      fbsC   = &DC[DIR_0PP  *size_MatC];
+      ftnC   = &DC[DIR_0MM  *size_MatC];
+      ftsC   = &DC[DIR_0PM  *size_MatC];
+      fbnC   = &DC[DIR_0MP  *size_MatC];
+      fzeroC = &DC[DIR_000*size_MatC];
+      fbswC  = &DC[DIR_PPP *size_MatC];
+      fbneC  = &DC[DIR_MMP *size_MatC];
+      fbnwC  = &DC[DIR_PMP *size_MatC];
+      fbseC  = &DC[DIR_MPP *size_MatC];
+      ftswC  = &DC[DIR_PPM *size_MatC];
+      ftneC  = &DC[DIR_MMM *size_MatC];
+      ftnwC  = &DC[DIR_PMM *size_MatC];
+      ftseC  = &DC[DIR_MPM *size_MatC];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  ix = threadIdx.x;  // Globaler x-Index 
@@ -17696,7 +17696,7 @@ extern "C" __global__ void scaleFC_Fix_27(   real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleFCpress27(real* DC, 
+__global__ void scaleFCpress27(real* DC, 
                                           real* DF, 
                                           unsigned int* neighborCX,
                                           unsigned int* neighborCY,
@@ -17722,96 +17722,96 @@ extern "C" __global__ void scaleFCpress27(real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[E   *size_MatF];
-   fwF    = &DF[W   *size_MatF];
-   fnF    = &DF[N   *size_MatF];
-   fsF    = &DF[S   *size_MatF];
-   ftF    = &DF[T   *size_MatF];
-   fbF    = &DF[B   *size_MatF];
-   fneF   = &DF[NE  *size_MatF];
-   fswF   = &DF[SW  *size_MatF];
-   fseF   = &DF[SE  *size_MatF];
-   fnwF   = &DF[NW  *size_MatF];
-   fteF   = &DF[TE  *size_MatF];
-   fbwF   = &DF[BW  *size_MatF];
-   fbeF   = &DF[BE  *size_MatF];
-   ftwF   = &DF[TW  *size_MatF];
-   ftnF   = &DF[TN  *size_MatF];
-   fbsF   = &DF[BS  *size_MatF];
-   fbnF   = &DF[BN  *size_MatF];
-   ftsF   = &DF[TS  *size_MatF];
-   fzeroF = &DF[REST*size_MatF];
-   ftneF  = &DF[TNE *size_MatF];
-   ftswF  = &DF[TSW *size_MatF];
-   ftseF  = &DF[TSE *size_MatF];
-   ftnwF  = &DF[TNW *size_MatF];
-   fbneF  = &DF[BNE *size_MatF];
-   fbswF  = &DF[BSW *size_MatF];
-   fbseF  = &DF[BSE *size_MatF];
-   fbnwF  = &DF[BNW *size_MatF];
+   feF    = &DF[DIR_P00   *size_MatF];
+   fwF    = &DF[DIR_M00   *size_MatF];
+   fnF    = &DF[DIR_0P0   *size_MatF];
+   fsF    = &DF[DIR_0M0   *size_MatF];
+   ftF    = &DF[DIR_00P   *size_MatF];
+   fbF    = &DF[DIR_00M   *size_MatF];
+   fneF   = &DF[DIR_PP0  *size_MatF];
+   fswF   = &DF[DIR_MM0  *size_MatF];
+   fseF   = &DF[DIR_PM0  *size_MatF];
+   fnwF   = &DF[DIR_MP0  *size_MatF];
+   fteF   = &DF[DIR_P0P  *size_MatF];
+   fbwF   = &DF[DIR_M0M  *size_MatF];
+   fbeF   = &DF[DIR_P0M  *size_MatF];
+   ftwF   = &DF[DIR_M0P  *size_MatF];
+   ftnF   = &DF[DIR_0PP  *size_MatF];
+   fbsF   = &DF[DIR_0MM  *size_MatF];
+   fbnF   = &DF[DIR_0PM  *size_MatF];
+   ftsF   = &DF[DIR_0MP  *size_MatF];
+   fzeroF = &DF[DIR_000*size_MatF];
+   ftneF  = &DF[DIR_PPP *size_MatF];
+   ftswF  = &DF[DIR_MMP *size_MatF];
+   ftseF  = &DF[DIR_PMP *size_MatF];
+   ftnwF  = &DF[DIR_MPP *size_MatF];
+   fbneF  = &DF[DIR_PPM *size_MatF];
+   fbswF  = &DF[DIR_MMM *size_MatF];
+   fbseF  = &DF[DIR_PMM *size_MatF];
+   fbnwF  = &DF[DIR_MPM *size_MatF];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[E   *size_MatC];
-      fwC    = &DC[W   *size_MatC];
-      fnC    = &DC[N   *size_MatC];
-      fsC    = &DC[S   *size_MatC];
-      ftC    = &DC[T   *size_MatC];
-      fbC    = &DC[B   *size_MatC];
-      fneC   = &DC[NE  *size_MatC];
-      fswC   = &DC[SW  *size_MatC];
-      fseC   = &DC[SE  *size_MatC];
-      fnwC   = &DC[NW  *size_MatC];
-      fteC   = &DC[TE  *size_MatC];
-      fbwC   = &DC[BW  *size_MatC];
-      fbeC   = &DC[BE  *size_MatC];
-      ftwC   = &DC[TW  *size_MatC];
-      ftnC   = &DC[TN  *size_MatC];
-      fbsC   = &DC[BS  *size_MatC];
-      fbnC   = &DC[BN  *size_MatC];
-      ftsC   = &DC[TS  *size_MatC];
-      fzeroC = &DC[REST*size_MatC];
-      ftneC  = &DC[TNE *size_MatC];
-      ftswC  = &DC[TSW *size_MatC];
-      ftseC  = &DC[TSE *size_MatC];
-      ftnwC  = &DC[TNW *size_MatC];
-      fbneC  = &DC[BNE *size_MatC];
-      fbswC  = &DC[BSW *size_MatC];
-      fbseC  = &DC[BSE *size_MatC];
-      fbnwC  = &DC[BNW *size_MatC];
+      feC    = &DC[DIR_P00   *size_MatC];
+      fwC    = &DC[DIR_M00   *size_MatC];
+      fnC    = &DC[DIR_0P0   *size_MatC];
+      fsC    = &DC[DIR_0M0   *size_MatC];
+      ftC    = &DC[DIR_00P   *size_MatC];
+      fbC    = &DC[DIR_00M   *size_MatC];
+      fneC   = &DC[DIR_PP0  *size_MatC];
+      fswC   = &DC[DIR_MM0  *size_MatC];
+      fseC   = &DC[DIR_PM0  *size_MatC];
+      fnwC   = &DC[DIR_MP0  *size_MatC];
+      fteC   = &DC[DIR_P0P  *size_MatC];
+      fbwC   = &DC[DIR_M0M  *size_MatC];
+      fbeC   = &DC[DIR_P0M  *size_MatC];
+      ftwC   = &DC[DIR_M0P  *size_MatC];
+      ftnC   = &DC[DIR_0PP  *size_MatC];
+      fbsC   = &DC[DIR_0MM  *size_MatC];
+      fbnC   = &DC[DIR_0PM  *size_MatC];
+      ftsC   = &DC[DIR_0MP  *size_MatC];
+      fzeroC = &DC[DIR_000*size_MatC];
+      ftneC  = &DC[DIR_PPP *size_MatC];
+      ftswC  = &DC[DIR_MMP *size_MatC];
+      ftseC  = &DC[DIR_PMP *size_MatC];
+      ftnwC  = &DC[DIR_MPP *size_MatC];
+      fbneC  = &DC[DIR_PPM *size_MatC];
+      fbswC  = &DC[DIR_MMM *size_MatC];
+      fbseC  = &DC[DIR_PMM *size_MatC];
+      fbnwC  = &DC[DIR_MPM *size_MatC];
    } 
    else
    {
-      fwC    = &DC[E   *size_MatC];
-      feC    = &DC[W   *size_MatC];
-      fsC    = &DC[N   *size_MatC];
-      fnC    = &DC[S   *size_MatC];
-      fbC    = &DC[T   *size_MatC];
-      ftC    = &DC[B   *size_MatC];
-      fswC   = &DC[NE  *size_MatC];
-      fneC   = &DC[SW  *size_MatC];
-      fnwC   = &DC[SE  *size_MatC];
-      fseC   = &DC[NW  *size_MatC];
-      fbwC   = &DC[TE  *size_MatC];
-      fteC   = &DC[BW  *size_MatC];
-      ftwC   = &DC[BE  *size_MatC];
-      fbeC   = &DC[TW  *size_MatC];
-      fbsC   = &DC[TN  *size_MatC];
-      ftnC   = &DC[BS  *size_MatC];
-      ftsC   = &DC[BN  *size_MatC];
-      fbnC   = &DC[TS  *size_MatC];
-      fzeroC = &DC[REST*size_MatC];
-      fbswC  = &DC[TNE *size_MatC];
-      fbneC  = &DC[TSW *size_MatC];
-      fbnwC  = &DC[TSE *size_MatC];
-      fbseC  = &DC[TNW *size_MatC];
-      ftswC  = &DC[BNE *size_MatC];
-      ftneC  = &DC[BSW *size_MatC];
-      ftnwC  = &DC[BSE *size_MatC];
-      ftseC  = &DC[BNW *size_MatC];
+      fwC    = &DC[DIR_P00   *size_MatC];
+      feC    = &DC[DIR_M00   *size_MatC];
+      fsC    = &DC[DIR_0P0   *size_MatC];
+      fnC    = &DC[DIR_0M0   *size_MatC];
+      fbC    = &DC[DIR_00P   *size_MatC];
+      ftC    = &DC[DIR_00M   *size_MatC];
+      fswC   = &DC[DIR_PP0  *size_MatC];
+      fneC   = &DC[DIR_MM0  *size_MatC];
+      fnwC   = &DC[DIR_PM0  *size_MatC];
+      fseC   = &DC[DIR_MP0  *size_MatC];
+      fbwC   = &DC[DIR_P0P  *size_MatC];
+      fteC   = &DC[DIR_M0M  *size_MatC];
+      ftwC   = &DC[DIR_P0M  *size_MatC];
+      fbeC   = &DC[DIR_M0P  *size_MatC];
+      fbsC   = &DC[DIR_0PP  *size_MatC];
+      ftnC   = &DC[DIR_0MM  *size_MatC];
+      ftsC   = &DC[DIR_0PM  *size_MatC];
+      fbnC   = &DC[DIR_0MP  *size_MatC];
+      fzeroC = &DC[DIR_000*size_MatC];
+      fbswC  = &DC[DIR_PPP *size_MatC];
+      fbneC  = &DC[DIR_MMP *size_MatC];
+      fbnwC  = &DC[DIR_PMP *size_MatC];
+      fbseC  = &DC[DIR_MPP *size_MatC];
+      ftswC  = &DC[DIR_PPM *size_MatC];
+      ftneC  = &DC[DIR_MMM *size_MatC];
+      ftnwC  = &DC[DIR_PMM *size_MatC];
+      ftseC  = &DC[DIR_MPM *size_MatC];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  ix = threadIdx.x;  // Globaler x-Index 
@@ -18621,7 +18621,7 @@ extern "C" __global__ void scaleFCpress27(real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleFCLast27( real* DC, 
+__global__ void scaleFCLast27( real* DC, 
                                           real* DF, 
                                           unsigned int* neighborCX,
                                           unsigned int* neighborCY,
@@ -18647,96 +18647,96 @@ extern "C" __global__ void scaleFCLast27( real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[E   *size_MatF];
-   fwF    = &DF[W   *size_MatF];
-   fnF    = &DF[N   *size_MatF];
-   fsF    = &DF[S   *size_MatF];
-   ftF    = &DF[T   *size_MatF];
-   fbF    = &DF[B   *size_MatF];
-   fneF   = &DF[NE  *size_MatF];
-   fswF   = &DF[SW  *size_MatF];
-   fseF   = &DF[SE  *size_MatF];
-   fnwF   = &DF[NW  *size_MatF];
-   fteF   = &DF[TE  *size_MatF];
-   fbwF   = &DF[BW  *size_MatF];
-   fbeF   = &DF[BE  *size_MatF];
-   ftwF   = &DF[TW  *size_MatF];
-   ftnF   = &DF[TN  *size_MatF];
-   fbsF   = &DF[BS  *size_MatF];
-   fbnF   = &DF[BN  *size_MatF];
-   ftsF   = &DF[TS  *size_MatF];
-   fzeroF = &DF[REST*size_MatF];
-   ftneF  = &DF[TNE *size_MatF];
-   ftswF  = &DF[TSW *size_MatF];
-   ftseF  = &DF[TSE *size_MatF];
-   ftnwF  = &DF[TNW *size_MatF];
-   fbneF  = &DF[BNE *size_MatF];
-   fbswF  = &DF[BSW *size_MatF];
-   fbseF  = &DF[BSE *size_MatF];
-   fbnwF  = &DF[BNW *size_MatF];
+   feF    = &DF[DIR_P00   *size_MatF];
+   fwF    = &DF[DIR_M00   *size_MatF];
+   fnF    = &DF[DIR_0P0   *size_MatF];
+   fsF    = &DF[DIR_0M0   *size_MatF];
+   ftF    = &DF[DIR_00P   *size_MatF];
+   fbF    = &DF[DIR_00M   *size_MatF];
+   fneF   = &DF[DIR_PP0  *size_MatF];
+   fswF   = &DF[DIR_MM0  *size_MatF];
+   fseF   = &DF[DIR_PM0  *size_MatF];
+   fnwF   = &DF[DIR_MP0  *size_MatF];
+   fteF   = &DF[DIR_P0P  *size_MatF];
+   fbwF   = &DF[DIR_M0M  *size_MatF];
+   fbeF   = &DF[DIR_P0M  *size_MatF];
+   ftwF   = &DF[DIR_M0P  *size_MatF];
+   ftnF   = &DF[DIR_0PP  *size_MatF];
+   fbsF   = &DF[DIR_0MM  *size_MatF];
+   fbnF   = &DF[DIR_0PM  *size_MatF];
+   ftsF   = &DF[DIR_0MP  *size_MatF];
+   fzeroF = &DF[DIR_000*size_MatF];
+   ftneF  = &DF[DIR_PPP *size_MatF];
+   ftswF  = &DF[DIR_MMP *size_MatF];
+   ftseF  = &DF[DIR_PMP *size_MatF];
+   ftnwF  = &DF[DIR_MPP *size_MatF];
+   fbneF  = &DF[DIR_PPM *size_MatF];
+   fbswF  = &DF[DIR_MMM *size_MatF];
+   fbseF  = &DF[DIR_PMM *size_MatF];
+   fbnwF  = &DF[DIR_MPM *size_MatF];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[E   *size_MatC];
-      fwC    = &DC[W   *size_MatC];
-      fnC    = &DC[N   *size_MatC];
-      fsC    = &DC[S   *size_MatC];
-      ftC    = &DC[T   *size_MatC];
-      fbC    = &DC[B   *size_MatC];
-      fneC   = &DC[NE  *size_MatC];
-      fswC   = &DC[SW  *size_MatC];
-      fseC   = &DC[SE  *size_MatC];
-      fnwC   = &DC[NW  *size_MatC];
-      fteC   = &DC[TE  *size_MatC];
-      fbwC   = &DC[BW  *size_MatC];
-      fbeC   = &DC[BE  *size_MatC];
-      ftwC   = &DC[TW  *size_MatC];
-      ftnC   = &DC[TN  *size_MatC];
-      fbsC   = &DC[BS  *size_MatC];
-      fbnC   = &DC[BN  *size_MatC];
-      ftsC   = &DC[TS  *size_MatC];
-      fzeroC = &DC[REST*size_MatC];
-      ftneC  = &DC[TNE *size_MatC];
-      ftswC  = &DC[TSW *size_MatC];
-      ftseC  = &DC[TSE *size_MatC];
-      ftnwC  = &DC[TNW *size_MatC];
-      fbneC  = &DC[BNE *size_MatC];
-      fbswC  = &DC[BSW *size_MatC];
-      fbseC  = &DC[BSE *size_MatC];
-      fbnwC  = &DC[BNW *size_MatC];
+      feC    = &DC[DIR_P00   *size_MatC];
+      fwC    = &DC[DIR_M00   *size_MatC];
+      fnC    = &DC[DIR_0P0   *size_MatC];
+      fsC    = &DC[DIR_0M0   *size_MatC];
+      ftC    = &DC[DIR_00P   *size_MatC];
+      fbC    = &DC[DIR_00M   *size_MatC];
+      fneC   = &DC[DIR_PP0  *size_MatC];
+      fswC   = &DC[DIR_MM0  *size_MatC];
+      fseC   = &DC[DIR_PM0  *size_MatC];
+      fnwC   = &DC[DIR_MP0  *size_MatC];
+      fteC   = &DC[DIR_P0P  *size_MatC];
+      fbwC   = &DC[DIR_M0M  *size_MatC];
+      fbeC   = &DC[DIR_P0M  *size_MatC];
+      ftwC   = &DC[DIR_M0P  *size_MatC];
+      ftnC   = &DC[DIR_0PP  *size_MatC];
+      fbsC   = &DC[DIR_0MM  *size_MatC];
+      fbnC   = &DC[DIR_0PM  *size_MatC];
+      ftsC   = &DC[DIR_0MP  *size_MatC];
+      fzeroC = &DC[DIR_000*size_MatC];
+      ftneC  = &DC[DIR_PPP *size_MatC];
+      ftswC  = &DC[DIR_MMP *size_MatC];
+      ftseC  = &DC[DIR_PMP *size_MatC];
+      ftnwC  = &DC[DIR_MPP *size_MatC];
+      fbneC  = &DC[DIR_PPM *size_MatC];
+      fbswC  = &DC[DIR_MMM *size_MatC];
+      fbseC  = &DC[DIR_PMM *size_MatC];
+      fbnwC  = &DC[DIR_MPM *size_MatC];
    } 
    else
    {
-      fwC    = &DC[E   *size_MatC];
-      feC    = &DC[W   *size_MatC];
-      fsC    = &DC[N   *size_MatC];
-      fnC    = &DC[S   *size_MatC];
-      fbC    = &DC[T   *size_MatC];
-      ftC    = &DC[B   *size_MatC];
-      fswC   = &DC[NE  *size_MatC];
-      fneC   = &DC[SW  *size_MatC];
-      fnwC   = &DC[SE  *size_MatC];
-      fseC   = &DC[NW  *size_MatC];
-      fbwC   = &DC[TE  *size_MatC];
-      fteC   = &DC[BW  *size_MatC];
-      ftwC   = &DC[BE  *size_MatC];
-      fbeC   = &DC[TW  *size_MatC];
-      fbsC   = &DC[TN  *size_MatC];
-      ftnC   = &DC[BS  *size_MatC];
-      ftsC   = &DC[BN  *size_MatC];
-      fbnC   = &DC[TS  *size_MatC];
-      fzeroC = &DC[REST*size_MatC];
-      fbswC  = &DC[TNE *size_MatC];
-      fbneC  = &DC[TSW *size_MatC];
-      fbnwC  = &DC[TSE *size_MatC];
-      fbseC  = &DC[TNW *size_MatC];
-      ftswC  = &DC[BNE *size_MatC];
-      ftneC  = &DC[BSW *size_MatC];
-      ftnwC  = &DC[BSE *size_MatC];
-      ftseC  = &DC[BNW *size_MatC];
+      fwC    = &DC[DIR_P00   *size_MatC];
+      feC    = &DC[DIR_M00   *size_MatC];
+      fsC    = &DC[DIR_0P0   *size_MatC];
+      fnC    = &DC[DIR_0M0   *size_MatC];
+      fbC    = &DC[DIR_00P   *size_MatC];
+      ftC    = &DC[DIR_00M   *size_MatC];
+      fswC   = &DC[DIR_PP0  *size_MatC];
+      fneC   = &DC[DIR_MM0  *size_MatC];
+      fnwC   = &DC[DIR_PM0  *size_MatC];
+      fseC   = &DC[DIR_MP0  *size_MatC];
+      fbwC   = &DC[DIR_P0P  *size_MatC];
+      fteC   = &DC[DIR_M0M  *size_MatC];
+      ftwC   = &DC[DIR_P0M  *size_MatC];
+      fbeC   = &DC[DIR_M0P  *size_MatC];
+      fbsC   = &DC[DIR_0PP  *size_MatC];
+      ftnC   = &DC[DIR_0MM  *size_MatC];
+      ftsC   = &DC[DIR_0PM  *size_MatC];
+      fbnC   = &DC[DIR_0MP  *size_MatC];
+      fzeroC = &DC[DIR_000*size_MatC];
+      fbswC  = &DC[DIR_PPP *size_MatC];
+      fbneC  = &DC[DIR_MMP *size_MatC];
+      fbnwC  = &DC[DIR_PMP *size_MatC];
+      fbseC  = &DC[DIR_MPP *size_MatC];
+      ftswC  = &DC[DIR_PPM *size_MatC];
+      ftneC  = &DC[DIR_MMM *size_MatC];
+      ftnwC  = &DC[DIR_PMM *size_MatC];
+      ftseC  = &DC[DIR_MPM *size_MatC];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  ix = threadIdx.x;  // Globaler x-Index 
@@ -20017,7 +20017,7 @@ extern "C" __global__ void scaleFCLast27( real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleFCThSMG7(    real* DC, 
+__global__ void scaleFCThSMG7(    real* DC, 
                                              real* DF, 
                                              real* DD7C, 
                                              real* DD7F, 
@@ -20040,96 +20040,96 @@ extern "C" __global__ void scaleFCThSMG7(    real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, //*fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[E   *size_MatF];
-   fwF    = &DF[W   *size_MatF];
-   fnF    = &DF[N   *size_MatF];
-   fsF    = &DF[S   *size_MatF];
-   ftF    = &DF[T   *size_MatF];
-   fbF    = &DF[B   *size_MatF];
-   fneF   = &DF[NE  *size_MatF];
-   fswF   = &DF[SW  *size_MatF];
-   fseF   = &DF[SE  *size_MatF];
-   fnwF   = &DF[NW  *size_MatF];
-   fteF   = &DF[TE  *size_MatF];
-   fbwF   = &DF[BW  *size_MatF];
-   fbeF   = &DF[BE  *size_MatF];
-   ftwF   = &DF[TW  *size_MatF];
-   ftnF   = &DF[TN  *size_MatF];
-   fbsF   = &DF[BS  *size_MatF];
-   fbnF   = &DF[BN  *size_MatF];
-   ftsF   = &DF[TS  *size_MatF];
-   //fzeroF = &DF[REST*size_MatF];
-   ftneF  = &DF[TNE *size_MatF];
-   ftswF  = &DF[TSW *size_MatF];
-   ftseF  = &DF[TSE *size_MatF];
-   ftnwF  = &DF[TNW *size_MatF];
-   fbneF  = &DF[BNE *size_MatF];
-   fbswF  = &DF[BSW *size_MatF];
-   fbseF  = &DF[BSE *size_MatF];
-   fbnwF  = &DF[BNW *size_MatF];
+   feF    = &DF[DIR_P00   *size_MatF];
+   fwF    = &DF[DIR_M00   *size_MatF];
+   fnF    = &DF[DIR_0P0   *size_MatF];
+   fsF    = &DF[DIR_0M0   *size_MatF];
+   ftF    = &DF[DIR_00P   *size_MatF];
+   fbF    = &DF[DIR_00M   *size_MatF];
+   fneF   = &DF[DIR_PP0  *size_MatF];
+   fswF   = &DF[DIR_MM0  *size_MatF];
+   fseF   = &DF[DIR_PM0  *size_MatF];
+   fnwF   = &DF[DIR_MP0  *size_MatF];
+   fteF   = &DF[DIR_P0P  *size_MatF];
+   fbwF   = &DF[DIR_M0M  *size_MatF];
+   fbeF   = &DF[DIR_P0M  *size_MatF];
+   ftwF   = &DF[DIR_M0P  *size_MatF];
+   ftnF   = &DF[DIR_0PP  *size_MatF];
+   fbsF   = &DF[DIR_0MM  *size_MatF];
+   fbnF   = &DF[DIR_0PM  *size_MatF];
+   ftsF   = &DF[DIR_0MP  *size_MatF];
+   //fzeroF = &DF[DIR_000*size_MatF];
+   ftneF  = &DF[DIR_PPP *size_MatF];
+   ftswF  = &DF[DIR_MMP *size_MatF];
+   ftseF  = &DF[DIR_PMP *size_MatF];
+   ftnwF  = &DF[DIR_MPP *size_MatF];
+   fbneF  = &DF[DIR_PPM *size_MatF];
+   fbswF  = &DF[DIR_MMM *size_MatF];
+   fbseF  = &DF[DIR_PMM *size_MatF];
+   fbnwF  = &DF[DIR_MPM *size_MatF];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, //*fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[E   *size_MatC];
-      fwC    = &DC[W   *size_MatC];
-      fnC    = &DC[N   *size_MatC];
-      fsC    = &DC[S   *size_MatC];
-      ftC    = &DC[T   *size_MatC];
-      fbC    = &DC[B   *size_MatC];
-      fneC   = &DC[NE  *size_MatC];
-      fswC   = &DC[SW  *size_MatC];
-      fseC   = &DC[SE  *size_MatC];
-      fnwC   = &DC[NW  *size_MatC];
-      fteC   = &DC[TE  *size_MatC];
-      fbwC   = &DC[BW  *size_MatC];
-      fbeC   = &DC[BE  *size_MatC];
-      ftwC   = &DC[TW  *size_MatC];
-      ftnC   = &DC[TN  *size_MatC];
-      fbsC   = &DC[BS  *size_MatC];
-      fbnC   = &DC[BN  *size_MatC];
-      ftsC   = &DC[TS  *size_MatC];
-      //fzeroC = &DC[REST*size_MatC];
-      ftneC  = &DC[TNE *size_MatC];
-      ftswC  = &DC[TSW *size_MatC];
-      ftseC  = &DC[TSE *size_MatC];
-      ftnwC  = &DC[TNW *size_MatC];
-      fbneC  = &DC[BNE *size_MatC];
-      fbswC  = &DC[BSW *size_MatC];
-      fbseC  = &DC[BSE *size_MatC];
-      fbnwC  = &DC[BNW *size_MatC];
+      feC    = &DC[DIR_P00   *size_MatC];
+      fwC    = &DC[DIR_M00   *size_MatC];
+      fnC    = &DC[DIR_0P0   *size_MatC];
+      fsC    = &DC[DIR_0M0   *size_MatC];
+      ftC    = &DC[DIR_00P   *size_MatC];
+      fbC    = &DC[DIR_00M   *size_MatC];
+      fneC   = &DC[DIR_PP0  *size_MatC];
+      fswC   = &DC[DIR_MM0  *size_MatC];
+      fseC   = &DC[DIR_PM0  *size_MatC];
+      fnwC   = &DC[DIR_MP0  *size_MatC];
+      fteC   = &DC[DIR_P0P  *size_MatC];
+      fbwC   = &DC[DIR_M0M  *size_MatC];
+      fbeC   = &DC[DIR_P0M  *size_MatC];
+      ftwC   = &DC[DIR_M0P  *size_MatC];
+      ftnC   = &DC[DIR_0PP  *size_MatC];
+      fbsC   = &DC[DIR_0MM  *size_MatC];
+      fbnC   = &DC[DIR_0PM  *size_MatC];
+      ftsC   = &DC[DIR_0MP  *size_MatC];
+      //fzeroC = &DC[DIR_000*size_MatC];
+      ftneC  = &DC[DIR_PPP *size_MatC];
+      ftswC  = &DC[DIR_MMP *size_MatC];
+      ftseC  = &DC[DIR_PMP *size_MatC];
+      ftnwC  = &DC[DIR_MPP *size_MatC];
+      fbneC  = &DC[DIR_PPM *size_MatC];
+      fbswC  = &DC[DIR_MMM *size_MatC];
+      fbseC  = &DC[DIR_PMM *size_MatC];
+      fbnwC  = &DC[DIR_MPM *size_MatC];
    } 
    else
    {
-      fwC    = &DC[E   *size_MatC];
-      feC    = &DC[W   *size_MatC];
-      fsC    = &DC[N   *size_MatC];
-      fnC    = &DC[S   *size_MatC];
-      fbC    = &DC[T   *size_MatC];
-      ftC    = &DC[B   *size_MatC];
-      fswC   = &DC[NE  *size_MatC];
-      fneC   = &DC[SW  *size_MatC];
-      fnwC   = &DC[SE  *size_MatC];
-      fseC   = &DC[NW  *size_MatC];
-      fbwC   = &DC[TE  *size_MatC];
-      fteC   = &DC[BW  *size_MatC];
-      ftwC   = &DC[BE  *size_MatC];
-      fbeC   = &DC[TW  *size_MatC];
-      fbsC   = &DC[TN  *size_MatC];
-      ftnC   = &DC[BS  *size_MatC];
-      ftsC   = &DC[BN  *size_MatC];
-      fbnC   = &DC[TS  *size_MatC];
-      //fzeroC = &DC[REST*size_MatC];
-      fbswC  = &DC[TNE *size_MatC];
-      fbneC  = &DC[TSW *size_MatC];
-      fbnwC  = &DC[TSE *size_MatC];
-      fbseC  = &DC[TNW *size_MatC];
-      ftswC  = &DC[BNE *size_MatC];
-      ftneC  = &DC[BSW *size_MatC];
-      ftnwC  = &DC[BSE *size_MatC];
-      ftseC  = &DC[BNW *size_MatC];
+      fwC    = &DC[DIR_P00   *size_MatC];
+      feC    = &DC[DIR_M00   *size_MatC];
+      fsC    = &DC[DIR_0P0   *size_MatC];
+      fnC    = &DC[DIR_0M0   *size_MatC];
+      fbC    = &DC[DIR_00P   *size_MatC];
+      ftC    = &DC[DIR_00M   *size_MatC];
+      fswC   = &DC[DIR_PP0  *size_MatC];
+      fneC   = &DC[DIR_MM0  *size_MatC];
+      fnwC   = &DC[DIR_PM0  *size_MatC];
+      fseC   = &DC[DIR_MP0  *size_MatC];
+      fbwC   = &DC[DIR_P0P  *size_MatC];
+      fteC   = &DC[DIR_M0M  *size_MatC];
+      ftwC   = &DC[DIR_P0M  *size_MatC];
+      fbeC   = &DC[DIR_M0P  *size_MatC];
+      fbsC   = &DC[DIR_0PP  *size_MatC];
+      ftnC   = &DC[DIR_0MM  *size_MatC];
+      ftsC   = &DC[DIR_0PM  *size_MatC];
+      fbnC   = &DC[DIR_0MP  *size_MatC];
+      //fzeroC = &DC[DIR_000*size_MatC];
+      fbswC  = &DC[DIR_PPP *size_MatC];
+      fbneC  = &DC[DIR_MMP *size_MatC];
+      fbnwC  = &DC[DIR_PMP *size_MatC];
+      fbseC  = &DC[DIR_MPP *size_MatC];
+      ftswC  = &DC[DIR_PPM *size_MatC];
+      ftneC  = &DC[DIR_MMM *size_MatC];
+      ftnwC  = &DC[DIR_PMM *size_MatC];
+      ftseC  = &DC[DIR_MPM *size_MatC];
    }
 
    Distributions7 D7F;
@@ -20890,7 +20890,7 @@ extern "C" __global__ void scaleFCThSMG7(    real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleFCThS7(   real* DC, 
+__global__ void scaleFCThS7(   real* DC, 
                                           real* DF, 
                                           real* DD7C, 
                                           real* DD7F, 
@@ -20912,96 +20912,96 @@ extern "C" __global__ void scaleFCThS7(   real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, //*fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[E   *size_MatF];
-   fwF    = &DF[W   *size_MatF];
-   fnF    = &DF[N   *size_MatF];
-   fsF    = &DF[S   *size_MatF];
-   ftF    = &DF[T   *size_MatF];
-   fbF    = &DF[B   *size_MatF];
-   fneF   = &DF[NE  *size_MatF];
-   fswF   = &DF[SW  *size_MatF];
-   fseF   = &DF[SE  *size_MatF];
-   fnwF   = &DF[NW  *size_MatF];
-   fteF   = &DF[TE  *size_MatF];
-   fbwF   = &DF[BW  *size_MatF];
-   fbeF   = &DF[BE  *size_MatF];
-   ftwF   = &DF[TW  *size_MatF];
-   ftnF   = &DF[TN  *size_MatF];
-   fbsF   = &DF[BS  *size_MatF];
-   fbnF   = &DF[BN  *size_MatF];
-   ftsF   = &DF[TS  *size_MatF];
-   //fzeroF = &DF[REST*size_MatF];
-   ftneF  = &DF[TNE *size_MatF];
-   ftswF  = &DF[TSW *size_MatF];
-   ftseF  = &DF[TSE *size_MatF];
-   ftnwF  = &DF[TNW *size_MatF];
-   fbneF  = &DF[BNE *size_MatF];
-   fbswF  = &DF[BSW *size_MatF];
-   fbseF  = &DF[BSE *size_MatF];
-   fbnwF  = &DF[BNW *size_MatF];
+   feF    = &DF[DIR_P00   *size_MatF];
+   fwF    = &DF[DIR_M00   *size_MatF];
+   fnF    = &DF[DIR_0P0   *size_MatF];
+   fsF    = &DF[DIR_0M0   *size_MatF];
+   ftF    = &DF[DIR_00P   *size_MatF];
+   fbF    = &DF[DIR_00M   *size_MatF];
+   fneF   = &DF[DIR_PP0  *size_MatF];
+   fswF   = &DF[DIR_MM0  *size_MatF];
+   fseF   = &DF[DIR_PM0  *size_MatF];
+   fnwF   = &DF[DIR_MP0  *size_MatF];
+   fteF   = &DF[DIR_P0P  *size_MatF];
+   fbwF   = &DF[DIR_M0M  *size_MatF];
+   fbeF   = &DF[DIR_P0M  *size_MatF];
+   ftwF   = &DF[DIR_M0P  *size_MatF];
+   ftnF   = &DF[DIR_0PP  *size_MatF];
+   fbsF   = &DF[DIR_0MM  *size_MatF];
+   fbnF   = &DF[DIR_0PM  *size_MatF];
+   ftsF   = &DF[DIR_0MP  *size_MatF];
+   //fzeroF = &DF[DIR_000*size_MatF];
+   ftneF  = &DF[DIR_PPP *size_MatF];
+   ftswF  = &DF[DIR_MMP *size_MatF];
+   ftseF  = &DF[DIR_PMP *size_MatF];
+   ftnwF  = &DF[DIR_MPP *size_MatF];
+   fbneF  = &DF[DIR_PPM *size_MatF];
+   fbswF  = &DF[DIR_MMM *size_MatF];
+   fbseF  = &DF[DIR_PMM *size_MatF];
+   fbnwF  = &DF[DIR_MPM *size_MatF];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, //*fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[E   *size_MatC];
-      fwC    = &DC[W   *size_MatC];
-      fnC    = &DC[N   *size_MatC];
-      fsC    = &DC[S   *size_MatC];
-      ftC    = &DC[T   *size_MatC];
-      fbC    = &DC[B   *size_MatC];
-      fneC   = &DC[NE  *size_MatC];
-      fswC   = &DC[SW  *size_MatC];
-      fseC   = &DC[SE  *size_MatC];
-      fnwC   = &DC[NW  *size_MatC];
-      fteC   = &DC[TE  *size_MatC];
-      fbwC   = &DC[BW  *size_MatC];
-      fbeC   = &DC[BE  *size_MatC];
-      ftwC   = &DC[TW  *size_MatC];
-      ftnC   = &DC[TN  *size_MatC];
-      fbsC   = &DC[BS  *size_MatC];
-      fbnC   = &DC[BN  *size_MatC];
-      ftsC   = &DC[TS  *size_MatC];
-      //fzeroC = &DC[REST*size_MatC];
-      ftneC  = &DC[TNE *size_MatC];
-      ftswC  = &DC[TSW *size_MatC];
-      ftseC  = &DC[TSE *size_MatC];
-      ftnwC  = &DC[TNW *size_MatC];
-      fbneC  = &DC[BNE *size_MatC];
-      fbswC  = &DC[BSW *size_MatC];
-      fbseC  = &DC[BSE *size_MatC];
-      fbnwC  = &DC[BNW *size_MatC];
+      feC    = &DC[DIR_P00   *size_MatC];
+      fwC    = &DC[DIR_M00   *size_MatC];
+      fnC    = &DC[DIR_0P0   *size_MatC];
+      fsC    = &DC[DIR_0M0   *size_MatC];
+      ftC    = &DC[DIR_00P   *size_MatC];
+      fbC    = &DC[DIR_00M   *size_MatC];
+      fneC   = &DC[DIR_PP0  *size_MatC];
+      fswC   = &DC[DIR_MM0  *size_MatC];
+      fseC   = &DC[DIR_PM0  *size_MatC];
+      fnwC   = &DC[DIR_MP0  *size_MatC];
+      fteC   = &DC[DIR_P0P  *size_MatC];
+      fbwC   = &DC[DIR_M0M  *size_MatC];
+      fbeC   = &DC[DIR_P0M  *size_MatC];
+      ftwC   = &DC[DIR_M0P  *size_MatC];
+      ftnC   = &DC[DIR_0PP  *size_MatC];
+      fbsC   = &DC[DIR_0MM  *size_MatC];
+      fbnC   = &DC[DIR_0PM  *size_MatC];
+      ftsC   = &DC[DIR_0MP  *size_MatC];
+      //fzeroC = &DC[DIR_000*size_MatC];
+      ftneC  = &DC[DIR_PPP *size_MatC];
+      ftswC  = &DC[DIR_MMP *size_MatC];
+      ftseC  = &DC[DIR_PMP *size_MatC];
+      ftnwC  = &DC[DIR_MPP *size_MatC];
+      fbneC  = &DC[DIR_PPM *size_MatC];
+      fbswC  = &DC[DIR_MMM *size_MatC];
+      fbseC  = &DC[DIR_PMM *size_MatC];
+      fbnwC  = &DC[DIR_MPM *size_MatC];
    } 
    else
    {
-      fwC    = &DC[E   *size_MatC];
-      feC    = &DC[W   *size_MatC];
-      fsC    = &DC[N   *size_MatC];
-      fnC    = &DC[S   *size_MatC];
-      fbC    = &DC[T   *size_MatC];
-      ftC    = &DC[B   *size_MatC];
-      fswC   = &DC[NE  *size_MatC];
-      fneC   = &DC[SW  *size_MatC];
-      fnwC   = &DC[SE  *size_MatC];
-      fseC   = &DC[NW  *size_MatC];
-      fbwC   = &DC[TE  *size_MatC];
-      fteC   = &DC[BW  *size_MatC];
-      ftwC   = &DC[BE  *size_MatC];
-      fbeC   = &DC[TW  *size_MatC];
-      fbsC   = &DC[TN  *size_MatC];
-      ftnC   = &DC[BS  *size_MatC];
-      ftsC   = &DC[BN  *size_MatC];
-      fbnC   = &DC[TS  *size_MatC];
-      //fzeroC = &DC[REST*size_MatC];
-      fbswC  = &DC[TNE *size_MatC];
-      fbneC  = &DC[TSW *size_MatC];
-      fbnwC  = &DC[TSE *size_MatC];
-      fbseC  = &DC[TNW *size_MatC];
-      ftswC  = &DC[BNE *size_MatC];
-      ftneC  = &DC[BSW *size_MatC];
-      ftnwC  = &DC[BSE *size_MatC];
-      ftseC  = &DC[BNW *size_MatC];
+      fwC    = &DC[DIR_P00   *size_MatC];
+      feC    = &DC[DIR_M00   *size_MatC];
+      fsC    = &DC[DIR_0P0   *size_MatC];
+      fnC    = &DC[DIR_0M0   *size_MatC];
+      fbC    = &DC[DIR_00P   *size_MatC];
+      ftC    = &DC[DIR_00M   *size_MatC];
+      fswC   = &DC[DIR_PP0  *size_MatC];
+      fneC   = &DC[DIR_MM0  *size_MatC];
+      fnwC   = &DC[DIR_PM0  *size_MatC];
+      fseC   = &DC[DIR_MP0  *size_MatC];
+      fbwC   = &DC[DIR_P0P  *size_MatC];
+      fteC   = &DC[DIR_M0M  *size_MatC];
+      ftwC   = &DC[DIR_P0M  *size_MatC];
+      fbeC   = &DC[DIR_M0P  *size_MatC];
+      fbsC   = &DC[DIR_0PP  *size_MatC];
+      ftnC   = &DC[DIR_0MM  *size_MatC];
+      ftsC   = &DC[DIR_0PM  *size_MatC];
+      fbnC   = &DC[DIR_0MP  *size_MatC];
+      //fzeroC = &DC[DIR_000*size_MatC];
+      fbswC  = &DC[DIR_PPP *size_MatC];
+      fbneC  = &DC[DIR_MMP *size_MatC];
+      fbnwC  = &DC[DIR_PMP *size_MatC];
+      fbseC  = &DC[DIR_MPP *size_MatC];
+      ftswC  = &DC[DIR_PPM *size_MatC];
+      ftneC  = &DC[DIR_MMM *size_MatC];
+      ftnwC  = &DC[DIR_PMM *size_MatC];
+      ftseC  = &DC[DIR_MPM *size_MatC];
    }
 
    Distributions7 D7F;
@@ -21681,7 +21681,7 @@ extern "C" __global__ void scaleFCThS7(   real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleFCThS27(     real* DC, 
+__global__ void scaleFCThS27(     real* DC, 
                                              real* DF, 
                                              real* DD27C, 
                                              real* DD27F, 
@@ -21704,187 +21704,187 @@ extern "C" __global__ void scaleFCThS27(     real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, //*fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[E   *size_MatF];
-   fwF    = &DF[W   *size_MatF];
-   fnF    = &DF[N   *size_MatF];
-   fsF    = &DF[S   *size_MatF];
-   ftF    = &DF[T   *size_MatF];
-   fbF    = &DF[B   *size_MatF];
-   fneF   = &DF[NE  *size_MatF];
-   fswF   = &DF[SW  *size_MatF];
-   fseF   = &DF[SE  *size_MatF];
-   fnwF   = &DF[NW  *size_MatF];
-   fteF   = &DF[TE  *size_MatF];
-   fbwF   = &DF[BW  *size_MatF];
-   fbeF   = &DF[BE  *size_MatF];
-   ftwF   = &DF[TW  *size_MatF];
-   ftnF   = &DF[TN  *size_MatF];
-   fbsF   = &DF[BS  *size_MatF];
-   fbnF   = &DF[BN  *size_MatF];
-   ftsF   = &DF[TS  *size_MatF];
-   //fzeroF = &DF[REST*size_MatF];
-   ftneF  = &DF[TNE *size_MatF];
-   ftswF  = &DF[TSW *size_MatF];
-   ftseF  = &DF[TSE *size_MatF];
-   ftnwF  = &DF[TNW *size_MatF];
-   fbneF  = &DF[BNE *size_MatF];
-   fbswF  = &DF[BSW *size_MatF];
-   fbseF  = &DF[BSE *size_MatF];
-   fbnwF  = &DF[BNW *size_MatF];
+   feF    = &DF[DIR_P00   *size_MatF];
+   fwF    = &DF[DIR_M00   *size_MatF];
+   fnF    = &DF[DIR_0P0   *size_MatF];
+   fsF    = &DF[DIR_0M0   *size_MatF];
+   ftF    = &DF[DIR_00P   *size_MatF];
+   fbF    = &DF[DIR_00M   *size_MatF];
+   fneF   = &DF[DIR_PP0  *size_MatF];
+   fswF   = &DF[DIR_MM0  *size_MatF];
+   fseF   = &DF[DIR_PM0  *size_MatF];
+   fnwF   = &DF[DIR_MP0  *size_MatF];
+   fteF   = &DF[DIR_P0P  *size_MatF];
+   fbwF   = &DF[DIR_M0M  *size_MatF];
+   fbeF   = &DF[DIR_P0M  *size_MatF];
+   ftwF   = &DF[DIR_M0P  *size_MatF];
+   ftnF   = &DF[DIR_0PP  *size_MatF];
+   fbsF   = &DF[DIR_0MM  *size_MatF];
+   fbnF   = &DF[DIR_0PM  *size_MatF];
+   ftsF   = &DF[DIR_0MP  *size_MatF];
+   //fzeroF = &DF[DIR_000*size_MatF];
+   ftneF  = &DF[DIR_PPP *size_MatF];
+   ftswF  = &DF[DIR_MMP *size_MatF];
+   ftseF  = &DF[DIR_PMP *size_MatF];
+   ftnwF  = &DF[DIR_MPP *size_MatF];
+   fbneF  = &DF[DIR_PPM *size_MatF];
+   fbswF  = &DF[DIR_MMM *size_MatF];
+   fbseF  = &DF[DIR_PMM *size_MatF];
+   fbnwF  = &DF[DIR_MPM *size_MatF];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, //*fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[E   *size_MatC];
-      fwC    = &DC[W   *size_MatC];
-      fnC    = &DC[N   *size_MatC];
-      fsC    = &DC[S   *size_MatC];
-      ftC    = &DC[T   *size_MatC];
-      fbC    = &DC[B   *size_MatC];
-      fneC   = &DC[NE  *size_MatC];
-      fswC   = &DC[SW  *size_MatC];
-      fseC   = &DC[SE  *size_MatC];
-      fnwC   = &DC[NW  *size_MatC];
-      fteC   = &DC[TE  *size_MatC];
-      fbwC   = &DC[BW  *size_MatC];
-      fbeC   = &DC[BE  *size_MatC];
-      ftwC   = &DC[TW  *size_MatC];
-      ftnC   = &DC[TN  *size_MatC];
-      fbsC   = &DC[BS  *size_MatC];
-      fbnC   = &DC[BN  *size_MatC];
-      ftsC   = &DC[TS  *size_MatC];
-      //fzeroC = &DC[REST*size_MatC];
-      ftneC  = &DC[TNE *size_MatC];
-      ftswC  = &DC[TSW *size_MatC];
-      ftseC  = &DC[TSE *size_MatC];
-      ftnwC  = &DC[TNW *size_MatC];
-      fbneC  = &DC[BNE *size_MatC];
-      fbswC  = &DC[BSW *size_MatC];
-      fbseC  = &DC[BSE *size_MatC];
-      fbnwC  = &DC[BNW *size_MatC];
+      feC    = &DC[DIR_P00   *size_MatC];
+      fwC    = &DC[DIR_M00   *size_MatC];
+      fnC    = &DC[DIR_0P0   *size_MatC];
+      fsC    = &DC[DIR_0M0   *size_MatC];
+      ftC    = &DC[DIR_00P   *size_MatC];
+      fbC    = &DC[DIR_00M   *size_MatC];
+      fneC   = &DC[DIR_PP0  *size_MatC];
+      fswC   = &DC[DIR_MM0  *size_MatC];
+      fseC   = &DC[DIR_PM0  *size_MatC];
+      fnwC   = &DC[DIR_MP0  *size_MatC];
+      fteC   = &DC[DIR_P0P  *size_MatC];
+      fbwC   = &DC[DIR_M0M  *size_MatC];
+      fbeC   = &DC[DIR_P0M  *size_MatC];
+      ftwC   = &DC[DIR_M0P  *size_MatC];
+      ftnC   = &DC[DIR_0PP  *size_MatC];
+      fbsC   = &DC[DIR_0MM  *size_MatC];
+      fbnC   = &DC[DIR_0PM  *size_MatC];
+      ftsC   = &DC[DIR_0MP  *size_MatC];
+      //fzeroC = &DC[DIR_000*size_MatC];
+      ftneC  = &DC[DIR_PPP *size_MatC];
+      ftswC  = &DC[DIR_MMP *size_MatC];
+      ftseC  = &DC[DIR_PMP *size_MatC];
+      ftnwC  = &DC[DIR_MPP *size_MatC];
+      fbneC  = &DC[DIR_PPM *size_MatC];
+      fbswC  = &DC[DIR_MMM *size_MatC];
+      fbseC  = &DC[DIR_PMM *size_MatC];
+      fbnwC  = &DC[DIR_MPM *size_MatC];
    } 
    else
    {
-      fwC    = &DC[E   *size_MatC];
-      feC    = &DC[W   *size_MatC];
-      fsC    = &DC[N   *size_MatC];
-      fnC    = &DC[S   *size_MatC];
-      fbC    = &DC[T   *size_MatC];
-      ftC    = &DC[B   *size_MatC];
-      fswC   = &DC[NE  *size_MatC];
-      fneC   = &DC[SW  *size_MatC];
-      fnwC   = &DC[SE  *size_MatC];
-      fseC   = &DC[NW  *size_MatC];
-      fbwC   = &DC[TE  *size_MatC];
-      fteC   = &DC[BW  *size_MatC];
-      ftwC   = &DC[BE  *size_MatC];
-      fbeC   = &DC[TW  *size_MatC];
-      fbsC   = &DC[TN  *size_MatC];
-      ftnC   = &DC[BS  *size_MatC];
-      ftsC   = &DC[BN  *size_MatC];
-      fbnC   = &DC[TS  *size_MatC];
-      //fzeroC = &DC[REST*size_MatC];
-      fbswC  = &DC[TNE *size_MatC];
-      fbneC  = &DC[TSW *size_MatC];
-      fbnwC  = &DC[TSE *size_MatC];
-      fbseC  = &DC[TNW *size_MatC];
-      ftswC  = &DC[BNE *size_MatC];
-      ftneC  = &DC[BSW *size_MatC];
-      ftnwC  = &DC[BSE *size_MatC];
-      ftseC  = &DC[BNW *size_MatC];
+      fwC    = &DC[DIR_P00   *size_MatC];
+      feC    = &DC[DIR_M00   *size_MatC];
+      fsC    = &DC[DIR_0P0   *size_MatC];
+      fnC    = &DC[DIR_0M0   *size_MatC];
+      fbC    = &DC[DIR_00P   *size_MatC];
+      ftC    = &DC[DIR_00M   *size_MatC];
+      fswC   = &DC[DIR_PP0  *size_MatC];
+      fneC   = &DC[DIR_MM0  *size_MatC];
+      fnwC   = &DC[DIR_PM0  *size_MatC];
+      fseC   = &DC[DIR_MP0  *size_MatC];
+      fbwC   = &DC[DIR_P0P  *size_MatC];
+      fteC   = &DC[DIR_M0M  *size_MatC];
+      ftwC   = &DC[DIR_P0M  *size_MatC];
+      fbeC   = &DC[DIR_M0P  *size_MatC];
+      fbsC   = &DC[DIR_0PP  *size_MatC];
+      ftnC   = &DC[DIR_0MM  *size_MatC];
+      ftsC   = &DC[DIR_0PM  *size_MatC];
+      fbnC   = &DC[DIR_0MP  *size_MatC];
+      //fzeroC = &DC[DIR_000*size_MatC];
+      fbswC  = &DC[DIR_PPP *size_MatC];
+      fbneC  = &DC[DIR_MMP *size_MatC];
+      fbnwC  = &DC[DIR_PMP *size_MatC];
+      fbseC  = &DC[DIR_MPP *size_MatC];
+      ftswC  = &DC[DIR_PPM *size_MatC];
+      ftneC  = &DC[DIR_MMM *size_MatC];
+      ftnwC  = &DC[DIR_PMM *size_MatC];
+      ftseC  = &DC[DIR_MPM *size_MatC];
    }
 
    Distributions27 D27F;
-   D27F.f[E   ] = &DD27F[E   *size_MatF];
-   D27F.f[W   ] = &DD27F[W   *size_MatF];
-   D27F.f[N   ] = &DD27F[N   *size_MatF];
-   D27F.f[S   ] = &DD27F[S   *size_MatF];
-   D27F.f[T   ] = &DD27F[T   *size_MatF];
-   D27F.f[B   ] = &DD27F[B   *size_MatF];
-   D27F.f[NE  ] = &DD27F[NE  *size_MatF];
-   D27F.f[SW  ] = &DD27F[SW  *size_MatF];
-   D27F.f[SE  ] = &DD27F[SE  *size_MatF];
-   D27F.f[NW  ] = &DD27F[NW  *size_MatF];
-   D27F.f[TE  ] = &DD27F[TE  *size_MatF];
-   D27F.f[BW  ] = &DD27F[BW  *size_MatF];
-   D27F.f[BE  ] = &DD27F[BE  *size_MatF];
-   D27F.f[TW  ] = &DD27F[TW  *size_MatF];
-   D27F.f[TN  ] = &DD27F[TN  *size_MatF];
-   D27F.f[BS  ] = &DD27F[BS  *size_MatF];
-   D27F.f[BN  ] = &DD27F[BN  *size_MatF];
-   D27F.f[TS  ] = &DD27F[TS  *size_MatF];
-   D27F.f[REST] = &DD27F[REST*size_MatF];
-   D27F.f[TNE ] = &DD27F[TNE *size_MatF];
-   D27F.f[TSW ] = &DD27F[TSW *size_MatF];
-   D27F.f[TSE ] = &DD27F[TSE *size_MatF];
-   D27F.f[TNW ] = &DD27F[TNW *size_MatF];
-   D27F.f[BNE ] = &DD27F[BNE *size_MatF];
-   D27F.f[BSW ] = &DD27F[BSW *size_MatF];
-   D27F.f[BSE ] = &DD27F[BSE *size_MatF];
-   D27F.f[BNW ] = &DD27F[BNW *size_MatF];
+   D27F.f[DIR_P00   ] = &DD27F[DIR_P00   *size_MatF];
+   D27F.f[DIR_M00   ] = &DD27F[DIR_M00   *size_MatF];
+   D27F.f[DIR_0P0   ] = &DD27F[DIR_0P0   *size_MatF];
+   D27F.f[DIR_0M0   ] = &DD27F[DIR_0M0   *size_MatF];
+   D27F.f[DIR_00P   ] = &DD27F[DIR_00P   *size_MatF];
+   D27F.f[DIR_00M   ] = &DD27F[DIR_00M   *size_MatF];
+   D27F.f[DIR_PP0  ] = &DD27F[DIR_PP0  *size_MatF];
+   D27F.f[DIR_MM0  ] = &DD27F[DIR_MM0  *size_MatF];
+   D27F.f[DIR_PM0  ] = &DD27F[DIR_PM0  *size_MatF];
+   D27F.f[DIR_MP0  ] = &DD27F[DIR_MP0  *size_MatF];
+   D27F.f[DIR_P0P  ] = &DD27F[DIR_P0P  *size_MatF];
+   D27F.f[DIR_M0M  ] = &DD27F[DIR_M0M  *size_MatF];
+   D27F.f[DIR_P0M  ] = &DD27F[DIR_P0M  *size_MatF];
+   D27F.f[DIR_M0P  ] = &DD27F[DIR_M0P  *size_MatF];
+   D27F.f[DIR_0PP  ] = &DD27F[DIR_0PP  *size_MatF];
+   D27F.f[DIR_0MM  ] = &DD27F[DIR_0MM  *size_MatF];
+   D27F.f[DIR_0PM  ] = &DD27F[DIR_0PM  *size_MatF];
+   D27F.f[DIR_0MP  ] = &DD27F[DIR_0MP  *size_MatF];
+   D27F.f[DIR_000] = &DD27F[DIR_000*size_MatF];
+   D27F.f[DIR_PPP ] = &DD27F[DIR_PPP *size_MatF];
+   D27F.f[DIR_MMP ] = &DD27F[DIR_MMP *size_MatF];
+   D27F.f[DIR_PMP ] = &DD27F[DIR_PMP *size_MatF];
+   D27F.f[DIR_MPP ] = &DD27F[DIR_MPP *size_MatF];
+   D27F.f[DIR_PPM ] = &DD27F[DIR_PPM *size_MatF];
+   D27F.f[DIR_MMM ] = &DD27F[DIR_MMM *size_MatF];
+   D27F.f[DIR_PMM ] = &DD27F[DIR_PMM *size_MatF];
+   D27F.f[DIR_MPM ] = &DD27F[DIR_MPM *size_MatF];
 
    Distributions27 D27C;
    if (isEvenTimestep==true)
    {
-      D27C.f[E   ] = &DD27C[E   *size_MatC];
-      D27C.f[W   ] = &DD27C[W   *size_MatC];
-      D27C.f[N   ] = &DD27C[N   *size_MatC];
-      D27C.f[S   ] = &DD27C[S   *size_MatC];
-      D27C.f[T   ] = &DD27C[T   *size_MatC];
-      D27C.f[B   ] = &DD27C[B   *size_MatC];
-      D27C.f[NE  ] = &DD27C[NE  *size_MatC];
-      D27C.f[SW  ] = &DD27C[SW  *size_MatC];
-      D27C.f[SE  ] = &DD27C[SE  *size_MatC];
-      D27C.f[NW  ] = &DD27C[NW  *size_MatC];
-      D27C.f[TE  ] = &DD27C[TE  *size_MatC];
-      D27C.f[BW  ] = &DD27C[BW  *size_MatC];
-      D27C.f[BE  ] = &DD27C[BE  *size_MatC];
-      D27C.f[TW  ] = &DD27C[TW  *size_MatC];
-      D27C.f[TN  ] = &DD27C[TN  *size_MatC];
-      D27C.f[BS  ] = &DD27C[BS  *size_MatC];
-      D27C.f[BN  ] = &DD27C[BN  *size_MatC];
-      D27C.f[TS  ] = &DD27C[TS  *size_MatC];
-      D27C.f[REST] = &DD27C[REST*size_MatC];
-      D27C.f[TNE ] = &DD27C[TNE *size_MatC];
-      D27C.f[TSW ] = &DD27C[TSW *size_MatC];
-      D27C.f[TSE ] = &DD27C[TSE *size_MatC];
-      D27C.f[TNW ] = &DD27C[TNW *size_MatC];
-      D27C.f[BNE ] = &DD27C[BNE *size_MatC];
-      D27C.f[BSW ] = &DD27C[BSW *size_MatC];
-      D27C.f[BSE ] = &DD27C[BSE *size_MatC];
-      D27C.f[BNW ] = &DD27C[BNW *size_MatC];
+      D27C.f[DIR_P00   ] = &DD27C[DIR_P00   *size_MatC];
+      D27C.f[DIR_M00   ] = &DD27C[DIR_M00   *size_MatC];
+      D27C.f[DIR_0P0   ] = &DD27C[DIR_0P0   *size_MatC];
+      D27C.f[DIR_0M0   ] = &DD27C[DIR_0M0   *size_MatC];
+      D27C.f[DIR_00P   ] = &DD27C[DIR_00P   *size_MatC];
+      D27C.f[DIR_00M   ] = &DD27C[DIR_00M   *size_MatC];
+      D27C.f[DIR_PP0  ] = &DD27C[DIR_PP0  *size_MatC];
+      D27C.f[DIR_MM0  ] = &DD27C[DIR_MM0  *size_MatC];
+      D27C.f[DIR_PM0  ] = &DD27C[DIR_PM0  *size_MatC];
+      D27C.f[DIR_MP0  ] = &DD27C[DIR_MP0  *size_MatC];
+      D27C.f[DIR_P0P  ] = &DD27C[DIR_P0P  *size_MatC];
+      D27C.f[DIR_M0M  ] = &DD27C[DIR_M0M  *size_MatC];
+      D27C.f[DIR_P0M  ] = &DD27C[DIR_P0M  *size_MatC];
+      D27C.f[DIR_M0P  ] = &DD27C[DIR_M0P  *size_MatC];
+      D27C.f[DIR_0PP  ] = &DD27C[DIR_0PP  *size_MatC];
+      D27C.f[DIR_0MM  ] = &DD27C[DIR_0MM  *size_MatC];
+      D27C.f[DIR_0PM  ] = &DD27C[DIR_0PM  *size_MatC];
+      D27C.f[DIR_0MP  ] = &DD27C[DIR_0MP  *size_MatC];
+      D27C.f[DIR_000] = &DD27C[DIR_000*size_MatC];
+      D27C.f[DIR_PPP ] = &DD27C[DIR_PPP *size_MatC];
+      D27C.f[DIR_MMP ] = &DD27C[DIR_MMP *size_MatC];
+      D27C.f[DIR_PMP ] = &DD27C[DIR_PMP *size_MatC];
+      D27C.f[DIR_MPP ] = &DD27C[DIR_MPP *size_MatC];
+      D27C.f[DIR_PPM ] = &DD27C[DIR_PPM *size_MatC];
+      D27C.f[DIR_MMM ] = &DD27C[DIR_MMM *size_MatC];
+      D27C.f[DIR_PMM ] = &DD27C[DIR_PMM *size_MatC];
+      D27C.f[DIR_MPM ] = &DD27C[DIR_MPM *size_MatC];
    }
    else
    {
-      D27C.f[W   ] = &DD27C[E   *size_MatC];
-      D27C.f[E   ] = &DD27C[W   *size_MatC];
-      D27C.f[S   ] = &DD27C[N   *size_MatC];
-      D27C.f[N   ] = &DD27C[S   *size_MatC];
-      D27C.f[B   ] = &DD27C[T   *size_MatC];
-      D27C.f[T   ] = &DD27C[B   *size_MatC];
-      D27C.f[SW  ] = &DD27C[NE  *size_MatC];
-      D27C.f[NE  ] = &DD27C[SW  *size_MatC];
-      D27C.f[NW  ] = &DD27C[SE  *size_MatC];
-      D27C.f[SE  ] = &DD27C[NW  *size_MatC];
-      D27C.f[BW  ] = &DD27C[TE  *size_MatC];
-      D27C.f[TE  ] = &DD27C[BW  *size_MatC];
-      D27C.f[TW  ] = &DD27C[BE  *size_MatC];
-      D27C.f[BE  ] = &DD27C[TW  *size_MatC];
-      D27C.f[BS  ] = &DD27C[TN  *size_MatC];
-      D27C.f[TN  ] = &DD27C[BS  *size_MatC];
-      D27C.f[TS  ] = &DD27C[BN  *size_MatC];
-      D27C.f[BN  ] = &DD27C[TS  *size_MatC];
-      D27C.f[REST] = &DD27C[REST*size_MatC];
-      D27C.f[BSW ] = &DD27C[TNE *size_MatC];
-      D27C.f[BNE ] = &DD27C[TSW *size_MatC];
-      D27C.f[BNW ] = &DD27C[TSE *size_MatC];
-      D27C.f[BSE ] = &DD27C[TNW *size_MatC];
-      D27C.f[TSW ] = &DD27C[BNE *size_MatC];
-      D27C.f[TNE ] = &DD27C[BSW *size_MatC];
-      D27C.f[TNW ] = &DD27C[BSE *size_MatC];
-      D27C.f[TSE ] = &DD27C[BNW *size_MatC];
+      D27C.f[DIR_M00   ] = &DD27C[DIR_P00   *size_MatC];
+      D27C.f[DIR_P00   ] = &DD27C[DIR_M00   *size_MatC];
+      D27C.f[DIR_0M0   ] = &DD27C[DIR_0P0   *size_MatC];
+      D27C.f[DIR_0P0   ] = &DD27C[DIR_0M0   *size_MatC];
+      D27C.f[DIR_00M   ] = &DD27C[DIR_00P   *size_MatC];
+      D27C.f[DIR_00P   ] = &DD27C[DIR_00M   *size_MatC];
+      D27C.f[DIR_MM0  ] = &DD27C[DIR_PP0  *size_MatC];
+      D27C.f[DIR_PP0  ] = &DD27C[DIR_MM0  *size_MatC];
+      D27C.f[DIR_MP0  ] = &DD27C[DIR_PM0  *size_MatC];
+      D27C.f[DIR_PM0  ] = &DD27C[DIR_MP0  *size_MatC];
+      D27C.f[DIR_M0M  ] = &DD27C[DIR_P0P  *size_MatC];
+      D27C.f[DIR_P0P  ] = &DD27C[DIR_M0M  *size_MatC];
+      D27C.f[DIR_M0P  ] = &DD27C[DIR_P0M  *size_MatC];
+      D27C.f[DIR_P0M  ] = &DD27C[DIR_M0P  *size_MatC];
+      D27C.f[DIR_0MM  ] = &DD27C[DIR_0PP  *size_MatC];
+      D27C.f[DIR_0PP  ] = &DD27C[DIR_0MM  *size_MatC];
+      D27C.f[DIR_0MP  ] = &DD27C[DIR_0PM  *size_MatC];
+      D27C.f[DIR_0PM  ] = &DD27C[DIR_0MP  *size_MatC];
+      D27C.f[DIR_000] = &DD27C[DIR_000*size_MatC];
+      D27C.f[DIR_MMM ] = &DD27C[DIR_PPP *size_MatC];
+      D27C.f[DIR_PPM ] = &DD27C[DIR_MMP *size_MatC];
+      D27C.f[DIR_MPM ] = &DD27C[DIR_PMP *size_MatC];
+      D27C.f[DIR_PMM ] = &DD27C[DIR_MPP *size_MatC];
+      D27C.f[DIR_MMP ] = &DD27C[DIR_PPM *size_MatC];
+      D27C.f[DIR_PPP ] = &DD27C[DIR_MMM *size_MatC];
+      D27C.f[DIR_MPP ] = &DD27C[DIR_PMM *size_MatC];
+      D27C.f[DIR_PMP ] = &DD27C[DIR_MPM *size_MatC];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -21980,33 +21980,33 @@ extern "C" __global__ void scaleFCThS27(     real* DC,
       f_BSE  = fbseF[kbs];
       f_BNW  = fbnwF[kbw];
       //////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27F.f[E   ])[kzero];//ke
-      f27W    =  (D27F.f[W   ])[kw   ];
-      f27N    =  (D27F.f[N   ])[kzero];//kn
-      f27S    =  (D27F.f[S   ])[ks   ];
-      f27T    =  (D27F.f[T   ])[kzero];//kt
-      f27B    =  (D27F.f[B   ])[kb   ];
-      f27NE   =  (D27F.f[NE  ])[kzero];//kne
-      f27SW   =  (D27F.f[SW  ])[ksw  ];
-      f27SE   =  (D27F.f[SE  ])[ks   ];//kse
-      f27NW   =  (D27F.f[NW  ])[kw   ];//knw
-      f27TE   =  (D27F.f[TE  ])[kzero];//kte
-      f27BW   =  (D27F.f[BW  ])[kbw  ];
-      f27BE   =  (D27F.f[BE  ])[kb   ];//kbe
-      f27TW   =  (D27F.f[TW  ])[kw   ];//ktw
-      f27TN   =  (D27F.f[TN  ])[kzero];//ktn
-      f27BS   =  (D27F.f[BS  ])[kbs  ];
-      f27BN   =  (D27F.f[BN  ])[kb   ];//kbn
-      f27TS   =  (D27F.f[TS  ])[ks   ];//kts
-      f27ZERO =  (D27F.f[REST])[kzero];//kzero
-      f27TNE   = (D27F.f[TNE ])[kzero];//ktne
-      f27TSW   = (D27F.f[TSW ])[ksw  ];//ktsw
-      f27TSE   = (D27F.f[TSE ])[ks   ];//ktse
-      f27TNW   = (D27F.f[TNW ])[kw   ];//ktnw
-      f27BNE   = (D27F.f[BNE ])[kb   ];//kbne
-      f27BSW   = (D27F.f[BSW ])[kbsw ];
-      f27BSE   = (D27F.f[BSE ])[kbs  ];//kbse
-      f27BNW   = (D27F.f[BNW ])[kbw  ];//kbnw
+      f27E    =  (D27F.f[DIR_P00   ])[kzero];//ke
+      f27W    =  (D27F.f[DIR_M00   ])[kw   ];
+      f27N    =  (D27F.f[DIR_0P0   ])[kzero];//kn
+      f27S    =  (D27F.f[DIR_0M0   ])[ks   ];
+      f27T    =  (D27F.f[DIR_00P   ])[kzero];//kt
+      f27B    =  (D27F.f[DIR_00M   ])[kb   ];
+      f27NE   =  (D27F.f[DIR_PP0  ])[kzero];//kne
+      f27SW   =  (D27F.f[DIR_MM0  ])[ksw  ];
+      f27SE   =  (D27F.f[DIR_PM0  ])[ks   ];//kse
+      f27NW   =  (D27F.f[DIR_MP0  ])[kw   ];//knw
+      f27TE   =  (D27F.f[DIR_P0P  ])[kzero];//kte
+      f27BW   =  (D27F.f[DIR_M0M  ])[kbw  ];
+      f27BE   =  (D27F.f[DIR_P0M  ])[kb   ];//kbe
+      f27TW   =  (D27F.f[DIR_M0P  ])[kw   ];//ktw
+      f27TN   =  (D27F.f[DIR_0PP  ])[kzero];//ktn
+      f27BS   =  (D27F.f[DIR_0MM  ])[kbs  ];
+      f27BN   =  (D27F.f[DIR_0PM  ])[kb   ];//kbn
+      f27TS   =  (D27F.f[DIR_0MP  ])[ks   ];//kts
+      f27ZERO =  (D27F.f[DIR_000])[kzero];//kzero
+      f27TNE   = (D27F.f[DIR_PPP ])[kzero];//ktne
+      f27TSW   = (D27F.f[DIR_MMP ])[ksw  ];//ktsw
+      f27TSE   = (D27F.f[DIR_PMP ])[ks   ];//ktse
+      f27TNW   = (D27F.f[DIR_MPP ])[kw   ];//ktnw
+      f27BNE   = (D27F.f[DIR_PPM ])[kb   ];//kbne
+      f27BSW   = (D27F.f[DIR_MMM ])[kbsw ];
+      f27BSE   = (D27F.f[DIR_PMM ])[kbs  ];//kbse
+      f27BNW   = (D27F.f[DIR_MPM ])[kbw  ];//kbnw
 
       Conc_F_SWB = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -22067,33 +22067,33 @@ extern "C" __global__ void scaleFCThS27(     real* DC,
       f_BSE  = fbseF[kbs];
       f_BNW  = fbnwF[kbw];
       //////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27F.f[E   ])[kzero];//ke
-      f27W    =  (D27F.f[W   ])[kw   ];
-      f27N    =  (D27F.f[N   ])[kzero];//kn
-      f27S    =  (D27F.f[S   ])[ks   ];
-      f27T    =  (D27F.f[T   ])[kzero];//kt
-      f27B    =  (D27F.f[B   ])[kb   ];
-      f27NE   =  (D27F.f[NE  ])[kzero];//kne
-      f27SW   =  (D27F.f[SW  ])[ksw  ];
-      f27SE   =  (D27F.f[SE  ])[ks   ];//kse
-      f27NW   =  (D27F.f[NW  ])[kw   ];//knw
-      f27TE   =  (D27F.f[TE  ])[kzero];//kte
-      f27BW   =  (D27F.f[BW  ])[kbw  ];
-      f27BE   =  (D27F.f[BE  ])[kb   ];//kbe
-      f27TW   =  (D27F.f[TW  ])[kw   ];//ktw
-      f27TN   =  (D27F.f[TN  ])[kzero];//ktn
-      f27BS   =  (D27F.f[BS  ])[kbs  ];
-      f27BN   =  (D27F.f[BN  ])[kb   ];//kbn
-      f27TS   =  (D27F.f[TS  ])[ks   ];//kts
-      f27ZERO =  (D27F.f[REST])[kzero];//kzero
-      f27TNE   = (D27F.f[TNE ])[kzero];//ktne
-      f27TSW   = (D27F.f[TSW ])[ksw  ];//ktsw
-      f27TSE   = (D27F.f[TSE ])[ks   ];//ktse
-      f27TNW   = (D27F.f[TNW ])[kw   ];//ktnw
-      f27BNE   = (D27F.f[BNE ])[kb   ];//kbne
-      f27BSW   = (D27F.f[BSW ])[kbsw ];
-      f27BSE   = (D27F.f[BSE ])[kbs  ];//kbse
-      f27BNW   = (D27F.f[BNW ])[kbw  ];//kbnw
+      f27E    =  (D27F.f[DIR_P00   ])[kzero];//ke
+      f27W    =  (D27F.f[DIR_M00   ])[kw   ];
+      f27N    =  (D27F.f[DIR_0P0   ])[kzero];//kn
+      f27S    =  (D27F.f[DIR_0M0   ])[ks   ];
+      f27T    =  (D27F.f[DIR_00P   ])[kzero];//kt
+      f27B    =  (D27F.f[DIR_00M   ])[kb   ];
+      f27NE   =  (D27F.f[DIR_PP0  ])[kzero];//kne
+      f27SW   =  (D27F.f[DIR_MM0  ])[ksw  ];
+      f27SE   =  (D27F.f[DIR_PM0  ])[ks   ];//kse
+      f27NW   =  (D27F.f[DIR_MP0  ])[kw   ];//knw
+      f27TE   =  (D27F.f[DIR_P0P  ])[kzero];//kte
+      f27BW   =  (D27F.f[DIR_M0M  ])[kbw  ];
+      f27BE   =  (D27F.f[DIR_P0M  ])[kb   ];//kbe
+      f27TW   =  (D27F.f[DIR_M0P  ])[kw   ];//ktw
+      f27TN   =  (D27F.f[DIR_0PP  ])[kzero];//ktn
+      f27BS   =  (D27F.f[DIR_0MM  ])[kbs  ];
+      f27BN   =  (D27F.f[DIR_0PM  ])[kb   ];//kbn
+      f27TS   =  (D27F.f[DIR_0MP  ])[ks   ];//kts
+      f27ZERO =  (D27F.f[DIR_000])[kzero];//kzero
+      f27TNE   = (D27F.f[DIR_PPP ])[kzero];//ktne
+      f27TSW   = (D27F.f[DIR_MMP ])[ksw  ];//ktsw
+      f27TSE   = (D27F.f[DIR_PMP ])[ks   ];//ktse
+      f27TNW   = (D27F.f[DIR_MPP ])[kw   ];//ktnw
+      f27BNE   = (D27F.f[DIR_PPM ])[kb   ];//kbne
+      f27BSW   = (D27F.f[DIR_MMM ])[kbsw ];
+      f27BSE   = (D27F.f[DIR_PMM ])[kbs  ];//kbse
+      f27BNW   = (D27F.f[DIR_MPM ])[kbw  ];//kbnw
 
       Conc_F_SWT = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -22154,33 +22154,33 @@ extern "C" __global__ void scaleFCThS27(     real* DC,
       f_BSE  = fbseF[kbs];
       f_BNW  = fbnwF[kbw];
       //////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27F.f[E   ])[kzero];//ke
-      f27W    =  (D27F.f[W   ])[kw   ];
-      f27N    =  (D27F.f[N   ])[kzero];//kn
-      f27S    =  (D27F.f[S   ])[ks   ];
-      f27T    =  (D27F.f[T   ])[kzero];//kt
-      f27B    =  (D27F.f[B   ])[kb   ];
-      f27NE   =  (D27F.f[NE  ])[kzero];//kne
-      f27SW   =  (D27F.f[SW  ])[ksw  ];
-      f27SE   =  (D27F.f[SE  ])[ks   ];//kse
-      f27NW   =  (D27F.f[NW  ])[kw   ];//knw
-      f27TE   =  (D27F.f[TE  ])[kzero];//kte
-      f27BW   =  (D27F.f[BW  ])[kbw  ];
-      f27BE   =  (D27F.f[BE  ])[kb   ];//kbe
-      f27TW   =  (D27F.f[TW  ])[kw   ];//ktw
-      f27TN   =  (D27F.f[TN  ])[kzero];//ktn
-      f27BS   =  (D27F.f[BS  ])[kbs  ];
-      f27BN   =  (D27F.f[BN  ])[kb   ];//kbn
-      f27TS   =  (D27F.f[TS  ])[ks   ];//kts
-      f27ZERO =  (D27F.f[REST])[kzero];//kzero
-      f27TNE   = (D27F.f[TNE ])[kzero];//ktne
-      f27TSW   = (D27F.f[TSW ])[ksw  ];//ktsw
-      f27TSE   = (D27F.f[TSE ])[ks   ];//ktse
-      f27TNW   = (D27F.f[TNW ])[kw   ];//ktnw
-      f27BNE   = (D27F.f[BNE ])[kb   ];//kbne
-      f27BSW   = (D27F.f[BSW ])[kbsw ];
-      f27BSE   = (D27F.f[BSE ])[kbs  ];//kbse
-      f27BNW   = (D27F.f[BNW ])[kbw  ];//kbnw
+      f27E    =  (D27F.f[DIR_P00   ])[kzero];//ke
+      f27W    =  (D27F.f[DIR_M00   ])[kw   ];
+      f27N    =  (D27F.f[DIR_0P0   ])[kzero];//kn
+      f27S    =  (D27F.f[DIR_0M0   ])[ks   ];
+      f27T    =  (D27F.f[DIR_00P   ])[kzero];//kt
+      f27B    =  (D27F.f[DIR_00M   ])[kb   ];
+      f27NE   =  (D27F.f[DIR_PP0  ])[kzero];//kne
+      f27SW   =  (D27F.f[DIR_MM0  ])[ksw  ];
+      f27SE   =  (D27F.f[DIR_PM0  ])[ks   ];//kse
+      f27NW   =  (D27F.f[DIR_MP0  ])[kw   ];//knw
+      f27TE   =  (D27F.f[DIR_P0P  ])[kzero];//kte
+      f27BW   =  (D27F.f[DIR_M0M  ])[kbw  ];
+      f27BE   =  (D27F.f[DIR_P0M  ])[kb   ];//kbe
+      f27TW   =  (D27F.f[DIR_M0P  ])[kw   ];//ktw
+      f27TN   =  (D27F.f[DIR_0PP  ])[kzero];//ktn
+      f27BS   =  (D27F.f[DIR_0MM  ])[kbs  ];
+      f27BN   =  (D27F.f[DIR_0PM  ])[kb   ];//kbn
+      f27TS   =  (D27F.f[DIR_0MP  ])[ks   ];//kts
+      f27ZERO =  (D27F.f[DIR_000])[kzero];//kzero
+      f27TNE   = (D27F.f[DIR_PPP ])[kzero];//ktne
+      f27TSW   = (D27F.f[DIR_MMP ])[ksw  ];//ktsw
+      f27TSE   = (D27F.f[DIR_PMP ])[ks   ];//ktse
+      f27TNW   = (D27F.f[DIR_MPP ])[kw   ];//ktnw
+      f27BNE   = (D27F.f[DIR_PPM ])[kb   ];//kbne
+      f27BSW   = (D27F.f[DIR_MMM ])[kbsw ];
+      f27BSE   = (D27F.f[DIR_PMM ])[kbs  ];//kbse
+      f27BNW   = (D27F.f[DIR_MPM ])[kbw  ];//kbnw
 
       Conc_F_SET = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -22241,33 +22241,33 @@ extern "C" __global__ void scaleFCThS27(     real* DC,
       f_BSE  = fbseF[kbs];
       f_BNW  = fbnwF[kbw];
       //////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27F.f[E   ])[kzero];//ke
-      f27W    =  (D27F.f[W   ])[kw   ];
-      f27N    =  (D27F.f[N   ])[kzero];//kn
-      f27S    =  (D27F.f[S   ])[ks   ];
-      f27T    =  (D27F.f[T   ])[kzero];//kt
-      f27B    =  (D27F.f[B   ])[kb   ];
-      f27NE   =  (D27F.f[NE  ])[kzero];//kne
-      f27SW   =  (D27F.f[SW  ])[ksw  ];
-      f27SE   =  (D27F.f[SE  ])[ks   ];//kse
-      f27NW   =  (D27F.f[NW  ])[kw   ];//knw
-      f27TE   =  (D27F.f[TE  ])[kzero];//kte
-      f27BW   =  (D27F.f[BW  ])[kbw  ];
-      f27BE   =  (D27F.f[BE  ])[kb   ];//kbe
-      f27TW   =  (D27F.f[TW  ])[kw   ];//ktw
-      f27TN   =  (D27F.f[TN  ])[kzero];//ktn
-      f27BS   =  (D27F.f[BS  ])[kbs  ];
-      f27BN   =  (D27F.f[BN  ])[kb   ];//kbn
-      f27TS   =  (D27F.f[TS  ])[ks   ];//kts
-      f27ZERO =  (D27F.f[REST])[kzero];//kzero
-      f27TNE   = (D27F.f[TNE ])[kzero];//ktne
-      f27TSW   = (D27F.f[TSW ])[ksw  ];//ktsw
-      f27TSE   = (D27F.f[TSE ])[ks   ];//ktse
-      f27TNW   = (D27F.f[TNW ])[kw   ];//ktnw
-      f27BNE   = (D27F.f[BNE ])[kb   ];//kbne
-      f27BSW   = (D27F.f[BSW ])[kbsw ];
-      f27BSE   = (D27F.f[BSE ])[kbs  ];//kbse
-      f27BNW   = (D27F.f[BNW ])[kbw  ];//kbnw
+      f27E    =  (D27F.f[DIR_P00   ])[kzero];//ke
+      f27W    =  (D27F.f[DIR_M00   ])[kw   ];
+      f27N    =  (D27F.f[DIR_0P0   ])[kzero];//kn
+      f27S    =  (D27F.f[DIR_0M0   ])[ks   ];
+      f27T    =  (D27F.f[DIR_00P   ])[kzero];//kt
+      f27B    =  (D27F.f[DIR_00M   ])[kb   ];
+      f27NE   =  (D27F.f[DIR_PP0  ])[kzero];//kne
+      f27SW   =  (D27F.f[DIR_MM0  ])[ksw  ];
+      f27SE   =  (D27F.f[DIR_PM0  ])[ks   ];//kse
+      f27NW   =  (D27F.f[DIR_MP0  ])[kw   ];//knw
+      f27TE   =  (D27F.f[DIR_P0P  ])[kzero];//kte
+      f27BW   =  (D27F.f[DIR_M0M  ])[kbw  ];
+      f27BE   =  (D27F.f[DIR_P0M  ])[kb   ];//kbe
+      f27TW   =  (D27F.f[DIR_M0P  ])[kw   ];//ktw
+      f27TN   =  (D27F.f[DIR_0PP  ])[kzero];//ktn
+      f27BS   =  (D27F.f[DIR_0MM  ])[kbs  ];
+      f27BN   =  (D27F.f[DIR_0PM  ])[kb   ];//kbn
+      f27TS   =  (D27F.f[DIR_0MP  ])[ks   ];//kts
+      f27ZERO =  (D27F.f[DIR_000])[kzero];//kzero
+      f27TNE   = (D27F.f[DIR_PPP ])[kzero];//ktne
+      f27TSW   = (D27F.f[DIR_MMP ])[ksw  ];//ktsw
+      f27TSE   = (D27F.f[DIR_PMP ])[ks   ];//ktse
+      f27TNW   = (D27F.f[DIR_MPP ])[kw   ];//ktnw
+      f27BNE   = (D27F.f[DIR_PPM ])[kb   ];//kbne
+      f27BSW   = (D27F.f[DIR_MMM ])[kbsw ];
+      f27BSE   = (D27F.f[DIR_PMM ])[kbs  ];//kbse
+      f27BNW   = (D27F.f[DIR_MPM ])[kbw  ];//kbnw
 
       Conc_F_SEB = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -22338,33 +22338,33 @@ extern "C" __global__ void scaleFCThS27(     real* DC,
       f_BSE  = fbseF[kbs];
       f_BNW  = fbnwF[kbw];
       //////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27F.f[E   ])[kzero];//ke
-      f27W    =  (D27F.f[W   ])[kw   ];
-      f27N    =  (D27F.f[N   ])[kzero];//kn
-      f27S    =  (D27F.f[S   ])[ks   ];
-      f27T    =  (D27F.f[T   ])[kzero];//kt
-      f27B    =  (D27F.f[B   ])[kb   ];
-      f27NE   =  (D27F.f[NE  ])[kzero];//kne
-      f27SW   =  (D27F.f[SW  ])[ksw  ];
-      f27SE   =  (D27F.f[SE  ])[ks   ];//kse
-      f27NW   =  (D27F.f[NW  ])[kw   ];//knw
-      f27TE   =  (D27F.f[TE  ])[kzero];//kte
-      f27BW   =  (D27F.f[BW  ])[kbw  ];
-      f27BE   =  (D27F.f[BE  ])[kb   ];//kbe
-      f27TW   =  (D27F.f[TW  ])[kw   ];//ktw
-      f27TN   =  (D27F.f[TN  ])[kzero];//ktn
-      f27BS   =  (D27F.f[BS  ])[kbs  ];
-      f27BN   =  (D27F.f[BN  ])[kb   ];//kbn
-      f27TS   =  (D27F.f[TS  ])[ks   ];//kts
-      f27ZERO =  (D27F.f[REST])[kzero];//kzero
-      f27TNE   = (D27F.f[TNE ])[kzero];//ktne
-      f27TSW   = (D27F.f[TSW ])[ksw  ];//ktsw
-      f27TSE   = (D27F.f[TSE ])[ks   ];//ktse
-      f27TNW   = (D27F.f[TNW ])[kw   ];//ktnw
-      f27BNE   = (D27F.f[BNE ])[kb   ];//kbne
-      f27BSW   = (D27F.f[BSW ])[kbsw ];
-      f27BSE   = (D27F.f[BSE ])[kbs  ];//kbse
-      f27BNW   = (D27F.f[BNW ])[kbw  ];//kbnw
+      f27E    =  (D27F.f[DIR_P00   ])[kzero];//ke
+      f27W    =  (D27F.f[DIR_M00   ])[kw   ];
+      f27N    =  (D27F.f[DIR_0P0   ])[kzero];//kn
+      f27S    =  (D27F.f[DIR_0M0   ])[ks   ];
+      f27T    =  (D27F.f[DIR_00P   ])[kzero];//kt
+      f27B    =  (D27F.f[DIR_00M   ])[kb   ];
+      f27NE   =  (D27F.f[DIR_PP0  ])[kzero];//kne
+      f27SW   =  (D27F.f[DIR_MM0  ])[ksw  ];
+      f27SE   =  (D27F.f[DIR_PM0  ])[ks   ];//kse
+      f27NW   =  (D27F.f[DIR_MP0  ])[kw   ];//knw
+      f27TE   =  (D27F.f[DIR_P0P  ])[kzero];//kte
+      f27BW   =  (D27F.f[DIR_M0M  ])[kbw  ];
+      f27BE   =  (D27F.f[DIR_P0M  ])[kb   ];//kbe
+      f27TW   =  (D27F.f[DIR_M0P  ])[kw   ];//ktw
+      f27TN   =  (D27F.f[DIR_0PP  ])[kzero];//ktn
+      f27BS   =  (D27F.f[DIR_0MM  ])[kbs  ];
+      f27BN   =  (D27F.f[DIR_0PM  ])[kb   ];//kbn
+      f27TS   =  (D27F.f[DIR_0MP  ])[ks   ];//kts
+      f27ZERO =  (D27F.f[DIR_000])[kzero];//kzero
+      f27TNE   = (D27F.f[DIR_PPP ])[kzero];//ktne
+      f27TSW   = (D27F.f[DIR_MMP ])[ksw  ];//ktsw
+      f27TSE   = (D27F.f[DIR_PMP ])[ks   ];//ktse
+      f27TNW   = (D27F.f[DIR_MPP ])[kw   ];//ktnw
+      f27BNE   = (D27F.f[DIR_PPM ])[kb   ];//kbne
+      f27BSW   = (D27F.f[DIR_MMM ])[kbsw ];
+      f27BSE   = (D27F.f[DIR_PMM ])[kbs  ];//kbse
+      f27BNW   = (D27F.f[DIR_MPM ])[kbw  ];//kbnw
 
       Conc_F_NWB = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -22425,33 +22425,33 @@ extern "C" __global__ void scaleFCThS27(     real* DC,
       f_BSE  = fbseF[kbs];
       f_BNW  = fbnwF[kbw];
       //////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27F.f[E   ])[kzero];//ke
-      f27W    =  (D27F.f[W   ])[kw   ];
-      f27N    =  (D27F.f[N   ])[kzero];//kn
-      f27S    =  (D27F.f[S   ])[ks   ];
-      f27T    =  (D27F.f[T   ])[kzero];//kt
-      f27B    =  (D27F.f[B   ])[kb   ];
-      f27NE   =  (D27F.f[NE  ])[kzero];//kne
-      f27SW   =  (D27F.f[SW  ])[ksw  ];
-      f27SE   =  (D27F.f[SE  ])[ks   ];//kse
-      f27NW   =  (D27F.f[NW  ])[kw   ];//knw
-      f27TE   =  (D27F.f[TE  ])[kzero];//kte
-      f27BW   =  (D27F.f[BW  ])[kbw  ];
-      f27BE   =  (D27F.f[BE  ])[kb   ];//kbe
-      f27TW   =  (D27F.f[TW  ])[kw   ];//ktw
-      f27TN   =  (D27F.f[TN  ])[kzero];//ktn
-      f27BS   =  (D27F.f[BS  ])[kbs  ];
-      f27BN   =  (D27F.f[BN  ])[kb   ];//kbn
-      f27TS   =  (D27F.f[TS  ])[ks   ];//kts
-      f27ZERO =  (D27F.f[REST])[kzero];//kzero
-      f27TNE   = (D27F.f[TNE ])[kzero];//ktne
-      f27TSW   = (D27F.f[TSW ])[ksw  ];//ktsw
-      f27TSE   = (D27F.f[TSE ])[ks   ];//ktse
-      f27TNW   = (D27F.f[TNW ])[kw   ];//ktnw
-      f27BNE   = (D27F.f[BNE ])[kb   ];//kbne
-      f27BSW   = (D27F.f[BSW ])[kbsw ];
-      f27BSE   = (D27F.f[BSE ])[kbs  ];//kbse
-      f27BNW   = (D27F.f[BNW ])[kbw  ];//kbnw
+      f27E    =  (D27F.f[DIR_P00   ])[kzero];//ke
+      f27W    =  (D27F.f[DIR_M00   ])[kw   ];
+      f27N    =  (D27F.f[DIR_0P0   ])[kzero];//kn
+      f27S    =  (D27F.f[DIR_0M0   ])[ks   ];
+      f27T    =  (D27F.f[DIR_00P   ])[kzero];//kt
+      f27B    =  (D27F.f[DIR_00M   ])[kb   ];
+      f27NE   =  (D27F.f[DIR_PP0  ])[kzero];//kne
+      f27SW   =  (D27F.f[DIR_MM0  ])[ksw  ];
+      f27SE   =  (D27F.f[DIR_PM0  ])[ks   ];//kse
+      f27NW   =  (D27F.f[DIR_MP0  ])[kw   ];//knw
+      f27TE   =  (D27F.f[DIR_P0P  ])[kzero];//kte
+      f27BW   =  (D27F.f[DIR_M0M  ])[kbw  ];
+      f27BE   =  (D27F.f[DIR_P0M  ])[kb   ];//kbe
+      f27TW   =  (D27F.f[DIR_M0P  ])[kw   ];//ktw
+      f27TN   =  (D27F.f[DIR_0PP  ])[kzero];//ktn
+      f27BS   =  (D27F.f[DIR_0MM  ])[kbs  ];
+      f27BN   =  (D27F.f[DIR_0PM  ])[kb   ];//kbn
+      f27TS   =  (D27F.f[DIR_0MP  ])[ks   ];//kts
+      f27ZERO =  (D27F.f[DIR_000])[kzero];//kzero
+      f27TNE   = (D27F.f[DIR_PPP ])[kzero];//ktne
+      f27TSW   = (D27F.f[DIR_MMP ])[ksw  ];//ktsw
+      f27TSE   = (D27F.f[DIR_PMP ])[ks   ];//ktse
+      f27TNW   = (D27F.f[DIR_MPP ])[kw   ];//ktnw
+      f27BNE   = (D27F.f[DIR_PPM ])[kb   ];//kbne
+      f27BSW   = (D27F.f[DIR_MMM ])[kbsw ];
+      f27BSE   = (D27F.f[DIR_PMM ])[kbs  ];//kbse
+      f27BNW   = (D27F.f[DIR_MPM ])[kbw  ];//kbnw
 
       Conc_F_NWT = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -22512,33 +22512,33 @@ extern "C" __global__ void scaleFCThS27(     real* DC,
       f_BSE  = fbseF[kbs];
       f_BNW  = fbnwF[kbw];
       //////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27F.f[E   ])[kzero];//ke
-      f27W    =  (D27F.f[W   ])[kw   ];
-      f27N    =  (D27F.f[N   ])[kzero];//kn
-      f27S    =  (D27F.f[S   ])[ks   ];
-      f27T    =  (D27F.f[T   ])[kzero];//kt
-      f27B    =  (D27F.f[B   ])[kb   ];
-      f27NE   =  (D27F.f[NE  ])[kzero];//kne
-      f27SW   =  (D27F.f[SW  ])[ksw  ];
-      f27SE   =  (D27F.f[SE  ])[ks   ];//kse
-      f27NW   =  (D27F.f[NW  ])[kw   ];//knw
-      f27TE   =  (D27F.f[TE  ])[kzero];//kte
-      f27BW   =  (D27F.f[BW  ])[kbw  ];
-      f27BE   =  (D27F.f[BE  ])[kb   ];//kbe
-      f27TW   =  (D27F.f[TW  ])[kw   ];//ktw
-      f27TN   =  (D27F.f[TN  ])[kzero];//ktn
-      f27BS   =  (D27F.f[BS  ])[kbs  ];
-      f27BN   =  (D27F.f[BN  ])[kb   ];//kbn
-      f27TS   =  (D27F.f[TS  ])[ks   ];//kts
-      f27ZERO =  (D27F.f[REST])[kzero];//kzero
-      f27TNE   = (D27F.f[TNE ])[kzero];//ktne
-      f27TSW   = (D27F.f[TSW ])[ksw  ];//ktsw
-      f27TSE   = (D27F.f[TSE ])[ks   ];//ktse
-      f27TNW   = (D27F.f[TNW ])[kw   ];//ktnw
-      f27BNE   = (D27F.f[BNE ])[kb   ];//kbne
-      f27BSW   = (D27F.f[BSW ])[kbsw ];
-      f27BSE   = (D27F.f[BSE ])[kbs  ];//kbse
-      f27BNW   = (D27F.f[BNW ])[kbw  ];//kbnw
+      f27E    =  (D27F.f[DIR_P00   ])[kzero];//ke
+      f27W    =  (D27F.f[DIR_M00   ])[kw   ];
+      f27N    =  (D27F.f[DIR_0P0   ])[kzero];//kn
+      f27S    =  (D27F.f[DIR_0M0   ])[ks   ];
+      f27T    =  (D27F.f[DIR_00P   ])[kzero];//kt
+      f27B    =  (D27F.f[DIR_00M   ])[kb   ];
+      f27NE   =  (D27F.f[DIR_PP0  ])[kzero];//kne
+      f27SW   =  (D27F.f[DIR_MM0  ])[ksw  ];
+      f27SE   =  (D27F.f[DIR_PM0  ])[ks   ];//kse
+      f27NW   =  (D27F.f[DIR_MP0  ])[kw   ];//knw
+      f27TE   =  (D27F.f[DIR_P0P  ])[kzero];//kte
+      f27BW   =  (D27F.f[DIR_M0M  ])[kbw  ];
+      f27BE   =  (D27F.f[DIR_P0M  ])[kb   ];//kbe
+      f27TW   =  (D27F.f[DIR_M0P  ])[kw   ];//ktw
+      f27TN   =  (D27F.f[DIR_0PP  ])[kzero];//ktn
+      f27BS   =  (D27F.f[DIR_0MM  ])[kbs  ];
+      f27BN   =  (D27F.f[DIR_0PM  ])[kb   ];//kbn
+      f27TS   =  (D27F.f[DIR_0MP  ])[ks   ];//kts
+      f27ZERO =  (D27F.f[DIR_000])[kzero];//kzero
+      f27TNE   = (D27F.f[DIR_PPP ])[kzero];//ktne
+      f27TSW   = (D27F.f[DIR_MMP ])[ksw  ];//ktsw
+      f27TSE   = (D27F.f[DIR_PMP ])[ks   ];//ktse
+      f27TNW   = (D27F.f[DIR_MPP ])[kw   ];//ktnw
+      f27BNE   = (D27F.f[DIR_PPM ])[kb   ];//kbne
+      f27BSW   = (D27F.f[DIR_MMM ])[kbsw ];
+      f27BSE   = (D27F.f[DIR_PMM ])[kbs  ];//kbse
+      f27BNW   = (D27F.f[DIR_MPM ])[kbw  ];//kbnw
 
       Conc_F_NET = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -22599,33 +22599,33 @@ extern "C" __global__ void scaleFCThS27(     real* DC,
       f_BSE  = fbseF[kbs];
       f_BNW  = fbnwF[kbw];
       //////////////////////////////////////////////////////////////////////////////////
-      f27E    =  (D27F.f[E   ])[kzero];//ke
-      f27W    =  (D27F.f[W   ])[kw   ];
-      f27N    =  (D27F.f[N   ])[kzero];//kn
-      f27S    =  (D27F.f[S   ])[ks   ];
-      f27T    =  (D27F.f[T   ])[kzero];//kt
-      f27B    =  (D27F.f[B   ])[kb   ];
-      f27NE   =  (D27F.f[NE  ])[kzero];//kne
-      f27SW   =  (D27F.f[SW  ])[ksw  ];
-      f27SE   =  (D27F.f[SE  ])[ks   ];//kse
-      f27NW   =  (D27F.f[NW  ])[kw   ];//knw
-      f27TE   =  (D27F.f[TE  ])[kzero];//kte
-      f27BW   =  (D27F.f[BW  ])[kbw  ];
-      f27BE   =  (D27F.f[BE  ])[kb   ];//kbe
-      f27TW   =  (D27F.f[TW  ])[kw   ];//ktw
-      f27TN   =  (D27F.f[TN  ])[kzero];//ktn
-      f27BS   =  (D27F.f[BS  ])[kbs  ];
-      f27BN   =  (D27F.f[BN  ])[kb   ];//kbn
-      f27TS   =  (D27F.f[TS  ])[ks   ];//kts
-      f27ZERO =  (D27F.f[REST])[kzero];//kzero
-      f27TNE   = (D27F.f[TNE ])[kzero];//ktne
-      f27TSW   = (D27F.f[TSW ])[ksw  ];//ktsw
-      f27TSE   = (D27F.f[TSE ])[ks   ];//ktse
-      f27TNW   = (D27F.f[TNW ])[kw   ];//ktnw
-      f27BNE   = (D27F.f[BNE ])[kb   ];//kbne
-      f27BSW   = (D27F.f[BSW ])[kbsw ];
-      f27BSE   = (D27F.f[BSE ])[kbs  ];//kbse
-      f27BNW   = (D27F.f[BNW ])[kbw  ];//kbnw
+      f27E    =  (D27F.f[DIR_P00   ])[kzero];//ke
+      f27W    =  (D27F.f[DIR_M00   ])[kw   ];
+      f27N    =  (D27F.f[DIR_0P0   ])[kzero];//kn
+      f27S    =  (D27F.f[DIR_0M0   ])[ks   ];
+      f27T    =  (D27F.f[DIR_00P   ])[kzero];//kt
+      f27B    =  (D27F.f[DIR_00M   ])[kb   ];
+      f27NE   =  (D27F.f[DIR_PP0  ])[kzero];//kne
+      f27SW   =  (D27F.f[DIR_MM0  ])[ksw  ];
+      f27SE   =  (D27F.f[DIR_PM0  ])[ks   ];//kse
+      f27NW   =  (D27F.f[DIR_MP0  ])[kw   ];//knw
+      f27TE   =  (D27F.f[DIR_P0P  ])[kzero];//kte
+      f27BW   =  (D27F.f[DIR_M0M  ])[kbw  ];
+      f27BE   =  (D27F.f[DIR_P0M  ])[kb   ];//kbe
+      f27TW   =  (D27F.f[DIR_M0P  ])[kw   ];//ktw
+      f27TN   =  (D27F.f[DIR_0PP  ])[kzero];//ktn
+      f27BS   =  (D27F.f[DIR_0MM  ])[kbs  ];
+      f27BN   =  (D27F.f[DIR_0PM  ])[kb   ];//kbn
+      f27TS   =  (D27F.f[DIR_0MP  ])[ks   ];//kts
+      f27ZERO =  (D27F.f[DIR_000])[kzero];//kzero
+      f27TNE   = (D27F.f[DIR_PPP ])[kzero];//ktne
+      f27TSW   = (D27F.f[DIR_MMP ])[ksw  ];//ktsw
+      f27TSE   = (D27F.f[DIR_PMP ])[ks   ];//ktse
+      f27TNW   = (D27F.f[DIR_MPP ])[kw   ];//ktnw
+      f27BNE   = (D27F.f[DIR_PPM ])[kb   ];//kbne
+      f27BSW   = (D27F.f[DIR_MMM ])[kbsw ];
+      f27BSE   = (D27F.f[DIR_PMM ])[kbs  ];//kbse
+      f27BNW   = (D27F.f[DIR_MPM ])[kbw  ];//kbnw
 
       Conc_F_NEB = f27E + f27W + f27N + f27S + f27T + f27B + f27NE + f27SW + f27SE + f27NW + 
                    f27TE + f27BW + f27BE + f27TW + f27TN + f27BS + f27BN + f27TS + f27ZERO + 
@@ -22738,33 +22738,33 @@ extern "C" __global__ void scaleFCThS27(     real* DC,
 
       cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
-      (D27C.f[REST])[kzero] =   c8o27* Conc_C*(c1o1-cu_sq);
-      (D27C.f[E   ])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_C*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
-      (D27C.f[W   ])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_C*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
-      (D27C.f[N   ])[kzero] =   c2o27* (c3o1*(     My    )+Conc_C*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
-      (D27C.f[S   ])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_C*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
-      (D27C.f[T   ])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_C*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
-      (D27C.f[B   ])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_C*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
-      (D27C.f[NE  ])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_C*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
-      (D27C.f[SW  ])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_C*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
-      (D27C.f[SE  ])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_C*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
-      (D27C.f[NW  ])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_C*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
-      (D27C.f[TE  ])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_C*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
-      (D27C.f[BW  ])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_C*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
-      (D27C.f[BE  ])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_C*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
-      (D27C.f[TW  ])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_C*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
-      (D27C.f[TN  ])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_C*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
-      (D27C.f[BS  ])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_C*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
-      (D27C.f[BN  ])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_C*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
-      (D27C.f[TS  ])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_C*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
-      (D27C.f[TNE ])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_C*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
-      (D27C.f[BSW ])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_C*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
-      (D27C.f[BNE ])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_C*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
-      (D27C.f[TSW ])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_C*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
-      (D27C.f[TSE ])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_C*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
-      (D27C.f[BNW ])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_C*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
-      (D27C.f[BSE ])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_C*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
-      (D27C.f[TNW ])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_C*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
+      (D27C.f[DIR_000])[kzero] =   c8o27* Conc_C*(c1o1-cu_sq);
+      (D27C.f[DIR_P00   ])[kzero] =   c2o27* (c3o1*( Mx        )+Conc_C*(c1o1+c9o2*( vx1        )*( vx1        )-cu_sq));
+      (D27C.f[DIR_M00   ])[kw   ] =   c2o27* (c3o1*(-Mx        )+Conc_C*(c1o1+c9o2*(-vx1        )*(-vx1        )-cu_sq));
+      (D27C.f[DIR_0P0   ])[kzero] =   c2o27* (c3o1*(     My    )+Conc_C*(c1o1+c9o2*(     vx2    )*(     vx2    )-cu_sq));
+      (D27C.f[DIR_0M0   ])[ks   ] =   c2o27* (c3o1*(    -My    )+Conc_C*(c1o1+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
+      (D27C.f[DIR_00P   ])[kzero] =   c2o27* (c3o1*(         Mz)+Conc_C*(c1o1+c9o2*(         vx3)*(         vx3)-cu_sq));
+      (D27C.f[DIR_00M   ])[kb   ] =   c2o27* (c3o1*(        -Mz)+Conc_C*(c1o1+c9o2*(        -vx3)*(        -vx3)-cu_sq));
+      (D27C.f[DIR_PP0  ])[kzero] =   c1o54* (c3o1*( Mx +My    )+Conc_C*(c1o1+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
+      (D27C.f[DIR_MM0  ])[ksw  ] =   c1o54* (c3o1*(-Mx -My    )+Conc_C*(c1o1+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
+      (D27C.f[DIR_PM0  ])[ks   ] =   c1o54* (c3o1*( Mx -My    )+Conc_C*(c1o1+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
+      (D27C.f[DIR_MP0  ])[kw   ] =   c1o54* (c3o1*(-Mx +My    )+Conc_C*(c1o1+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
+      (D27C.f[DIR_P0P  ])[kzero] =   c1o54* (c3o1*( Mx     +Mz)+Conc_C*(c1o1+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
+      (D27C.f[DIR_M0M  ])[kbw  ] =   c1o54* (c3o1*(-Mx     -Mz)+Conc_C*(c1o1+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
+      (D27C.f[DIR_P0M  ])[kb   ] =   c1o54* (c3o1*( Mx     -Mz)+Conc_C*(c1o1+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
+      (D27C.f[DIR_M0P  ])[kw   ] =   c1o54* (c3o1*(-Mx     +Mz)+Conc_C*(c1o1+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
+      (D27C.f[DIR_0PP  ])[kzero] =   c1o54* (c3o1*(     My +Mz)+Conc_C*(c1o1+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
+      (D27C.f[DIR_0MM  ])[kbs  ] =   c1o54* (c3o1*(    -My -Mz)+Conc_C*(c1o1+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
+      (D27C.f[DIR_0PM  ])[kb   ] =   c1o54* (c3o1*(     My -Mz)+Conc_C*(c1o1+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
+      (D27C.f[DIR_0MP  ])[ks   ] =   c1o54* (c3o1*(    -My +Mz)+Conc_C*(c1o1+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
+      (D27C.f[DIR_PPP ])[kzero] =   c1o216*(c3o1*( Mx +My +Mz)+Conc_C*(c1o1+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
+      (D27C.f[DIR_MMM ])[kbsw ] =   c1o216*(c3o1*(-Mx -My -Mz)+Conc_C*(c1o1+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
+      (D27C.f[DIR_PPM ])[kb   ] =   c1o216*(c3o1*( Mx +My -Mz)+Conc_C*(c1o1+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
+      (D27C.f[DIR_MMP ])[ksw  ] =   c1o216*(c3o1*(-Mx -My +Mz)+Conc_C*(c1o1+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
+      (D27C.f[DIR_PMP ])[ks   ] =   c1o216*(c3o1*( Mx -My +Mz)+Conc_C*(c1o1+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
+      (D27C.f[DIR_MPM ])[kbw  ] =   c1o216*(c3o1*(-Mx +My -Mz)+Conc_C*(c1o1+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
+      (D27C.f[DIR_PMM ])[kbs  ] =   c1o216*(c3o1*( Mx -My -Mz)+Conc_C*(c1o1+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
+      (D27C.f[DIR_MPP ])[kw   ] =   c1o216*(c3o1*(-Mx +My +Mz)+Conc_C*(c1o1+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
 
    }
 }
@@ -22804,7 +22804,7 @@ extern "C" __global__ void scaleFCThS27(     real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleFCEff27(real* DC, 
+__global__ void scaleFCEff27(real* DC, 
                                         real* DF, 
                                         unsigned int* neighborCX,
                                         unsigned int* neighborCY,
@@ -22830,96 +22830,96 @@ extern "C" __global__ void scaleFCEff27(real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
       *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[E   *size_MatF];
-   fwF    = &DF[W   *size_MatF];
-   fnF    = &DF[N   *size_MatF];
-   fsF    = &DF[S   *size_MatF];
-   ftF    = &DF[T   *size_MatF];
-   fbF    = &DF[B   *size_MatF];
-   fneF   = &DF[NE  *size_MatF];
-   fswF   = &DF[SW  *size_MatF];
-   fseF   = &DF[SE  *size_MatF];
-   fnwF   = &DF[NW  *size_MatF];
-   fteF   = &DF[TE  *size_MatF];
-   fbwF   = &DF[BW  *size_MatF];
-   fbeF   = &DF[BE  *size_MatF];
-   ftwF   = &DF[TW  *size_MatF];
-   ftnF   = &DF[TN  *size_MatF];
-   fbsF   = &DF[BS  *size_MatF];
-   fbnF   = &DF[BN  *size_MatF];
-   ftsF   = &DF[TS  *size_MatF];
-   fzeroF = &DF[REST*size_MatF];
-   ftneF  = &DF[TNE *size_MatF];
-   ftswF  = &DF[TSW *size_MatF];
-   ftseF  = &DF[TSE *size_MatF];
-   ftnwF  = &DF[TNW *size_MatF];
-   fbneF  = &DF[BNE *size_MatF];
-   fbswF  = &DF[BSW *size_MatF];
-   fbseF  = &DF[BSE *size_MatF];
-   fbnwF  = &DF[BNW *size_MatF];
+   feF    = &DF[DIR_P00   *size_MatF];
+   fwF    = &DF[DIR_M00   *size_MatF];
+   fnF    = &DF[DIR_0P0   *size_MatF];
+   fsF    = &DF[DIR_0M0   *size_MatF];
+   ftF    = &DF[DIR_00P   *size_MatF];
+   fbF    = &DF[DIR_00M   *size_MatF];
+   fneF   = &DF[DIR_PP0  *size_MatF];
+   fswF   = &DF[DIR_MM0  *size_MatF];
+   fseF   = &DF[DIR_PM0  *size_MatF];
+   fnwF   = &DF[DIR_MP0  *size_MatF];
+   fteF   = &DF[DIR_P0P  *size_MatF];
+   fbwF   = &DF[DIR_M0M  *size_MatF];
+   fbeF   = &DF[DIR_P0M  *size_MatF];
+   ftwF   = &DF[DIR_M0P  *size_MatF];
+   ftnF   = &DF[DIR_0PP  *size_MatF];
+   fbsF   = &DF[DIR_0MM  *size_MatF];
+   fbnF   = &DF[DIR_0PM  *size_MatF];
+   ftsF   = &DF[DIR_0MP  *size_MatF];
+   fzeroF = &DF[DIR_000*size_MatF];
+   ftneF  = &DF[DIR_PPP *size_MatF];
+   ftswF  = &DF[DIR_MMP *size_MatF];
+   ftseF  = &DF[DIR_PMP *size_MatF];
+   ftnwF  = &DF[DIR_MPP *size_MatF];
+   fbneF  = &DF[DIR_PPM *size_MatF];
+   fbswF  = &DF[DIR_MMM *size_MatF];
+   fbseF  = &DF[DIR_PMM *size_MatF];
+   fbnwF  = &DF[DIR_MPM *size_MatF];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
       *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[E   *size_MatC];
-      fwC    = &DC[W   *size_MatC];
-      fnC    = &DC[N   *size_MatC];
-      fsC    = &DC[S   *size_MatC];
-      ftC    = &DC[T   *size_MatC];
-      fbC    = &DC[B   *size_MatC];
-      fneC   = &DC[NE  *size_MatC];
-      fswC   = &DC[SW  *size_MatC];
-      fseC   = &DC[SE  *size_MatC];
-      fnwC   = &DC[NW  *size_MatC];
-      fteC   = &DC[TE  *size_MatC];
-      fbwC   = &DC[BW  *size_MatC];
-      fbeC   = &DC[BE  *size_MatC];
-      ftwC   = &DC[TW  *size_MatC];
-      ftnC   = &DC[TN  *size_MatC];
-      fbsC   = &DC[BS  *size_MatC];
-      fbnC   = &DC[BN  *size_MatC];
-      ftsC   = &DC[TS  *size_MatC];
-      fzeroC = &DC[REST*size_MatC];
-      ftneC  = &DC[TNE *size_MatC];
-      ftswC  = &DC[TSW *size_MatC];
-      ftseC  = &DC[TSE *size_MatC];
-      ftnwC  = &DC[TNW *size_MatC];
-      fbneC  = &DC[BNE *size_MatC];
-      fbswC  = &DC[BSW *size_MatC];
-      fbseC  = &DC[BSE *size_MatC];
-      fbnwC  = &DC[BNW *size_MatC];
+      feC    = &DC[DIR_P00   *size_MatC];
+      fwC    = &DC[DIR_M00   *size_MatC];
+      fnC    = &DC[DIR_0P0   *size_MatC];
+      fsC    = &DC[DIR_0M0   *size_MatC];
+      ftC    = &DC[DIR_00P   *size_MatC];
+      fbC    = &DC[DIR_00M   *size_MatC];
+      fneC   = &DC[DIR_PP0  *size_MatC];
+      fswC   = &DC[DIR_MM0  *size_MatC];
+      fseC   = &DC[DIR_PM0  *size_MatC];
+      fnwC   = &DC[DIR_MP0  *size_MatC];
+      fteC   = &DC[DIR_P0P  *size_MatC];
+      fbwC   = &DC[DIR_M0M  *size_MatC];
+      fbeC   = &DC[DIR_P0M  *size_MatC];
+      ftwC   = &DC[DIR_M0P  *size_MatC];
+      ftnC   = &DC[DIR_0PP  *size_MatC];
+      fbsC   = &DC[DIR_0MM  *size_MatC];
+      fbnC   = &DC[DIR_0PM  *size_MatC];
+      ftsC   = &DC[DIR_0MP  *size_MatC];
+      fzeroC = &DC[DIR_000*size_MatC];
+      ftneC  = &DC[DIR_PPP *size_MatC];
+      ftswC  = &DC[DIR_MMP *size_MatC];
+      ftseC  = &DC[DIR_PMP *size_MatC];
+      ftnwC  = &DC[DIR_MPP *size_MatC];
+      fbneC  = &DC[DIR_PPM *size_MatC];
+      fbswC  = &DC[DIR_MMM *size_MatC];
+      fbseC  = &DC[DIR_PMM *size_MatC];
+      fbnwC  = &DC[DIR_MPM *size_MatC];
    } 
    else
    {
-      fwC    = &DC[E   *size_MatC];
-      feC    = &DC[W   *size_MatC];
-      fsC    = &DC[N   *size_MatC];
-      fnC    = &DC[S   *size_MatC];
-      fbC    = &DC[T   *size_MatC];
-      ftC    = &DC[B   *size_MatC];
-      fswC   = &DC[NE  *size_MatC];
-      fneC   = &DC[SW  *size_MatC];
-      fnwC   = &DC[SE  *size_MatC];
-      fseC   = &DC[NW  *size_MatC];
-      fbwC   = &DC[TE  *size_MatC];
-      fteC   = &DC[BW  *size_MatC];
-      ftwC   = &DC[BE  *size_MatC];
-      fbeC   = &DC[TW  *size_MatC];
-      fbsC   = &DC[TN  *size_MatC];
-      ftnC   = &DC[BS  *size_MatC];
-      ftsC   = &DC[BN  *size_MatC];
-      fbnC   = &DC[TS  *size_MatC];
-      fzeroC = &DC[REST*size_MatC];
-      fbswC  = &DC[TNE *size_MatC];
-      fbneC  = &DC[TSW *size_MatC];
-      fbnwC  = &DC[TSE *size_MatC];
-      fbseC  = &DC[TNW *size_MatC];
-      ftswC  = &DC[BNE *size_MatC];
-      ftneC  = &DC[BSW *size_MatC];
-      ftnwC  = &DC[BSE *size_MatC];
-      ftseC  = &DC[BNW *size_MatC];
+      fwC    = &DC[DIR_P00   *size_MatC];
+      feC    = &DC[DIR_M00   *size_MatC];
+      fsC    = &DC[DIR_0P0   *size_MatC];
+      fnC    = &DC[DIR_0M0   *size_MatC];
+      fbC    = &DC[DIR_00P   *size_MatC];
+      ftC    = &DC[DIR_00M   *size_MatC];
+      fswC   = &DC[DIR_PP0  *size_MatC];
+      fneC   = &DC[DIR_MM0  *size_MatC];
+      fnwC   = &DC[DIR_PM0  *size_MatC];
+      fseC   = &DC[DIR_MP0  *size_MatC];
+      fbwC   = &DC[DIR_P0P  *size_MatC];
+      fteC   = &DC[DIR_M0M  *size_MatC];
+      ftwC   = &DC[DIR_P0M  *size_MatC];
+      fbeC   = &DC[DIR_M0P  *size_MatC];
+      fbsC   = &DC[DIR_0PP  *size_MatC];
+      ftnC   = &DC[DIR_0MM  *size_MatC];
+      ftsC   = &DC[DIR_0PM  *size_MatC];
+      fbnC   = &DC[DIR_0MP  *size_MatC];
+      fzeroC = &DC[DIR_000*size_MatC];
+      fbswC  = &DC[DIR_PPP *size_MatC];
+      fbneC  = &DC[DIR_MMP *size_MatC];
+      fbnwC  = &DC[DIR_PMP *size_MatC];
+      fbseC  = &DC[DIR_MPP *size_MatC];
+      ftswC  = &DC[DIR_PPM *size_MatC];
+      ftneC  = &DC[DIR_MMM *size_MatC];
+      ftnwC  = &DC[DIR_PMM *size_MatC];
+      ftseC  = &DC[DIR_MPM *size_MatC];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  ix = threadIdx.x;  // Globaler x-Index 
@@ -23783,7 +23783,7 @@ extern "C" __global__ void scaleFCEff27(real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleFC27(real* DC, 
+__global__ void scaleFC27(real* DC, 
                                      real* DF, 
                                      unsigned int* neighborCX,
                                      unsigned int* neighborCY,
@@ -23808,96 +23808,96 @@ extern "C" __global__ void scaleFC27(real* DC,
    real *feF, *fwF, *fnF, *fsF, *ftF, *fbF, *fneF, *fswF, *fseF, *fnwF, *fteF, *fbwF, *fbeF, *ftwF, *ftnF, *fbsF, *fbnF, *ftsF, *fzeroF, 
          *ftneF, *ftswF, *ftseF, *ftnwF, *fbneF, *fbswF, *fbseF, *fbnwF;
 
-   feF    = &DF[E   *size_MatF];
-   fwF    = &DF[W   *size_MatF];
-   fnF    = &DF[N   *size_MatF];
-   fsF    = &DF[S   *size_MatF];
-   ftF    = &DF[T   *size_MatF];
-   fbF    = &DF[B   *size_MatF];
-   fneF   = &DF[NE  *size_MatF];
-   fswF   = &DF[SW  *size_MatF];
-   fseF   = &DF[SE  *size_MatF];
-   fnwF   = &DF[NW  *size_MatF];
-   fteF   = &DF[TE  *size_MatF];
-   fbwF   = &DF[BW  *size_MatF];
-   fbeF   = &DF[BE  *size_MatF];
-   ftwF   = &DF[TW  *size_MatF];
-   ftnF   = &DF[TN  *size_MatF];
-   fbsF   = &DF[BS  *size_MatF];
-   fbnF   = &DF[BN  *size_MatF];
-   ftsF   = &DF[TS  *size_MatF];
-   fzeroF = &DF[REST*size_MatF];
-   ftneF  = &DF[TNE *size_MatF];
-   ftswF  = &DF[TSW *size_MatF];
-   ftseF  = &DF[TSE *size_MatF];
-   ftnwF  = &DF[TNW *size_MatF];
-   fbneF  = &DF[BNE *size_MatF];
-   fbswF  = &DF[BSW *size_MatF];
-   fbseF  = &DF[BSE *size_MatF];
-   fbnwF  = &DF[BNW *size_MatF];
+   feF    = &DF[DIR_P00   *size_MatF];
+   fwF    = &DF[DIR_M00   *size_MatF];
+   fnF    = &DF[DIR_0P0   *size_MatF];
+   fsF    = &DF[DIR_0M0   *size_MatF];
+   ftF    = &DF[DIR_00P   *size_MatF];
+   fbF    = &DF[DIR_00M   *size_MatF];
+   fneF   = &DF[DIR_PP0  *size_MatF];
+   fswF   = &DF[DIR_MM0  *size_MatF];
+   fseF   = &DF[DIR_PM0  *size_MatF];
+   fnwF   = &DF[DIR_MP0  *size_MatF];
+   fteF   = &DF[DIR_P0P  *size_MatF];
+   fbwF   = &DF[DIR_M0M  *size_MatF];
+   fbeF   = &DF[DIR_P0M  *size_MatF];
+   ftwF   = &DF[DIR_M0P  *size_MatF];
+   ftnF   = &DF[DIR_0PP  *size_MatF];
+   fbsF   = &DF[DIR_0MM  *size_MatF];
+   fbnF   = &DF[DIR_0PM  *size_MatF];
+   ftsF   = &DF[DIR_0MP  *size_MatF];
+   fzeroF = &DF[DIR_000*size_MatF];
+   ftneF  = &DF[DIR_PPP *size_MatF];
+   ftswF  = &DF[DIR_MMP *size_MatF];
+   ftseF  = &DF[DIR_PMP *size_MatF];
+   ftnwF  = &DF[DIR_MPP *size_MatF];
+   fbneF  = &DF[DIR_PPM *size_MatF];
+   fbswF  = &DF[DIR_MMM *size_MatF];
+   fbseF  = &DF[DIR_PMM *size_MatF];
+   fbnwF  = &DF[DIR_MPM *size_MatF];
 
    real *feC, *fwC, *fnC, *fsC, *ftC, *fbC, *fneC, *fswC, *fseC, *fnwC, *fteC, *fbwC, *fbeC, *ftwC, *ftnC, *fbsC, *fbnC, *ftsC, *fzeroC,
          *ftneC, *ftswC, *ftseC, *ftnwC, *fbneC, *fbswC, *fbseC, *fbnwC;
 
    if (isEvenTimestep==true)
    {
-      feC    = &DC[E   *size_MatC];
-      fwC    = &DC[W   *size_MatC];
-      fnC    = &DC[N   *size_MatC];
-      fsC    = &DC[S   *size_MatC];
-      ftC    = &DC[T   *size_MatC];
-      fbC    = &DC[B   *size_MatC];
-      fneC   = &DC[NE  *size_MatC];
-      fswC   = &DC[SW  *size_MatC];
-      fseC   = &DC[SE  *size_MatC];
-      fnwC   = &DC[NW  *size_MatC];
-      fteC   = &DC[TE  *size_MatC];
-      fbwC   = &DC[BW  *size_MatC];
-      fbeC   = &DC[BE  *size_MatC];
-      ftwC   = &DC[TW  *size_MatC];
-      ftnC   = &DC[TN  *size_MatC];
-      fbsC   = &DC[BS  *size_MatC];
-      fbnC   = &DC[BN  *size_MatC];
-      ftsC   = &DC[TS  *size_MatC];
-      fzeroC = &DC[REST*size_MatC];
-      ftneC  = &DC[TNE *size_MatC];
-      ftswC  = &DC[TSW *size_MatC];
-      ftseC  = &DC[TSE *size_MatC];
-      ftnwC  = &DC[TNW *size_MatC];
-      fbneC  = &DC[BNE *size_MatC];
-      fbswC  = &DC[BSW *size_MatC];
-      fbseC  = &DC[BSE *size_MatC];
-      fbnwC  = &DC[BNW *size_MatC];
+      feC    = &DC[DIR_P00   *size_MatC];
+      fwC    = &DC[DIR_M00   *size_MatC];
+      fnC    = &DC[DIR_0P0   *size_MatC];
+      fsC    = &DC[DIR_0M0   *size_MatC];
+      ftC    = &DC[DIR_00P   *size_MatC];
+      fbC    = &DC[DIR_00M   *size_MatC];
+      fneC   = &DC[DIR_PP0  *size_MatC];
+      fswC   = &DC[DIR_MM0  *size_MatC];
+      fseC   = &DC[DIR_PM0  *size_MatC];
+      fnwC   = &DC[DIR_MP0  *size_MatC];
+      fteC   = &DC[DIR_P0P  *size_MatC];
+      fbwC   = &DC[DIR_M0M  *size_MatC];
+      fbeC   = &DC[DIR_P0M  *size_MatC];
+      ftwC   = &DC[DIR_M0P  *size_MatC];
+      ftnC   = &DC[DIR_0PP  *size_MatC];
+      fbsC   = &DC[DIR_0MM  *size_MatC];
+      fbnC   = &DC[DIR_0PM  *size_MatC];
+      ftsC   = &DC[DIR_0MP  *size_MatC];
+      fzeroC = &DC[DIR_000*size_MatC];
+      ftneC  = &DC[DIR_PPP *size_MatC];
+      ftswC  = &DC[DIR_MMP *size_MatC];
+      ftseC  = &DC[DIR_PMP *size_MatC];
+      ftnwC  = &DC[DIR_MPP *size_MatC];
+      fbneC  = &DC[DIR_PPM *size_MatC];
+      fbswC  = &DC[DIR_MMM *size_MatC];
+      fbseC  = &DC[DIR_PMM *size_MatC];
+      fbnwC  = &DC[DIR_MPM *size_MatC];
    } 
    else
    {
-      fwC    = &DC[E   *size_MatC];
-      feC    = &DC[W   *size_MatC];
-      fsC    = &DC[N   *size_MatC];
-      fnC    = &DC[S   *size_MatC];
-      fbC    = &DC[T   *size_MatC];
-      ftC    = &DC[B   *size_MatC];
-      fswC   = &DC[NE  *size_MatC];
-      fneC   = &DC[SW  *size_MatC];
-      fnwC   = &DC[SE  *size_MatC];
-      fseC   = &DC[NW  *size_MatC];
-      fbwC   = &DC[TE  *size_MatC];
-      fteC   = &DC[BW  *size_MatC];
-      ftwC   = &DC[BE  *size_MatC];
-      fbeC   = &DC[TW  *size_MatC];
-      fbsC   = &DC[TN  *size_MatC];
-      ftnC   = &DC[BS  *size_MatC];
-      ftsC   = &DC[BN  *size_MatC];
-      fbnC   = &DC[TS  *size_MatC];
-      fzeroC = &DC[REST*size_MatC];
-      fbswC  = &DC[TNE *size_MatC];
-      fbneC  = &DC[TSW *size_MatC];
-      fbnwC  = &DC[TSE *size_MatC];
-      fbseC  = &DC[TNW *size_MatC];
-      ftswC  = &DC[BNE *size_MatC];
-      ftneC  = &DC[BSW *size_MatC];
-      ftnwC  = &DC[BSE *size_MatC];
-      ftseC  = &DC[BNW *size_MatC];
+      fwC    = &DC[DIR_P00   *size_MatC];
+      feC    = &DC[DIR_M00   *size_MatC];
+      fsC    = &DC[DIR_0P0   *size_MatC];
+      fnC    = &DC[DIR_0M0   *size_MatC];
+      fbC    = &DC[DIR_00P   *size_MatC];
+      ftC    = &DC[DIR_00M   *size_MatC];
+      fswC   = &DC[DIR_PP0  *size_MatC];
+      fneC   = &DC[DIR_MM0  *size_MatC];
+      fnwC   = &DC[DIR_PM0  *size_MatC];
+      fseC   = &DC[DIR_MP0  *size_MatC];
+      fbwC   = &DC[DIR_P0P  *size_MatC];
+      fteC   = &DC[DIR_M0M  *size_MatC];
+      ftwC   = &DC[DIR_P0M  *size_MatC];
+      fbeC   = &DC[DIR_M0P  *size_MatC];
+      fbsC   = &DC[DIR_0PP  *size_MatC];
+      ftnC   = &DC[DIR_0MM  *size_MatC];
+      ftsC   = &DC[DIR_0PM  *size_MatC];
+      fbnC   = &DC[DIR_0MP  *size_MatC];
+      fzeroC = &DC[DIR_000*size_MatC];
+      fbswC  = &DC[DIR_PPP *size_MatC];
+      fbneC  = &DC[DIR_MMP *size_MatC];
+      fbnwC  = &DC[DIR_PMP *size_MatC];
+      fbseC  = &DC[DIR_MPP *size_MatC];
+      ftswC  = &DC[DIR_PPM *size_MatC];
+      ftneC  = &DC[DIR_MMM *size_MatC];
+      ftnwC  = &DC[DIR_PMM *size_MatC];
+      ftseC  = &DC[DIR_MPM *size_MatC];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  ix = threadIdx.x;  // Globaler x-Index 
diff --git a/src/gpu/VirtualFluids_GPU/GPU/ScaleFC_F3_27.cu b/src/gpu/VirtualFluids_GPU/GPU/ScaleFC_F3_27.cu
index 4e55569b1b296476af782a1176e8e4420d3e2022..e7fe8b50637e97b9c8cc34025216f4d02e684c55 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/ScaleFC_F3_27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/ScaleFC_F3_27.cu
@@ -14,7 +14,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleFC_comp_D3Q27F3_2018(real* DC,
+__global__ void scaleFC_comp_D3Q27F3_2018(real* DC,
 													 real* DF,
 													 real* G6,
 													 unsigned int* neighborCX,
@@ -44,33 +44,33 @@ extern "C" __global__ void scaleFC_comp_D3Q27F3_2018(real* DC,
 	   *f000source, *fMMMsource, *fMMPsource, *fMPPsource, *fMPMsource, *fPPMsource, *fPPPsource, *fPMPsource, *fPMMsource;
 
 
-   fP00source = &DF[E   *size_MatF];
-   fM00source = &DF[W   *size_MatF];
-   f0P0source = &DF[N   *size_MatF];
-   f0M0source = &DF[S   *size_MatF];
-   f00Psource = &DF[T   *size_MatF];
-   f00Msource = &DF[B   *size_MatF];
-   fPP0source = &DF[NE  *size_MatF];
-   fMM0source = &DF[SW  *size_MatF];
-   fPM0source = &DF[SE  *size_MatF];
-   fMP0source = &DF[NW  *size_MatF];
-   fP0Psource = &DF[TE  *size_MatF];
-   fM0Msource = &DF[BW  *size_MatF];
-   fP0Msource = &DF[BE  *size_MatF];
-   fM0Psource = &DF[TW  *size_MatF];
-   f0PPsource = &DF[TN  *size_MatF];
-   f0MMsource = &DF[BS  *size_MatF];
-   f0PMsource = &DF[BN  *size_MatF];
-   f0MPsource = &DF[TS  *size_MatF];
-   f000source = &DF[REST*size_MatF];
-   fMMMsource = &DF[BSW *size_MatF];
-   fMMPsource = &DF[TSW *size_MatF];
-   fMPPsource = &DF[TNW *size_MatF];
-   fMPMsource = &DF[BNW *size_MatF];
-   fPPMsource = &DF[BNE *size_MatF];
-   fPPPsource = &DF[TNE *size_MatF];
-   fPMPsource = &DF[TSE *size_MatF];
-   fPMMsource = &DF[BSE *size_MatF];
+   fP00source = &DF[DIR_P00   *size_MatF];
+   fM00source = &DF[DIR_M00   *size_MatF];
+   f0P0source = &DF[DIR_0P0   *size_MatF];
+   f0M0source = &DF[DIR_0M0   *size_MatF];
+   f00Psource = &DF[DIR_00P   *size_MatF];
+   f00Msource = &DF[DIR_00M   *size_MatF];
+   fPP0source = &DF[DIR_PP0  *size_MatF];
+   fMM0source = &DF[DIR_MM0  *size_MatF];
+   fPM0source = &DF[DIR_PM0  *size_MatF];
+   fMP0source = &DF[DIR_MP0  *size_MatF];
+   fP0Psource = &DF[DIR_P0P  *size_MatF];
+   fM0Msource = &DF[DIR_M0M  *size_MatF];
+   fP0Msource = &DF[DIR_P0M  *size_MatF];
+   fM0Psource = &DF[DIR_M0P  *size_MatF];
+   f0PPsource = &DF[DIR_0PP  *size_MatF];
+   f0MMsource = &DF[DIR_0MM  *size_MatF];
+   f0PMsource = &DF[DIR_0PM  *size_MatF];
+   f0MPsource = &DF[DIR_0MP  *size_MatF];
+   f000source = &DF[DIR_000*size_MatF];
+   fMMMsource = &DF[DIR_MMM *size_MatF];
+   fMMPsource = &DF[DIR_MMP *size_MatF];
+   fMPPsource = &DF[DIR_MPP *size_MatF];
+   fMPMsource = &DF[DIR_MPM *size_MatF];
+   fPPMsource = &DF[DIR_PPM *size_MatF];
+   fPPPsource = &DF[DIR_PPP *size_MatF];
+   fPMPsource = &DF[DIR_PMP *size_MatF];
+   fPMMsource = &DF[DIR_PMM *size_MatF];
 
    real
 	   *fP00dest, *fM00dest, *f0P0dest, *f0M0dest, *f00Pdest, *f00Mdest, *fPP0dest, *fMM0dest, *fPM0dest,
@@ -79,83 +79,83 @@ extern "C" __global__ void scaleFC_comp_D3Q27F3_2018(real* DC,
 
    if (isEvenTimestep==true)
    {
-	   fP00dest = &DC[E   *size_MatC];
-	   fM00dest = &DC[W   *size_MatC];
-	   f0P0dest = &DC[N   *size_MatC];
-	   f0M0dest = &DC[S   *size_MatC];
-	   f00Pdest = &DC[T   *size_MatC];
-	   f00Mdest = &DC[B   *size_MatC];
-	   fPP0dest = &DC[NE  *size_MatC];
-	   fMM0dest = &DC[SW  *size_MatC];
-	   fPM0dest = &DC[SE  *size_MatC];
-	   fMP0dest = &DC[NW  *size_MatC];
-	   fP0Pdest = &DC[TE  *size_MatC];
-	   fM0Mdest = &DC[BW  *size_MatC];
-	   fP0Mdest = &DC[BE  *size_MatC];
-	   fM0Pdest = &DC[TW  *size_MatC];
-	   f0PPdest = &DC[TN  *size_MatC];
-	   f0MMdest = &DC[BS  *size_MatC];
-	   f0PMdest = &DC[BN  *size_MatC];
-	   f0MPdest = &DC[TS  *size_MatC];
-	   f000dest = &DC[REST*size_MatC];
-	   fMMMdest = &DC[BSW *size_MatC];
-	   fMMPdest = &DC[TSW *size_MatC];
-	   fMPPdest = &DC[TNW *size_MatC];
-	   fMPMdest = &DC[BNW *size_MatC];
-	   fPPMdest = &DC[BNE *size_MatC];
-	   fPPPdest = &DC[TNE *size_MatC];
-	   fPMPdest = &DC[TSE *size_MatC];
-	   fPMMdest = &DC[BSE *size_MatC];
+	   fP00dest = &DC[DIR_P00   *size_MatC];
+	   fM00dest = &DC[DIR_M00   *size_MatC];
+	   f0P0dest = &DC[DIR_0P0   *size_MatC];
+	   f0M0dest = &DC[DIR_0M0   *size_MatC];
+	   f00Pdest = &DC[DIR_00P   *size_MatC];
+	   f00Mdest = &DC[DIR_00M   *size_MatC];
+	   fPP0dest = &DC[DIR_PP0  *size_MatC];
+	   fMM0dest = &DC[DIR_MM0  *size_MatC];
+	   fPM0dest = &DC[DIR_PM0  *size_MatC];
+	   fMP0dest = &DC[DIR_MP0  *size_MatC];
+	   fP0Pdest = &DC[DIR_P0P  *size_MatC];
+	   fM0Mdest = &DC[DIR_M0M  *size_MatC];
+	   fP0Mdest = &DC[DIR_P0M  *size_MatC];
+	   fM0Pdest = &DC[DIR_M0P  *size_MatC];
+	   f0PPdest = &DC[DIR_0PP  *size_MatC];
+	   f0MMdest = &DC[DIR_0MM  *size_MatC];
+	   f0PMdest = &DC[DIR_0PM  *size_MatC];
+	   f0MPdest = &DC[DIR_0MP  *size_MatC];
+	   f000dest = &DC[DIR_000*size_MatC];
+	   fMMMdest = &DC[DIR_MMM *size_MatC];
+	   fMMPdest = &DC[DIR_MMP *size_MatC];
+	   fMPPdest = &DC[DIR_MPP *size_MatC];
+	   fMPMdest = &DC[DIR_MPM *size_MatC];
+	   fPPMdest = &DC[DIR_PPM *size_MatC];
+	   fPPPdest = &DC[DIR_PPP *size_MatC];
+	   fPMPdest = &DC[DIR_PMP *size_MatC];
+	   fPMMdest = &DC[DIR_PMM *size_MatC];
    } 
    else
    {
-	   fP00dest = &DC[W   *size_MatC];
-	   fM00dest = &DC[E   *size_MatC];
-	   f0P0dest = &DC[S   *size_MatC];
-	   f0M0dest = &DC[N   *size_MatC];
-	   f00Pdest = &DC[B   *size_MatC];
-	   f00Mdest = &DC[T   *size_MatC];
-	   fPP0dest = &DC[SW  *size_MatC];
-	   fMM0dest = &DC[NE  *size_MatC];
-	   fPM0dest = &DC[NW  *size_MatC];
-	   fMP0dest = &DC[SE  *size_MatC];
-	   fP0Pdest = &DC[BW  *size_MatC];
-	   fM0Mdest = &DC[TE  *size_MatC];
-	   fP0Mdest = &DC[TW  *size_MatC];
-	   fM0Pdest = &DC[BE  *size_MatC];
-	   f0PPdest = &DC[BS  *size_MatC];
-	   f0MMdest = &DC[TN  *size_MatC];
-	   f0PMdest = &DC[TS  *size_MatC];
-	   f0MPdest = &DC[BN  *size_MatC];
-	   f000dest = &DC[REST*size_MatC];
-	   fMMMdest = &DC[TNE *size_MatC];
-	   fMMPdest = &DC[BNE *size_MatC];
-	   fMPPdest = &DC[BSE *size_MatC];
-	   fMPMdest = &DC[TSE *size_MatC];
-	   fPPMdest = &DC[TSW *size_MatC];
-	   fPPPdest = &DC[BSW *size_MatC];
-	   fPMPdest = &DC[BNW *size_MatC];
-	   fPMMdest = &DC[TNW *size_MatC];
+	   fP00dest = &DC[DIR_M00   *size_MatC];
+	   fM00dest = &DC[DIR_P00   *size_MatC];
+	   f0P0dest = &DC[DIR_0M0   *size_MatC];
+	   f0M0dest = &DC[DIR_0P0   *size_MatC];
+	   f00Pdest = &DC[DIR_00M   *size_MatC];
+	   f00Mdest = &DC[DIR_00P   *size_MatC];
+	   fPP0dest = &DC[DIR_MM0  *size_MatC];
+	   fMM0dest = &DC[DIR_PP0  *size_MatC];
+	   fPM0dest = &DC[DIR_MP0  *size_MatC];
+	   fMP0dest = &DC[DIR_PM0  *size_MatC];
+	   fP0Pdest = &DC[DIR_M0M  *size_MatC];
+	   fM0Mdest = &DC[DIR_P0P  *size_MatC];
+	   fP0Mdest = &DC[DIR_M0P  *size_MatC];
+	   fM0Pdest = &DC[DIR_P0M  *size_MatC];
+	   f0PPdest = &DC[DIR_0MM  *size_MatC];
+	   f0MMdest = &DC[DIR_0PP  *size_MatC];
+	   f0PMdest = &DC[DIR_0MP  *size_MatC];
+	   f0MPdest = &DC[DIR_0PM  *size_MatC];
+	   f000dest = &DC[DIR_000*size_MatC];
+	   fMMMdest = &DC[DIR_PPP *size_MatC];
+	   fMMPdest = &DC[DIR_PPM *size_MatC];
+	   fMPPdest = &DC[DIR_PMM *size_MatC];
+	   fMPMdest = &DC[DIR_PMP *size_MatC];
+	   fPPMdest = &DC[DIR_MMP *size_MatC];
+	   fPPPdest = &DC[DIR_MMM *size_MatC];
+	   fPMPdest = &DC[DIR_MPM *size_MatC];
+	   fPMMdest = &DC[DIR_MPP *size_MatC];
    }
 
    Distributions6 G;
    if (isEvenTimestep == true)
    {
-	   G.g[E] = &G6[E   *size_MatC];
-	   G.g[W] = &G6[W   *size_MatC];
-	   G.g[N] = &G6[N   *size_MatC];
-	   G.g[S] = &G6[S   *size_MatC];
-	   G.g[T] = &G6[T   *size_MatC];
-	   G.g[B] = &G6[B   *size_MatC];
+	   G.g[DIR_P00] = &G6[DIR_P00   *size_MatC];
+	   G.g[DIR_M00] = &G6[DIR_M00   *size_MatC];
+	   G.g[DIR_0P0] = &G6[DIR_0P0   *size_MatC];
+	   G.g[DIR_0M0] = &G6[DIR_0M0   *size_MatC];
+	   G.g[DIR_00P] = &G6[DIR_00P   *size_MatC];
+	   G.g[DIR_00M] = &G6[DIR_00M   *size_MatC];
    }
    else
    {
-	   G.g[W] = &G6[E   *size_MatC];
-	   G.g[E] = &G6[W   *size_MatC];
-	   G.g[S] = &G6[N   *size_MatC];
-	   G.g[N] = &G6[S   *size_MatC];
-	   G.g[B] = &G6[T   *size_MatC];
-	   G.g[T] = &G6[B   *size_MatC];
+	   G.g[DIR_M00] = &G6[DIR_P00   *size_MatC];
+	   G.g[DIR_P00] = &G6[DIR_M00   *size_MatC];
+	   G.g[DIR_0M0] = &G6[DIR_0P0   *size_MatC];
+	   G.g[DIR_0P0] = &G6[DIR_0M0   *size_MatC];
+	   G.g[DIR_00M] = &G6[DIR_00P   *size_MatC];
+	   G.g[DIR_00P] = &G6[DIR_00M   *size_MatC];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -1168,12 +1168,12 @@ extern "C" __global__ void scaleFC_comp_D3Q27F3_2018(real* DC,
 	  ////////////////////////////////////////////////////////////////////////////////////
 
 	  ////////////////////////////////////////////////////////////////////////////////////
-	  (G.g[E])[k000] = mgcbb;
-	  (G.g[W])[kM00] = mgabb;
-	  (G.g[N])[k000] = mgbcb;
-	  (G.g[S])[k0M0] = mgbab;
-	  (G.g[T])[k000] = mgbbc;
-	  (G.g[B])[k00M] = mgbba;
+	  (G.g[DIR_P00])[k000] = mgcbb;
+	  (G.g[DIR_M00])[kM00] = mgabb;
+	  (G.g[DIR_0P0])[k000] = mgbcb;
+	  (G.g[DIR_0M0])[k0M0] = mgbab;
+	  (G.g[DIR_00P])[k000] = mgbbc;
+	  (G.g[DIR_00M])[k00M] = mgbba;
 	  ////////////////////////////////////////////////////////////////////////////////////
 	  fP00dest[k000] = mfcbb;                                                                 
 	  fM00dest[kM00] = mfabb;                                                               
@@ -1261,7 +1261,7 @@ extern "C" __global__ void scaleFC_comp_D3Q27F3_2018(real* DC,
 
 
 //////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void scaleFC_comp_D3Q27F3( real* DC,
+__global__ void scaleFC_comp_D3Q27F3( real* DC,
 												 real* DF,
 												 real* G6,
 												 unsigned int* neighborCX,
@@ -1291,33 +1291,33 @@ extern "C" __global__ void scaleFC_comp_D3Q27F3( real* DC,
 	   *f000source, *fMMMsource, *fMMPsource, *fMPPsource, *fMPMsource, *fPPMsource, *fPPPsource, *fPMPsource, *fPMMsource;
 
 
-   fP00source = &DF[E   *size_MatF];
-   fM00source = &DF[W   *size_MatF];
-   f0P0source = &DF[N   *size_MatF];
-   f0M0source = &DF[S   *size_MatF];
-   f00Psource = &DF[T   *size_MatF];
-   f00Msource = &DF[B   *size_MatF];
-   fPP0source = &DF[NE  *size_MatF];
-   fMM0source = &DF[SW  *size_MatF];
-   fPM0source = &DF[SE  *size_MatF];
-   fMP0source = &DF[NW  *size_MatF];
-   fP0Psource = &DF[TE  *size_MatF];
-   fM0Msource = &DF[BW  *size_MatF];
-   fP0Msource = &DF[BE  *size_MatF];
-   fM0Psource = &DF[TW  *size_MatF];
-   f0PPsource = &DF[TN  *size_MatF];
-   f0MMsource = &DF[BS  *size_MatF];
-   f0PMsource = &DF[BN  *size_MatF];
-   f0MPsource = &DF[TS  *size_MatF];
-   f000source = &DF[REST*size_MatF];
-   fMMMsource = &DF[BSW *size_MatF];
-   fMMPsource = &DF[TSW *size_MatF];
-   fMPPsource = &DF[TNW *size_MatF];
-   fMPMsource = &DF[BNW *size_MatF];
-   fPPMsource = &DF[BNE *size_MatF];
-   fPPPsource = &DF[TNE *size_MatF];
-   fPMPsource = &DF[TSE *size_MatF];
-   fPMMsource = &DF[BSE *size_MatF];
+   fP00source = &DF[DIR_P00   *size_MatF];
+   fM00source = &DF[DIR_M00   *size_MatF];
+   f0P0source = &DF[DIR_0P0   *size_MatF];
+   f0M0source = &DF[DIR_0M0   *size_MatF];
+   f00Psource = &DF[DIR_00P   *size_MatF];
+   f00Msource = &DF[DIR_00M   *size_MatF];
+   fPP0source = &DF[DIR_PP0  *size_MatF];
+   fMM0source = &DF[DIR_MM0  *size_MatF];
+   fPM0source = &DF[DIR_PM0  *size_MatF];
+   fMP0source = &DF[DIR_MP0  *size_MatF];
+   fP0Psource = &DF[DIR_P0P  *size_MatF];
+   fM0Msource = &DF[DIR_M0M  *size_MatF];
+   fP0Msource = &DF[DIR_P0M  *size_MatF];
+   fM0Psource = &DF[DIR_M0P  *size_MatF];
+   f0PPsource = &DF[DIR_0PP  *size_MatF];
+   f0MMsource = &DF[DIR_0MM  *size_MatF];
+   f0PMsource = &DF[DIR_0PM  *size_MatF];
+   f0MPsource = &DF[DIR_0MP  *size_MatF];
+   f000source = &DF[DIR_000*size_MatF];
+   fMMMsource = &DF[DIR_MMM *size_MatF];
+   fMMPsource = &DF[DIR_MMP *size_MatF];
+   fMPPsource = &DF[DIR_MPP *size_MatF];
+   fMPMsource = &DF[DIR_MPM *size_MatF];
+   fPPMsource = &DF[DIR_PPM *size_MatF];
+   fPPPsource = &DF[DIR_PPP *size_MatF];
+   fPMPsource = &DF[DIR_PMP *size_MatF];
+   fPMMsource = &DF[DIR_PMM *size_MatF];
 
    real
 	   *fP00dest, *fM00dest, *f0P0dest, *f0M0dest, *f00Pdest, *f00Mdest, *fPP0dest, *fMM0dest, *fPM0dest,
@@ -1326,83 +1326,83 @@ extern "C" __global__ void scaleFC_comp_D3Q27F3( real* DC,
 
    if (isEvenTimestep==true)
    {
-	   fP00dest = &DC[E   *size_MatC];
-	   fM00dest = &DC[W   *size_MatC];
-	   f0P0dest = &DC[N   *size_MatC];
-	   f0M0dest = &DC[S   *size_MatC];
-	   f00Pdest = &DC[T   *size_MatC];
-	   f00Mdest = &DC[B   *size_MatC];
-	   fPP0dest = &DC[NE  *size_MatC];
-	   fMM0dest = &DC[SW  *size_MatC];
-	   fPM0dest = &DC[SE  *size_MatC];
-	   fMP0dest = &DC[NW  *size_MatC];
-	   fP0Pdest = &DC[TE  *size_MatC];
-	   fM0Mdest = &DC[BW  *size_MatC];
-	   fP0Mdest = &DC[BE  *size_MatC];
-	   fM0Pdest = &DC[TW  *size_MatC];
-	   f0PPdest = &DC[TN  *size_MatC];
-	   f0MMdest = &DC[BS  *size_MatC];
-	   f0PMdest = &DC[BN  *size_MatC];
-	   f0MPdest = &DC[TS  *size_MatC];
-	   f000dest = &DC[REST*size_MatC];
-	   fMMMdest = &DC[BSW *size_MatC];
-	   fMMPdest = &DC[TSW *size_MatC];
-	   fMPPdest = &DC[TNW *size_MatC];
-	   fMPMdest = &DC[BNW *size_MatC];
-	   fPPMdest = &DC[BNE *size_MatC];
-	   fPPPdest = &DC[TNE *size_MatC];
-	   fPMPdest = &DC[TSE *size_MatC];
-	   fPMMdest = &DC[BSE *size_MatC];
+	   fP00dest = &DC[DIR_P00   *size_MatC];
+	   fM00dest = &DC[DIR_M00   *size_MatC];
+	   f0P0dest = &DC[DIR_0P0   *size_MatC];
+	   f0M0dest = &DC[DIR_0M0   *size_MatC];
+	   f00Pdest = &DC[DIR_00P   *size_MatC];
+	   f00Mdest = &DC[DIR_00M   *size_MatC];
+	   fPP0dest = &DC[DIR_PP0  *size_MatC];
+	   fMM0dest = &DC[DIR_MM0  *size_MatC];
+	   fPM0dest = &DC[DIR_PM0  *size_MatC];
+	   fMP0dest = &DC[DIR_MP0  *size_MatC];
+	   fP0Pdest = &DC[DIR_P0P  *size_MatC];
+	   fM0Mdest = &DC[DIR_M0M  *size_MatC];
+	   fP0Mdest = &DC[DIR_P0M  *size_MatC];
+	   fM0Pdest = &DC[DIR_M0P  *size_MatC];
+	   f0PPdest = &DC[DIR_0PP  *size_MatC];
+	   f0MMdest = &DC[DIR_0MM  *size_MatC];
+	   f0PMdest = &DC[DIR_0PM  *size_MatC];
+	   f0MPdest = &DC[DIR_0MP  *size_MatC];
+	   f000dest = &DC[DIR_000*size_MatC];
+	   fMMMdest = &DC[DIR_MMM *size_MatC];
+	   fMMPdest = &DC[DIR_MMP *size_MatC];
+	   fMPPdest = &DC[DIR_MPP *size_MatC];
+	   fMPMdest = &DC[DIR_MPM *size_MatC];
+	   fPPMdest = &DC[DIR_PPM *size_MatC];
+	   fPPPdest = &DC[DIR_PPP *size_MatC];
+	   fPMPdest = &DC[DIR_PMP *size_MatC];
+	   fPMMdest = &DC[DIR_PMM *size_MatC];
    } 
    else
    {
-	   fP00dest = &DC[W   *size_MatC];
-	   fM00dest = &DC[E   *size_MatC];
-	   f0P0dest = &DC[S   *size_MatC];
-	   f0M0dest = &DC[N   *size_MatC];
-	   f00Pdest = &DC[B   *size_MatC];
-	   f00Mdest = &DC[T   *size_MatC];
-	   fPP0dest = &DC[SW  *size_MatC];
-	   fMM0dest = &DC[NE  *size_MatC];
-	   fPM0dest = &DC[NW  *size_MatC];
-	   fMP0dest = &DC[SE  *size_MatC];
-	   fP0Pdest = &DC[BW  *size_MatC];
-	   fM0Mdest = &DC[TE  *size_MatC];
-	   fP0Mdest = &DC[TW  *size_MatC];
-	   fM0Pdest = &DC[BE  *size_MatC];
-	   f0PPdest = &DC[BS  *size_MatC];
-	   f0MMdest = &DC[TN  *size_MatC];
-	   f0PMdest = &DC[TS  *size_MatC];
-	   f0MPdest = &DC[BN  *size_MatC];
-	   f000dest = &DC[REST*size_MatC];
-	   fMMMdest = &DC[TNE *size_MatC];
-	   fMMPdest = &DC[BNE *size_MatC];
-	   fMPPdest = &DC[BSE *size_MatC];
-	   fMPMdest = &DC[TSE *size_MatC];
-	   fPPMdest = &DC[TSW *size_MatC];
-	   fPPPdest = &DC[BSW *size_MatC];
-	   fPMPdest = &DC[BNW *size_MatC];
-	   fPMMdest = &DC[TNW *size_MatC];
+	   fP00dest = &DC[DIR_M00   *size_MatC];
+	   fM00dest = &DC[DIR_P00   *size_MatC];
+	   f0P0dest = &DC[DIR_0M0   *size_MatC];
+	   f0M0dest = &DC[DIR_0P0   *size_MatC];
+	   f00Pdest = &DC[DIR_00M   *size_MatC];
+	   f00Mdest = &DC[DIR_00P   *size_MatC];
+	   fPP0dest = &DC[DIR_MM0  *size_MatC];
+	   fMM0dest = &DC[DIR_PP0  *size_MatC];
+	   fPM0dest = &DC[DIR_MP0  *size_MatC];
+	   fMP0dest = &DC[DIR_PM0  *size_MatC];
+	   fP0Pdest = &DC[DIR_M0M  *size_MatC];
+	   fM0Mdest = &DC[DIR_P0P  *size_MatC];
+	   fP0Mdest = &DC[DIR_M0P  *size_MatC];
+	   fM0Pdest = &DC[DIR_P0M  *size_MatC];
+	   f0PPdest = &DC[DIR_0MM  *size_MatC];
+	   f0MMdest = &DC[DIR_0PP  *size_MatC];
+	   f0PMdest = &DC[DIR_0MP  *size_MatC];
+	   f0MPdest = &DC[DIR_0PM  *size_MatC];
+	   f000dest = &DC[DIR_000*size_MatC];
+	   fMMMdest = &DC[DIR_PPP *size_MatC];
+	   fMMPdest = &DC[DIR_PPM *size_MatC];
+	   fMPPdest = &DC[DIR_PMM *size_MatC];
+	   fMPMdest = &DC[DIR_PMP *size_MatC];
+	   fPPMdest = &DC[DIR_MMP *size_MatC];
+	   fPPPdest = &DC[DIR_MMM *size_MatC];
+	   fPMPdest = &DC[DIR_MPM *size_MatC];
+	   fPMMdest = &DC[DIR_MPP *size_MatC];
    }
 
    Distributions6 G;
    if (isEvenTimestep == true)
    {
-	   G.g[E] = &G6[E   *size_MatC];
-	   G.g[W] = &G6[W   *size_MatC];
-	   G.g[N] = &G6[N   *size_MatC];
-	   G.g[S] = &G6[S   *size_MatC];
-	   G.g[T] = &G6[T   *size_MatC];
-	   G.g[B] = &G6[B   *size_MatC];
+	   G.g[DIR_P00] = &G6[DIR_P00   *size_MatC];
+	   G.g[DIR_M00] = &G6[DIR_M00   *size_MatC];
+	   G.g[DIR_0P0] = &G6[DIR_0P0   *size_MatC];
+	   G.g[DIR_0M0] = &G6[DIR_0M0   *size_MatC];
+	   G.g[DIR_00P] = &G6[DIR_00P   *size_MatC];
+	   G.g[DIR_00M] = &G6[DIR_00M   *size_MatC];
    }
    else
    {
-	   G.g[W] = &G6[E   *size_MatC];
-	   G.g[E] = &G6[W   *size_MatC];
-	   G.g[S] = &G6[N   *size_MatC];
-	   G.g[N] = &G6[S   *size_MatC];
-	   G.g[B] = &G6[T   *size_MatC];
-	   G.g[T] = &G6[B   *size_MatC];
+	   G.g[DIR_M00] = &G6[DIR_P00   *size_MatC];
+	   G.g[DIR_P00] = &G6[DIR_M00   *size_MatC];
+	   G.g[DIR_0M0] = &G6[DIR_0P0   *size_MatC];
+	   G.g[DIR_0P0] = &G6[DIR_0M0   *size_MatC];
+	   G.g[DIR_00M] = &G6[DIR_00P   *size_MatC];
+	   G.g[DIR_00P] = &G6[DIR_00M   *size_MatC];
    }
 
    ////////////////////////////////////////////////////////////////////////////////
@@ -2409,12 +2409,12 @@ extern "C" __global__ void scaleFC_comp_D3Q27F3( real* DC,
 	  ////////////////////////////////////////////////////////////////////////////////////
 
 	  ////////////////////////////////////////////////////////////////////////////////////
-	  (G.g[E])[k000] = mgcbb;
-	  (G.g[W])[kM00] = mgabb;
-	  (G.g[N])[k000] = mgbcb;
-	  (G.g[S])[k0M0] = mgbab;
-	  (G.g[T])[k000] = mgbbc;
-	  (G.g[B])[k00M] = mgbba;
+	  (G.g[DIR_P00])[k000] = mgcbb;
+	  (G.g[DIR_M00])[kM00] = mgabb;
+	  (G.g[DIR_0P0])[k000] = mgbcb;
+	  (G.g[DIR_0M0])[k0M0] = mgbab;
+	  (G.g[DIR_00P])[k000] = mgbbc;
+	  (G.g[DIR_00M])[k00M] = mgbba;
 	  ////////////////////////////////////////////////////////////////////////////////////
 	  fP00dest[k000] = mfcbb;                                                                 
 	  fM00dest[kM00] = mfabb;                                                               
diff --git a/src/gpu/VirtualFluids_GPU/GPU/SchlafferBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/SchlafferBCs27.cu
index d90cfab98cea27f6d3e5d18f772377326cbc5f6d..8675780d26e63656b04fdfc1f9836b1eba8d1b87 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/SchlafferBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/SchlafferBCs27.cu
@@ -8,7 +8,7 @@ using namespace vf::lbm::dir;
 
 // TODO: https://git.rz.tu-bs.de/irmb/VirtualFluids_dev/-/issues/29
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void PressSchlaff27(real* rhoBC,
+__global__ void PressSchlaff27(real* rhoBC,
                                           real* DD,
                                           real* vx0,
                                           real* vy0,
@@ -71,94 +71,94 @@ extern "C" __global__ void PressSchlaff27(real* rhoBC,
       Distributions27 D;
       if (isEvenTimestep==true)
       {
-         D.f[E   ] = &DD[E   *size_Mat];
-         D.f[W   ] = &DD[W   *size_Mat];
-         D.f[N   ] = &DD[N   *size_Mat];
-         D.f[S   ] = &DD[S   *size_Mat];
-         D.f[T   ] = &DD[T   *size_Mat];
-         D.f[B   ] = &DD[B   *size_Mat];
-         D.f[NE  ] = &DD[NE  *size_Mat];
-         D.f[SW  ] = &DD[SW  *size_Mat];
-         D.f[SE  ] = &DD[SE  *size_Mat];
-         D.f[NW  ] = &DD[NW  *size_Mat];
-         D.f[TE  ] = &DD[TE  *size_Mat];
-         D.f[BW  ] = &DD[BW  *size_Mat];
-         D.f[BE  ] = &DD[BE  *size_Mat];
-         D.f[TW  ] = &DD[TW  *size_Mat];
-         D.f[TN  ] = &DD[TN  *size_Mat];
-         D.f[BS  ] = &DD[BS  *size_Mat];
-         D.f[BN  ] = &DD[BN  *size_Mat];
-         D.f[TS  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[TNE *size_Mat];
-         D.f[TSW ] = &DD[TSW *size_Mat];
-         D.f[TSE ] = &DD[TSE *size_Mat];
-         D.f[TNW ] = &DD[TNW *size_Mat];
-         D.f[BNE ] = &DD[BNE *size_Mat];
-         D.f[BSW ] = &DD[BSW *size_Mat];
-         D.f[BSE ] = &DD[BSE *size_Mat];
-         D.f[BNW ] = &DD[BNW *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
       }
       else
       {
-         D.f[W   ] = &DD[E   *size_Mat];
-         D.f[E   ] = &DD[W   *size_Mat];
-         D.f[S   ] = &DD[N   *size_Mat];
-         D.f[N   ] = &DD[S   *size_Mat];
-         D.f[B   ] = &DD[T   *size_Mat];
-         D.f[T   ] = &DD[B   *size_Mat];
-         D.f[SW  ] = &DD[NE  *size_Mat];
-         D.f[NE  ] = &DD[SW  *size_Mat];
-         D.f[NW  ] = &DD[SE  *size_Mat];
-         D.f[SE  ] = &DD[NW  *size_Mat];
-         D.f[BW  ] = &DD[TE  *size_Mat];
-         D.f[TE  ] = &DD[BW  *size_Mat];
-         D.f[TW  ] = &DD[BE  *size_Mat];
-         D.f[BE  ] = &DD[TW  *size_Mat];
-         D.f[BS  ] = &DD[TN  *size_Mat];
-         D.f[TN  ] = &DD[BS  *size_Mat];
-         D.f[TS  ] = &DD[BN  *size_Mat];
-         D.f[BN  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[BSW *size_Mat];
-         D.f[TSW ] = &DD[BNE *size_Mat];
-         D.f[TSE ] = &DD[BNW *size_Mat];
-         D.f[TNW ] = &DD[BSE *size_Mat];
-         D.f[BNE ] = &DD[TSW *size_Mat];
-         D.f[BSW ] = &DD[TNE *size_Mat];
-         D.f[BSE ] = &DD[TNW *size_Mat];
-         D.f[BNW ] = &DD[TSE *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       real        f1_E,f1_W,f1_N,f1_S,f1_T,f1_B,f1_NE,f1_SW,f1_SE,f1_NW,f1_TE,f1_BW,f1_BE,f1_TW,f1_TN,f1_BS,f1_BN,f1_TS,f1_ZERO,f1_TNE,f1_TSW,f1_TSE,f1_TNW,f1_BNE,f1_BSW,f1_BSE,f1_BNW;
 
-      f1_E    = (D.f[E   ])[ke   ];
-      f1_W    = (D.f[W   ])[kw   ];
-      f1_N    = (D.f[N   ])[kn   ];
-      f1_S    = (D.f[S   ])[ks   ];
-      f1_T    = (D.f[T   ])[kt   ];
-      f1_B    = (D.f[B   ])[kb   ];
-      f1_NE   = (D.f[NE  ])[kne  ];
-      f1_SW   = (D.f[SW  ])[ksw  ];
-      f1_SE   = (D.f[SE  ])[kse  ];
-      f1_NW   = (D.f[NW  ])[knw  ];
-      f1_TE   = (D.f[TE  ])[kte  ];
-      f1_BW   = (D.f[BW  ])[kbw  ];
-      f1_BE   = (D.f[BE  ])[kbe  ];
-      f1_TW   = (D.f[TW  ])[ktw  ];
-      f1_TN   = (D.f[TN  ])[ktn  ];
-      f1_BS   = (D.f[BS  ])[kbs  ];
-      f1_BN   = (D.f[BN  ])[kbn  ];
-      f1_TS   = (D.f[TS  ])[kts  ];
-      f1_ZERO = (D.f[REST])[kzero];
-      f1_TNE  = (D.f[TNE ])[ktne ];
-      f1_TSW  = (D.f[TSW ])[ktsw ];
-      f1_TSE  = (D.f[TSE ])[ktse ];
-      f1_TNW  = (D.f[TNW ])[ktnw ];
-      f1_BNE  = (D.f[BNE ])[kbne ];
-      f1_BSW  = (D.f[BSW ])[kbsw ];
-      f1_BSE  = (D.f[BSE ])[kbse ];
-      f1_BNW  = (D.f[BNW ])[kbnw ];
+      f1_E    = (D.f[DIR_P00   ])[ke   ];
+      f1_W    = (D.f[DIR_M00   ])[kw   ];
+      f1_N    = (D.f[DIR_0P0   ])[kn   ];
+      f1_S    = (D.f[DIR_0M0   ])[ks   ];
+      f1_T    = (D.f[DIR_00P   ])[kt   ];
+      f1_B    = (D.f[DIR_00M   ])[kb   ];
+      f1_NE   = (D.f[DIR_PP0  ])[kne  ];
+      f1_SW   = (D.f[DIR_MM0  ])[ksw  ];
+      f1_SE   = (D.f[DIR_PM0  ])[kse  ];
+      f1_NW   = (D.f[DIR_MP0  ])[knw  ];
+      f1_TE   = (D.f[DIR_P0P  ])[kte  ];
+      f1_BW   = (D.f[DIR_M0M  ])[kbw  ];
+      f1_BE   = (D.f[DIR_P0M  ])[kbe  ];
+      f1_TW   = (D.f[DIR_M0P  ])[ktw  ];
+      f1_TN   = (D.f[DIR_0PP  ])[ktn  ];
+      f1_BS   = (D.f[DIR_0MM  ])[kbs  ];
+      f1_BN   = (D.f[DIR_0PM  ])[kbn  ];
+      f1_TS   = (D.f[DIR_0MP  ])[kts  ];
+      f1_ZERO = (D.f[DIR_000])[kzero];
+      f1_TNE  = (D.f[DIR_PPP ])[ktne ];
+      f1_TSW  = (D.f[DIR_MMP ])[ktsw ];
+      f1_TSE  = (D.f[DIR_PMP ])[ktse ];
+      f1_TNW  = (D.f[DIR_MPP ])[ktnw ];
+      f1_BNE  = (D.f[DIR_PPM ])[kbne ];
+      f1_BSW  = (D.f[DIR_MMM ])[kbsw ];
+      f1_BSE  = (D.f[DIR_PMM ])[kbse ];
+      f1_BNW  = (D.f[DIR_MPM ])[kbnw ];
       //////////////////////////////////////////////////////////////////////////
       real cs       = c1o1/sqrt(c3o1);
       real csp1     = cs + c1o1;
@@ -222,15 +222,15 @@ extern "C" __global__ void PressSchlaff27(real* rhoBC,
 
       deltaVz0[k] = tempDeltaV;
 
-      (D.f[B   ])[kb   ] = f1_B   ;
-      (D.f[BW  ])[kbw  ] = f1_BW  ;
-      (D.f[BE  ])[kbe  ] = f1_BE  ;
-      (D.f[BS  ])[kbs  ] = f1_BS  ;
-      (D.f[BN  ])[kbn  ] = f1_BN  ;
-      (D.f[BNE ])[kbne ] = f1_BNE ;
-      (D.f[BSW ])[kbsw ] = f1_BSW ;
-      (D.f[BSE ])[kbse ] = f1_BSE ;
-      (D.f[BNW ])[kbnw ] = f1_BNW ;
+      (D.f[DIR_00M   ])[kb   ] = f1_B   ;
+      (D.f[DIR_M0M  ])[kbw  ] = f1_BW  ;
+      (D.f[DIR_P0M  ])[kbe  ] = f1_BE  ;
+      (D.f[DIR_0MM  ])[kbs  ] = f1_BS  ;
+      (D.f[DIR_0PM  ])[kbn  ] = f1_BN  ;
+      (D.f[DIR_PPM ])[kbne ] = f1_BNE ;
+      (D.f[DIR_MMM ])[kbsw ] = f1_BSW ;
+      (D.f[DIR_PMM ])[kbse ] = f1_BSE ;
+      (D.f[DIR_MPM ])[kbnw ] = f1_BNW ;
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -274,7 +274,7 @@ extern "C" __global__ void PressSchlaff27(real* rhoBC,
 
 // TODO: https://git.rz.tu-bs.de/irmb/VirtualFluids_dev/-/issues/29
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void VelSchlaff27(  int t,
+__global__ void VelSchlaff27(  int t,
                                           real* DD,
                                           real* vz0,
                                           real* deltaVz0,
@@ -335,122 +335,122 @@ extern "C" __global__ void VelSchlaff27(  int t,
       Distributions27 D;
       if (isEvenTimestep==true)
       {
-         D.f[E   ] = &DD[E   *size_Mat];
-         D.f[W   ] = &DD[W   *size_Mat];
-         D.f[N   ] = &DD[N   *size_Mat];
-         D.f[S   ] = &DD[S   *size_Mat];
-         D.f[T   ] = &DD[T   *size_Mat];
-         D.f[B   ] = &DD[B   *size_Mat];
-         D.f[NE  ] = &DD[NE  *size_Mat];
-         D.f[SW  ] = &DD[SW  *size_Mat];
-         D.f[SE  ] = &DD[SE  *size_Mat];
-         D.f[NW  ] = &DD[NW  *size_Mat];
-         D.f[TE  ] = &DD[TE  *size_Mat];
-         D.f[BW  ] = &DD[BW  *size_Mat];
-         D.f[BE  ] = &DD[BE  *size_Mat];
-         D.f[TW  ] = &DD[TW  *size_Mat];
-         D.f[TN  ] = &DD[TN  *size_Mat];
-         D.f[BS  ] = &DD[BS  *size_Mat];
-         D.f[BN  ] = &DD[BN  *size_Mat];
-         D.f[TS  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[TNE *size_Mat];
-         D.f[TSW ] = &DD[TSW *size_Mat];
-         D.f[TSE ] = &DD[TSE *size_Mat];
-         D.f[TNW ] = &DD[TNW *size_Mat];
-         D.f[BNE ] = &DD[BNE *size_Mat];
-         D.f[BSW ] = &DD[BSW *size_Mat];
-         D.f[BSE ] = &DD[BSE *size_Mat];
-         D.f[BNW ] = &DD[BNW *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
       }
       else
       {
-         D.f[W   ] = &DD[E   *size_Mat];
-         D.f[E   ] = &DD[W   *size_Mat];
-         D.f[S   ] = &DD[N   *size_Mat];
-         D.f[N   ] = &DD[S   *size_Mat];
-         D.f[B   ] = &DD[T   *size_Mat];
-         D.f[T   ] = &DD[B   *size_Mat];
-         D.f[SW  ] = &DD[NE  *size_Mat];
-         D.f[NE  ] = &DD[SW  *size_Mat];
-         D.f[NW  ] = &DD[SE  *size_Mat];
-         D.f[SE  ] = &DD[NW  *size_Mat];
-         D.f[BW  ] = &DD[TE  *size_Mat];
-         D.f[TE  ] = &DD[BW  *size_Mat];
-         D.f[TW  ] = &DD[BE  *size_Mat];
-         D.f[BE  ] = &DD[TW  *size_Mat];
-         D.f[BS  ] = &DD[TN  *size_Mat];
-         D.f[TN  ] = &DD[BS  *size_Mat];
-         D.f[TS  ] = &DD[BN  *size_Mat];
-         D.f[BN  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[BSW *size_Mat];
-         D.f[TSW ] = &DD[BNE *size_Mat];
-         D.f[TSE ] = &DD[BNW *size_Mat];
-         D.f[TNW ] = &DD[BSE *size_Mat];
-         D.f[BNE ] = &DD[TSW *size_Mat];
-         D.f[BSW ] = &DD[TNE *size_Mat];
-         D.f[BSE ] = &DD[TNW *size_Mat];
-         D.f[BNW ] = &DD[TSE *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       real        f1_E,f1_W,f1_N,f1_S,f1_T,f1_B,f1_NE,f1_SW,f1_SE,f1_NW,f1_TE,f1_BW,f1_BE,f1_TW,f1_TN,f1_BS,f1_BN,f1_TS,f1_ZERO,
                      f1_TNE,f1_TSW,f1_TSE,f1_TNW,f1_BNE,f1_BSW,f1_BSE,f1_BNW;
 
-      f1_E    = (D.f[E   ])[ke   ];
-      f1_W    = (D.f[W   ])[kw   ];
-      f1_N    = (D.f[N   ])[kn   ];
-      f1_S    = (D.f[S   ])[ks   ];
-      f1_T    = (D.f[T   ])[kt   ];
-      f1_B    = (D.f[B   ])[kb   ];
-      f1_NE   = (D.f[NE  ])[kne  ];
-      f1_SW   = (D.f[SW  ])[ksw  ];
-      f1_SE   = (D.f[SE  ])[kse  ];
-      f1_NW   = (D.f[NW  ])[knw  ];
-      f1_TE   = (D.f[TE  ])[kte  ];
-      f1_BW   = (D.f[BW  ])[kbw  ];
-      f1_BE   = (D.f[BE  ])[kbe  ];
-      f1_TW   = (D.f[TW  ])[ktw  ];
-      f1_TN   = (D.f[TN  ])[ktn  ];
-      f1_BS   = (D.f[BS  ])[kbs  ];
-      f1_BN   = (D.f[BN  ])[kbn  ];
-      f1_TS   = (D.f[TS  ])[kts  ];
-      f1_ZERO = (D.f[REST])[kzero];
-      f1_TNE  = (D.f[TNE ])[ktne ];
-      f1_TSW  = (D.f[TSW ])[ktsw ];
-      f1_TSE  = (D.f[TSE ])[ktse ];
-      f1_TNW  = (D.f[TNW ])[ktnw ];
-      f1_BNE  = (D.f[BNE ])[kbne ];
-      f1_BSW  = (D.f[BSW ])[kbsw ];
-      f1_BSE  = (D.f[BSE ])[kbse ];
-      f1_BNW  = (D.f[BNW ])[kbnw ];
-      //f1_W    = (D.f[E   ])[ke   ];
-      //f1_E    = (D.f[W   ])[kw   ];
-      //f1_S    = (D.f[N   ])[kn   ];
-      //f1_N    = (D.f[S   ])[ks   ];
-      //f1_B    = (D.f[T   ])[kt   ];
-      //f1_T    = (D.f[B   ])[kb   ];
-      //f1_SW   = (D.f[NE  ])[kne  ];
-      //f1_NE   = (D.f[SW  ])[ksw  ];
-      //f1_NW   = (D.f[SE  ])[kse  ];
-      //f1_SE   = (D.f[NW  ])[knw  ];
-      //f1_BW   = (D.f[TE  ])[kte  ];
-      //f1_TE   = (D.f[BW  ])[kbw  ];
-      //f1_TW   = (D.f[BE  ])[kbe  ];
-      //f1_BE   = (D.f[TW  ])[ktw  ];
-      //f1_BS   = (D.f[TN  ])[ktn  ];
-      //f1_TN   = (D.f[BS  ])[kbs  ];
-      //f1_TS   = (D.f[BN  ])[kbn  ];
-      //f1_BN   = (D.f[TS  ])[kts  ];
-      //f1_ZERO = (D.f[REST])[kzero];
-      //f1_BSW  = (D.f[TNE ])[ktne ];
-      //f1_BNE  = (D.f[TSW ])[ktsw ];
-      //f1_BNW  = (D.f[TSE ])[ktse ];
-      //f1_BSE  = (D.f[TNW ])[ktnw ];
-      //f1_TSW  = (D.f[BNE ])[kbne ];
-      //f1_TNE  = (D.f[BSW ])[kbsw ];
-      //f1_TNW  = (D.f[BSE ])[kbse ];
-      //f1_TSE  = (D.f[BNW ])[kbnw ];
+      f1_E    = (D.f[DIR_P00   ])[ke   ];
+      f1_W    = (D.f[DIR_M00   ])[kw   ];
+      f1_N    = (D.f[DIR_0P0   ])[kn   ];
+      f1_S    = (D.f[DIR_0M0   ])[ks   ];
+      f1_T    = (D.f[DIR_00P   ])[kt   ];
+      f1_B    = (D.f[DIR_00M   ])[kb   ];
+      f1_NE   = (D.f[DIR_PP0  ])[kne  ];
+      f1_SW   = (D.f[DIR_MM0  ])[ksw  ];
+      f1_SE   = (D.f[DIR_PM0  ])[kse  ];
+      f1_NW   = (D.f[DIR_MP0  ])[knw  ];
+      f1_TE   = (D.f[DIR_P0P  ])[kte  ];
+      f1_BW   = (D.f[DIR_M0M  ])[kbw  ];
+      f1_BE   = (D.f[DIR_P0M  ])[kbe  ];
+      f1_TW   = (D.f[DIR_M0P  ])[ktw  ];
+      f1_TN   = (D.f[DIR_0PP  ])[ktn  ];
+      f1_BS   = (D.f[DIR_0MM  ])[kbs  ];
+      f1_BN   = (D.f[DIR_0PM  ])[kbn  ];
+      f1_TS   = (D.f[DIR_0MP  ])[kts  ];
+      f1_ZERO = (D.f[DIR_000])[kzero];
+      f1_TNE  = (D.f[DIR_PPP ])[ktne ];
+      f1_TSW  = (D.f[DIR_MMP ])[ktsw ];
+      f1_TSE  = (D.f[DIR_PMP ])[ktse ];
+      f1_TNW  = (D.f[DIR_MPP ])[ktnw ];
+      f1_BNE  = (D.f[DIR_PPM ])[kbne ];
+      f1_BSW  = (D.f[DIR_MMM ])[kbsw ];
+      f1_BSE  = (D.f[DIR_PMM ])[kbse ];
+      f1_BNW  = (D.f[DIR_MPM ])[kbnw ];
+      //f1_W    = (D.f[DIR_P00   ])[ke   ];
+      //f1_E    = (D.f[DIR_M00   ])[kw   ];
+      //f1_S    = (D.f[DIR_0P0   ])[kn   ];
+      //f1_N    = (D.f[DIR_0M0   ])[ks   ];
+      //f1_B    = (D.f[DIR_00P   ])[kt   ];
+      //f1_T    = (D.f[DIR_00M   ])[kb   ];
+      //f1_SW   = (D.f[DIR_PP0  ])[kne  ];
+      //f1_NE   = (D.f[DIR_MM0  ])[ksw  ];
+      //f1_NW   = (D.f[DIR_PM0  ])[kse  ];
+      //f1_SE   = (D.f[DIR_MP0  ])[knw  ];
+      //f1_BW   = (D.f[DIR_P0P  ])[kte  ];
+      //f1_TE   = (D.f[DIR_M0M  ])[kbw  ];
+      //f1_TW   = (D.f[DIR_P0M  ])[kbe  ];
+      //f1_BE   = (D.f[DIR_M0P  ])[ktw  ];
+      //f1_BS   = (D.f[DIR_0PP  ])[ktn  ];
+      //f1_TN   = (D.f[DIR_0MM  ])[kbs  ];
+      //f1_TS   = (D.f[DIR_0PM  ])[kbn  ];
+      //f1_BN   = (D.f[DIR_0MP  ])[kts  ];
+      //f1_ZERO = (D.f[DIR_000])[kzero];
+      //f1_BSW  = (D.f[DIR_PPP ])[ktne ];
+      //f1_BNE  = (D.f[DIR_MMP ])[ktsw ];
+      //f1_BNW  = (D.f[DIR_PMP ])[ktse ];
+      //f1_BSE  = (D.f[DIR_MPP ])[ktnw ];
+      //f1_TSW  = (D.f[DIR_PPM ])[kbne ];
+      //f1_TNE  = (D.f[DIR_MMM ])[kbsw ];
+      //f1_TNW  = (D.f[DIR_PMM ])[kbse ];
+      //f1_TSE  = (D.f[DIR_MPM ])[kbnw ];
       //////////////////////////////////////////////////////////////////////////
       real cs       = c1o1/sqrt(c3o1);
       real csp1     = cs + c1o1;
@@ -522,64 +522,64 @@ extern "C" __global__ void VelSchlaff27(  int t,
       f1_TNW = f1_BSE - c1o36 * (VX - VY - VZ);
 
       deltaVz0[k] = tempDeltaV;
-      (D.f[T   ])[kt   ] = f1_T  ;
-      (D.f[TE  ])[kte  ] = f1_TE ;
-      (D.f[TW  ])[ktw  ] = f1_TW ;
-      (D.f[TN  ])[ktn  ] = f1_TN ;
-      (D.f[TS  ])[kts  ] = f1_TS ;
-      (D.f[TNE ])[ktne ] = f1_TNE;
-      (D.f[TSW ])[ktsw ] = f1_TSW;
-      (D.f[TSE ])[ktse ] = f1_TSE;
-      (D.f[TNW ])[ktnw ] = f1_TNW;
-
-      //(D.f[B   ])[kb   ] = f1_B   ;
-      //(D.f[BW  ])[kbw  ] = f1_BW  ;
-      //(D.f[BE  ])[kbe  ] = f1_BE  ;
-      //(D.f[BS  ])[kbs  ] = f1_BS  ;
-      //(D.f[BN  ])[kbn  ] = f1_BN  ;
-      //(D.f[BNE ])[kbne ] = f1_BNE ;
-      //(D.f[BSW ])[kbsw ] = f1_BSW ;
-      //(D.f[BSE ])[kbse ] = f1_BSE ;
-      //(D.f[BNW ])[kbnw ] = f1_BNW ;
-
-
-      //(D.f[T   ])[kt   ] = f1_B  ;
-      //(D.f[TE  ])[kte  ] = f1_BW ;
-      //(D.f[TW  ])[ktw  ] = f1_BE ;
-      //(D.f[TN  ])[ktn  ] = f1_BS ;
-      //(D.f[TS  ])[kts  ] = f1_BN ;
-      //(D.f[TNE ])[ktne ] = f1_BSW;
-      //(D.f[TSW ])[ktsw ] = f1_BNE;
-      //(D.f[TSE ])[ktse ] = f1_BNW;
-      //(D.f[TNW ])[ktnw ] = f1_BSE;
-
-      //(D.f[E   ])[ke   ] = f1_W   -c2over27*drho1;
-      //(D.f[W   ])[kw   ] = f1_E   -c2over27*drho1;
-      //(D.f[N   ])[kn   ] = f1_S   -c2over27*drho1;
-      //(D.f[S   ])[ks   ] = f1_N   -c2over27*drho1;
-      //(D.f[T   ])[kt   ] = f1_B   -c2over27*drho1;
-      //(D.f[B   ])[kb   ] = f1_T   -c2over27*drho1;
-      //(D.f[NE  ])[kne  ] = f1_SW  -c1over54*drho1;
-      //(D.f[SW  ])[ksw  ] = f1_NE  -c1over54*drho1;
-      //(D.f[SE  ])[kse  ] = f1_NW  -c1over54*drho1;
-      //(D.f[NW  ])[knw  ] = f1_SE  -c1over54*drho1;
-      //(D.f[TE  ])[kte  ] = f1_BW  -c1over54*drho1;
-      //(D.f[BW  ])[kbw  ] = f1_TE  -c1over54*drho1;
-      //(D.f[BE  ])[kbe  ] = f1_TW  -c1over54*drho1;
-      //(D.f[TW  ])[ktw  ] = f1_BE  -c1over54*drho1;
-      //(D.f[TN  ])[ktn  ] = f1_BS  -c1over54*drho1;
-      //(D.f[BS  ])[kbs  ] = f1_TN  -c1over54*drho1;
-      //(D.f[BN  ])[kbn  ] = f1_TS  -c1over54*drho1;
-      //(D.f[TS  ])[kts  ] = f1_BN  -c1over54*drho1;
-      //(D.f[REST])[kzero] = f1_ZERO-c8over27*drho1;
-      //(D.f[TNE ])[ktne ] = f1_BSW -c1over216*drho1;
-      //(D.f[TSW ])[ktsw ] = f1_BNE -c1over216*drho1;
-      //(D.f[TSE ])[ktse ] = f1_BNW -c1over216*drho1;
-      //(D.f[TNW ])[ktnw ] = f1_BSE -c1over216*drho1;
-      //(D.f[BNE ])[kbne ] = f1_TSW -c1over216*drho1;
-      //(D.f[BSW ])[kbsw ] = f1_TNE -c1over216*drho1;
-      //(D.f[BSE ])[kbse ] = f1_TNW -c1over216*drho1;
-      //(D.f[BNW ])[kbnw ] = f1_TSE -c1over216*drho1;
+      (D.f[DIR_00P   ])[kt   ] = f1_T  ;
+      (D.f[DIR_P0P  ])[kte  ] = f1_TE ;
+      (D.f[DIR_M0P  ])[ktw  ] = f1_TW ;
+      (D.f[DIR_0PP  ])[ktn  ] = f1_TN ;
+      (D.f[DIR_0MP  ])[kts  ] = f1_TS ;
+      (D.f[DIR_PPP ])[ktne ] = f1_TNE;
+      (D.f[DIR_MMP ])[ktsw ] = f1_TSW;
+      (D.f[DIR_PMP ])[ktse ] = f1_TSE;
+      (D.f[DIR_MPP ])[ktnw ] = f1_TNW;
+
+      //(D.f[DIR_00M   ])[kb   ] = f1_B   ;
+      //(D.f[DIR_M0M  ])[kbw  ] = f1_BW  ;
+      //(D.f[DIR_P0M  ])[kbe  ] = f1_BE  ;
+      //(D.f[DIR_0MM  ])[kbs  ] = f1_BS  ;
+      //(D.f[DIR_0PM  ])[kbn  ] = f1_BN  ;
+      //(D.f[DIR_PPM ])[kbne ] = f1_BNE ;
+      //(D.f[DIR_MMM ])[kbsw ] = f1_BSW ;
+      //(D.f[DIR_PMM ])[kbse ] = f1_BSE ;
+      //(D.f[DIR_MPM ])[kbnw ] = f1_BNW ;
+
+
+      //(D.f[DIR_00P   ])[kt   ] = f1_B  ;
+      //(D.f[DIR_P0P  ])[kte  ] = f1_BW ;
+      //(D.f[DIR_M0P  ])[ktw  ] = f1_BE ;
+      //(D.f[DIR_0PP  ])[ktn  ] = f1_BS ;
+      //(D.f[DIR_0MP  ])[kts  ] = f1_BN ;
+      //(D.f[DIR_PPP ])[ktne ] = f1_BSW;
+      //(D.f[DIR_MMP ])[ktsw ] = f1_BNE;
+      //(D.f[DIR_PMP ])[ktse ] = f1_BNW;
+      //(D.f[DIR_MPP ])[ktnw ] = f1_BSE;
+
+      //(D.f[DIR_P00   ])[ke   ] = f1_W   -c2over27*drho1;
+      //(D.f[DIR_M00   ])[kw   ] = f1_E   -c2over27*drho1;
+      //(D.f[DIR_0P0   ])[kn   ] = f1_S   -c2over27*drho1;
+      //(D.f[DIR_0M0   ])[ks   ] = f1_N   -c2over27*drho1;
+      //(D.f[DIR_00P   ])[kt   ] = f1_B   -c2over27*drho1;
+      //(D.f[DIR_00M   ])[kb   ] = f1_T   -c2over27*drho1;
+      //(D.f[DIR_PP0  ])[kne  ] = f1_SW  -c1over54*drho1;
+      //(D.f[DIR_MM0  ])[ksw  ] = f1_NE  -c1over54*drho1;
+      //(D.f[DIR_PM0  ])[kse  ] = f1_NW  -c1over54*drho1;
+      //(D.f[DIR_MP0  ])[knw  ] = f1_SE  -c1over54*drho1;
+      //(D.f[DIR_P0P  ])[kte  ] = f1_BW  -c1over54*drho1;
+      //(D.f[DIR_M0M  ])[kbw  ] = f1_TE  -c1over54*drho1;
+      //(D.f[DIR_P0M  ])[kbe  ] = f1_TW  -c1over54*drho1;
+      //(D.f[DIR_M0P  ])[ktw  ] = f1_BE  -c1over54*drho1;
+      //(D.f[DIR_0PP  ])[ktn  ] = f1_BS  -c1over54*drho1;
+      //(D.f[DIR_0MM  ])[kbs  ] = f1_TN  -c1over54*drho1;
+      //(D.f[DIR_0PM  ])[kbn  ] = f1_TS  -c1over54*drho1;
+      //(D.f[DIR_0MP  ])[kts  ] = f1_BN  -c1over54*drho1;
+      //(D.f[DIR_000])[kzero] = f1_ZERO-c8over27*drho1;
+      //(D.f[DIR_PPP ])[ktne ] = f1_BSW -c1over216*drho1;
+      //(D.f[DIR_MMP ])[ktsw ] = f1_BNE -c1over216*drho1;
+      //(D.f[DIR_PMP ])[ktse ] = f1_BNW -c1over216*drho1;
+      //(D.f[DIR_MPP ])[ktnw ] = f1_BSE -c1over216*drho1;
+      //(D.f[DIR_PPM ])[kbne ] = f1_TSW -c1over216*drho1;
+      //(D.f[DIR_MMM ])[kbsw ] = f1_TNE -c1over216*drho1;
+      //(D.f[DIR_PMM ])[kbse ] = f1_TNW -c1over216*drho1;
+      //(D.f[DIR_MPM ])[kbnw ] = f1_TSE -c1over216*drho1;
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/GPU/SetForcing27.cu b/src/gpu/VirtualFluids_GPU/GPU/SetForcing27.cu
index 303cf6424607e0f3427ed3735cc0137d7a54028e..8dbf2c670a549f9a6afe581510205c31246b50cb 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/SetForcing27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/SetForcing27.cu
@@ -7,7 +7,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void GetVeloforForcing27( real* DD, 
+__global__ void GetVeloforForcing27( real* DD, 
 												int* bcIndex, 
 												int nonAtBC, 
 												real* Vx,
@@ -22,63 +22,63 @@ extern "C" __global__ void GetVeloforForcing27( real* DD,
 	Distributions27 D;
 	if (isEvenTimestep==false)
 	{
-		D.f[E   ] = &DD[E   *size_Mat];
-		D.f[W   ] = &DD[W   *size_Mat];
-		D.f[N   ] = &DD[N   *size_Mat];
-		D.f[S   ] = &DD[S   *size_Mat];
-		D.f[T   ] = &DD[T   *size_Mat];
-		D.f[B   ] = &DD[B   *size_Mat];
-		D.f[NE  ] = &DD[NE  *size_Mat];
-		D.f[SW  ] = &DD[SW  *size_Mat];
-		D.f[SE  ] = &DD[SE  *size_Mat];
-		D.f[NW  ] = &DD[NW  *size_Mat];
-		D.f[TE  ] = &DD[TE  *size_Mat];
-		D.f[BW  ] = &DD[BW  *size_Mat];
-		D.f[BE  ] = &DD[BE  *size_Mat];
-		D.f[TW  ] = &DD[TW  *size_Mat];
-		D.f[TN  ] = &DD[TN  *size_Mat];
-		D.f[BS  ] = &DD[BS  *size_Mat];
-		D.f[BN  ] = &DD[BN  *size_Mat];
-		D.f[TS  ] = &DD[TS  *size_Mat];
-		D.f[REST] = &DD[REST*size_Mat];
-		D.f[TNE ] = &DD[TNE *size_Mat];
-		D.f[TSW ] = &DD[TSW *size_Mat];
-		D.f[TSE ] = &DD[TSE *size_Mat];
-		D.f[TNW ] = &DD[TNW *size_Mat];
-		D.f[BNE ] = &DD[BNE *size_Mat];
-		D.f[BSW ] = &DD[BSW *size_Mat];
-		D.f[BSE ] = &DD[BSE *size_Mat];
-		D.f[BNW ] = &DD[BNW *size_Mat];
+		D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+		D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+		D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+		D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+		D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+		D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+		D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+		D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+		D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+		D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+		D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+		D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+		D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+		D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+		D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+		D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+		D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+		D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+		D.f[DIR_000] = &DD[DIR_000*size_Mat];
+		D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+		D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+		D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+		D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+		D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+		D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+		D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+		D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
 	} 
 	else
 	{
-		D.f[W   ] = &DD[E   *size_Mat];
-		D.f[E   ] = &DD[W   *size_Mat];
-		D.f[S   ] = &DD[N   *size_Mat];
-		D.f[N   ] = &DD[S   *size_Mat];
-		D.f[B   ] = &DD[T   *size_Mat];
-		D.f[T   ] = &DD[B   *size_Mat];
-		D.f[SW  ] = &DD[NE  *size_Mat];
-		D.f[NE  ] = &DD[SW  *size_Mat];
-		D.f[NW  ] = &DD[SE  *size_Mat];
-		D.f[SE  ] = &DD[NW  *size_Mat];
-		D.f[BW  ] = &DD[TE  *size_Mat];
-		D.f[TE  ] = &DD[BW  *size_Mat];
-		D.f[TW  ] = &DD[BE  *size_Mat];
-		D.f[BE  ] = &DD[TW  *size_Mat];
-		D.f[BS  ] = &DD[TN  *size_Mat];
-		D.f[TN  ] = &DD[BS  *size_Mat];
-		D.f[TS  ] = &DD[BN  *size_Mat];
-		D.f[BN  ] = &DD[TS  *size_Mat];
-		D.f[REST] = &DD[REST*size_Mat];
-		D.f[TNE ] = &DD[BSW *size_Mat];
-		D.f[TSW ] = &DD[BNE *size_Mat];
-		D.f[TSE ] = &DD[BNW *size_Mat];
-		D.f[TNW ] = &DD[BSE *size_Mat];
-		D.f[BNE ] = &DD[TSW *size_Mat];
-		D.f[BSW ] = &DD[TNE *size_Mat];
-		D.f[BSE ] = &DD[TNW *size_Mat];
-		D.f[BNW ] = &DD[TSE *size_Mat];
+		D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+		D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+		D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+		D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+		D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+		D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+		D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+		D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+		D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+		D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+		D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+		D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+		D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+		D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+		D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+		D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+		D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+		D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+		D.f[DIR_000] = &DD[DIR_000*size_Mat];
+		D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+		D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+		D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+		D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+		D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+		D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+		D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+		D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
 	}
 	////////////////////////////////////////////////////////////////////////////////
 	const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -123,33 +123,33 @@ extern "C" __global__ void GetVeloforForcing27( real* DD,
 		unsigned int ktne = KQK;
 		unsigned int kbsw = neighborZ[ksw];
 		////////////////////////////////////////////////////////////////////////////////
-		real mfcbb = (D.f[E   ])[ke   ];
-		real mfabb = (D.f[W   ])[kw   ];
-		real mfbcb = (D.f[N   ])[kn   ];
-		real mfbab = (D.f[S   ])[ks   ];
-		real mfbbc = (D.f[T   ])[kt   ];
-		real mfbba = (D.f[B   ])[kb   ];
-		real mfccb = (D.f[NE  ])[kne  ];
-		real mfaab = (D.f[SW  ])[ksw  ];
-		real mfcab = (D.f[SE  ])[kse  ];
-		real mfacb = (D.f[NW  ])[knw  ];
-		real mfcbc = (D.f[TE  ])[kte  ];
-		real mfaba = (D.f[BW  ])[kbw  ];
-		real mfcba = (D.f[BE  ])[kbe  ];
-		real mfabc = (D.f[TW  ])[ktw  ];
-		real mfbcc = (D.f[TN  ])[ktn  ];
-		real mfbaa = (D.f[BS  ])[kbs  ];
-		real mfbca = (D.f[BN  ])[kbn  ];
-		real mfbac = (D.f[TS  ])[kts  ];
-		real mfbbb = (D.f[REST])[kzero];
-		real mfccc = (D.f[TNE ])[ktne ];
-		real mfaac = (D.f[TSW ])[ktsw ];
-		real mfcac = (D.f[TSE ])[ktse ];
-		real mfacc = (D.f[TNW ])[ktnw ];
-		real mfcca = (D.f[BNE ])[kbne ];
-		real mfaaa = (D.f[BSW ])[kbsw ];
-		real mfcaa = (D.f[BSE ])[kbse ];
-		real mfaca = (D.f[BNW ])[kbnw ];
+		real mfcbb = (D.f[DIR_P00   ])[ke   ];
+		real mfabb = (D.f[DIR_M00   ])[kw   ];
+		real mfbcb = (D.f[DIR_0P0   ])[kn   ];
+		real mfbab = (D.f[DIR_0M0   ])[ks   ];
+		real mfbbc = (D.f[DIR_00P   ])[kt   ];
+		real mfbba = (D.f[DIR_00M   ])[kb   ];
+		real mfccb = (D.f[DIR_PP0  ])[kne  ];
+		real mfaab = (D.f[DIR_MM0  ])[ksw  ];
+		real mfcab = (D.f[DIR_PM0  ])[kse  ];
+		real mfacb = (D.f[DIR_MP0  ])[knw  ];
+		real mfcbc = (D.f[DIR_P0P  ])[kte  ];
+		real mfaba = (D.f[DIR_M0M  ])[kbw  ];
+		real mfcba = (D.f[DIR_P0M  ])[kbe  ];
+		real mfabc = (D.f[DIR_M0P  ])[ktw  ];
+		real mfbcc = (D.f[DIR_0PP  ])[ktn  ];
+		real mfbaa = (D.f[DIR_0MM  ])[kbs  ];
+		real mfbca = (D.f[DIR_0PM  ])[kbn  ];
+		real mfbac = (D.f[DIR_0MP  ])[kts  ];
+		real mfbbb = (D.f[DIR_000])[kzero];
+		real mfccc = (D.f[DIR_PPP ])[ktne ];
+		real mfaac = (D.f[DIR_MMP ])[ktsw ];
+		real mfcac = (D.f[DIR_PMP ])[ktse ];
+		real mfacc = (D.f[DIR_MPP ])[ktnw ];
+		real mfcca = (D.f[DIR_PPM ])[kbne ];
+		real mfaaa = (D.f[DIR_MMM ])[kbsw ];
+		real mfcaa = (D.f[DIR_PMM ])[kbse ];
+		real mfaca = (D.f[DIR_MPM ])[kbnw ];
 		////////////////////////////////////////////////////////////////////////////////////
 		real rho   = (mfccc+mfaaa + mfaca+mfcac + mfacc+mfcaa + mfaac+mfcca + 
 					 	 mfbac+mfbca + mfbaa+mfbcc + mfabc+mfcba + mfaba+mfcbc + mfacb+mfcab + mfaab+mfccb +
diff --git a/src/gpu/VirtualFluids_GPU/GPU/SlipBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/SlipBCs27.cu
index 9b35d0e0ed9365112766f3ae4f46927b77361621..0079c927373e90c1e408d2c57ace0595bcfdff15 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/SlipBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/SlipBCs27.cu
@@ -1,13 +1,14 @@
 /* Device code */
 #include "LBM/LB.h" 
 #include "lbm/constants/D3Q27.h"
-#include <lbm/constants/NumericConstants.h>
+#include "lbm/constants/NumericConstants.h"
+#include "KernelUtilities.h"
 
 using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QSlipDevice27(real* DD, 
+__global__ void QSlipDevice27(real* DD, 
                                          int* k_Q, 
                                          real* QQ,
                                          unsigned int numberOfBCnodes,
@@ -21,63 +22,63 @@ extern "C" __global__ void QSlipDevice27(real* DD,
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[E   ] = &DD[E   *size_Mat];
-      D.f[W   ] = &DD[W   *size_Mat];
-      D.f[N   ] = &DD[N   *size_Mat];
-      D.f[S   ] = &DD[S   *size_Mat];
-      D.f[T   ] = &DD[T   *size_Mat];
-      D.f[B   ] = &DD[B   *size_Mat];
-      D.f[NE  ] = &DD[NE  *size_Mat];
-      D.f[SW  ] = &DD[SW  *size_Mat];
-      D.f[SE  ] = &DD[SE  *size_Mat];
-      D.f[NW  ] = &DD[NW  *size_Mat];
-      D.f[TE  ] = &DD[TE  *size_Mat];
-      D.f[BW  ] = &DD[BW  *size_Mat];
-      D.f[BE  ] = &DD[BE  *size_Mat];
-      D.f[TW  ] = &DD[TW  *size_Mat];
-      D.f[TN  ] = &DD[TN  *size_Mat];
-      D.f[BS  ] = &DD[BS  *size_Mat];
-      D.f[BN  ] = &DD[BN  *size_Mat];
-      D.f[TS  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[TNE *size_Mat];
-      D.f[TSW ] = &DD[TSW *size_Mat];
-      D.f[TSE ] = &DD[TSE *size_Mat];
-      D.f[TNW ] = &DD[TNW *size_Mat];
-      D.f[BNE ] = &DD[BNE *size_Mat];
-      D.f[BSW ] = &DD[BSW *size_Mat];
-      D.f[BSE ] = &DD[BSE *size_Mat];
-      D.f[BNW ] = &DD[BNW *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
    } 
    else
    {
-      D.f[W   ] = &DD[E   *size_Mat];
-      D.f[E   ] = &DD[W   *size_Mat];
-      D.f[S   ] = &DD[N   *size_Mat];
-      D.f[N   ] = &DD[S   *size_Mat];
-      D.f[B   ] = &DD[T   *size_Mat];
-      D.f[T   ] = &DD[B   *size_Mat];
-      D.f[SW  ] = &DD[NE  *size_Mat];
-      D.f[NE  ] = &DD[SW  *size_Mat];
-      D.f[NW  ] = &DD[SE  *size_Mat];
-      D.f[SE  ] = &DD[NW  *size_Mat];
-      D.f[BW  ] = &DD[TE  *size_Mat];
-      D.f[TE  ] = &DD[BW  *size_Mat];
-      D.f[TW  ] = &DD[BE  *size_Mat];
-      D.f[BE  ] = &DD[TW  *size_Mat];
-      D.f[BS  ] = &DD[TN  *size_Mat];
-      D.f[TN  ] = &DD[BS  *size_Mat];
-      D.f[TS  ] = &DD[BN  *size_Mat];
-      D.f[BN  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[BSW *size_Mat];
-      D.f[TSW ] = &DD[BNE *size_Mat];
-      D.f[TSE ] = &DD[BNW *size_Mat];
-      D.f[TNW ] = &DD[BSE *size_Mat];
-      D.f[BNE ] = &DD[TSW *size_Mat];
-      D.f[BSW ] = &DD[TNE *size_Mat];
-      D.f[BSE ] = &DD[TNW *size_Mat];
-      D.f[BNW ] = &DD[TSE *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -98,32 +99,32 @@ extern "C" __global__ void QSlipDevice27(real* DD,
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[E   * numberOfBCnodes];
-      q_dirW   = &QQ[W   * numberOfBCnodes];
-      q_dirN   = &QQ[N   * numberOfBCnodes];
-      q_dirS   = &QQ[S   * numberOfBCnodes];
-      q_dirT   = &QQ[T   * numberOfBCnodes];
-      q_dirB   = &QQ[B   * numberOfBCnodes];
-      q_dirNE  = &QQ[NE  * numberOfBCnodes];
-      q_dirSW  = &QQ[SW  * numberOfBCnodes];
-      q_dirSE  = &QQ[SE  * numberOfBCnodes];
-      q_dirNW  = &QQ[NW  * numberOfBCnodes];
-      q_dirTE  = &QQ[TE  * numberOfBCnodes];
-      q_dirBW  = &QQ[BW  * numberOfBCnodes];
-      q_dirBE  = &QQ[BE  * numberOfBCnodes];
-      q_dirTW  = &QQ[TW  * numberOfBCnodes];
-      q_dirTN  = &QQ[TN  * numberOfBCnodes];
-      q_dirBS  = &QQ[BS  * numberOfBCnodes];
-      q_dirBN  = &QQ[BN  * numberOfBCnodes];
-      q_dirTS  = &QQ[TS  * numberOfBCnodes];
-      q_dirTNE = &QQ[TNE * numberOfBCnodes];
-      q_dirTSW = &QQ[TSW * numberOfBCnodes];
-      q_dirTSE = &QQ[TSE * numberOfBCnodes];
-      q_dirTNW = &QQ[TNW * numberOfBCnodes];
-      q_dirBNE = &QQ[BNE * numberOfBCnodes];
-      q_dirBSW = &QQ[BSW * numberOfBCnodes];
-      q_dirBSE = &QQ[BSE * numberOfBCnodes];
-      q_dirBNW = &QQ[BNW * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
+      q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
+      q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
+      q_dirTNW = &QQ[DIR_MPP * numberOfBCnodes];
+      q_dirBNE = &QQ[DIR_PPM * numberOfBCnodes];
+      q_dirBSW = &QQ[DIR_MMM * numberOfBCnodes];
+      q_dirBSE = &QQ[DIR_PMM * numberOfBCnodes];
+      q_dirBNW = &QQ[DIR_MPM * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -155,37 +156,37 @@ extern "C" __global__ void QSlipDevice27(real* DD,
       unsigned int ktne = KQK;
       unsigned int kbsw = neighborZ[ksw];
       ////////////////////////////////////////////////////////////////////////////////
-      real f_W    = (D.f[E   ])[ke   ];
-      real f_E    = (D.f[W   ])[kw   ];
-      real f_S    = (D.f[N   ])[kn   ];
-      real f_N    = (D.f[S   ])[ks   ];
-      real f_B    = (D.f[T   ])[kt   ];
-      real f_T    = (D.f[B   ])[kb   ];
-      real f_SW   = (D.f[NE  ])[kne  ];
-      real f_NE   = (D.f[SW  ])[ksw  ];
-      real f_NW   = (D.f[SE  ])[kse  ];
-      real f_SE   = (D.f[NW  ])[knw  ];
-      real f_BW   = (D.f[TE  ])[kte  ];
-      real f_TE   = (D.f[BW  ])[kbw  ];
-      real f_TW   = (D.f[BE  ])[kbe  ];
-      real f_BE   = (D.f[TW  ])[ktw  ];
-      real f_BS   = (D.f[TN  ])[ktn  ];
-      real f_TN   = (D.f[BS  ])[kbs  ];
-      real f_TS   = (D.f[BN  ])[kbn  ];
-      real f_BN   = (D.f[TS  ])[kts  ];
-      real f_BSW  = (D.f[TNE ])[ktne ];
-      real f_BNE  = (D.f[TSW ])[ktsw ];
-      real f_BNW  = (D.f[TSE ])[ktse ];
-      real f_BSE  = (D.f[TNW ])[ktnw ];
-      real f_TSW  = (D.f[BNE ])[kbne ];
-      real f_TNE  = (D.f[BSW ])[kbsw ];
-      real f_TNW  = (D.f[BSE ])[kbse ];
-      real f_TSE  = (D.f[BNW ])[kbnw ];
+      real f_W    = (D.f[DIR_P00   ])[ke   ];
+      real f_E    = (D.f[DIR_M00   ])[kw   ];
+      real f_S    = (D.f[DIR_0P0   ])[kn   ];
+      real f_N    = (D.f[DIR_0M0   ])[ks   ];
+      real f_B    = (D.f[DIR_00P   ])[kt   ];
+      real f_T    = (D.f[DIR_00M   ])[kb   ];
+      real f_SW   = (D.f[DIR_PP0  ])[kne  ];
+      real f_NE   = (D.f[DIR_MM0  ])[ksw  ];
+      real f_NW   = (D.f[DIR_PM0  ])[kse  ];
+      real f_SE   = (D.f[DIR_MP0  ])[knw  ];
+      real f_BW   = (D.f[DIR_P0P  ])[kte  ];
+      real f_TE   = (D.f[DIR_M0M  ])[kbw  ];
+      real f_TW   = (D.f[DIR_P0M  ])[kbe  ];
+      real f_BE   = (D.f[DIR_M0P  ])[ktw  ];
+      real f_BS   = (D.f[DIR_0PP  ])[ktn  ];
+      real f_TN   = (D.f[DIR_0MM  ])[kbs  ];
+      real f_TS   = (D.f[DIR_0PM  ])[kbn  ];
+      real f_BN   = (D.f[DIR_0MP  ])[kts  ];
+      real f_BSW  = (D.f[DIR_PPP ])[ktne ];
+      real f_BNE  = (D.f[DIR_MMP ])[ktsw ];
+      real f_BNW  = (D.f[DIR_PMP ])[ktse ];
+      real f_BSE  = (D.f[DIR_MPP ])[ktnw ];
+      real f_TSW  = (D.f[DIR_PPM ])[kbne ];
+      real f_TNE  = (D.f[DIR_MMM ])[kbsw ];
+      real f_TNW  = (D.f[DIR_PMM ])[kbse ];
+      real f_TSE  = (D.f[DIR_MPM ])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, drho, feq, q;
       drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
                 f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + 
-                f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[REST])[kzero]); 
+                f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[DIR_000])[kzero]); 
 
       vx1    =  ((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
                 ((f_BE - f_TW)   + (f_TE - f_BW))   + ((f_SE - f_NW)   + (f_NE - f_SW)) +
@@ -205,67 +206,67 @@ extern "C" __global__ void QSlipDevice27(real* DD,
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D.f[E   ] = &DD[E   *size_Mat];
-         D.f[W   ] = &DD[W   *size_Mat];
-         D.f[N   ] = &DD[N   *size_Mat];
-         D.f[S   ] = &DD[S   *size_Mat];
-         D.f[T   ] = &DD[T   *size_Mat];
-         D.f[B   ] = &DD[B   *size_Mat];
-         D.f[NE  ] = &DD[NE  *size_Mat];
-         D.f[SW  ] = &DD[SW  *size_Mat];
-         D.f[SE  ] = &DD[SE  *size_Mat];
-         D.f[NW  ] = &DD[NW  *size_Mat];
-         D.f[TE  ] = &DD[TE  *size_Mat];
-         D.f[BW  ] = &DD[BW  *size_Mat];
-         D.f[BE  ] = &DD[BE  *size_Mat];
-         D.f[TW  ] = &DD[TW  *size_Mat];
-         D.f[TN  ] = &DD[TN  *size_Mat];
-         D.f[BS  ] = &DD[BS  *size_Mat];
-         D.f[BN  ] = &DD[BN  *size_Mat];
-         D.f[TS  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[TNE *size_Mat];
-         D.f[TSW ] = &DD[TSW *size_Mat];
-         D.f[TSE ] = &DD[TSE *size_Mat];
-         D.f[TNW ] = &DD[TNW *size_Mat];
-         D.f[BNE ] = &DD[BNE *size_Mat];
-         D.f[BSW ] = &DD[BSW *size_Mat];
-         D.f[BSE ] = &DD[BSE *size_Mat];
-         D.f[BNW ] = &DD[BNW *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
       } 
       else
       {
-         D.f[W   ] = &DD[E   *size_Mat];
-         D.f[E   ] = &DD[W   *size_Mat];
-         D.f[S   ] = &DD[N   *size_Mat];
-         D.f[N   ] = &DD[S   *size_Mat];
-         D.f[B   ] = &DD[T   *size_Mat];
-         D.f[T   ] = &DD[B   *size_Mat];
-         D.f[SW  ] = &DD[NE  *size_Mat];
-         D.f[NE  ] = &DD[SW  *size_Mat];
-         D.f[NW  ] = &DD[SE  *size_Mat];
-         D.f[SE  ] = &DD[NW  *size_Mat];
-         D.f[BW  ] = &DD[TE  *size_Mat];
-         D.f[TE  ] = &DD[BW  *size_Mat];
-         D.f[TW  ] = &DD[BE  *size_Mat];
-         D.f[BE  ] = &DD[TW  *size_Mat];
-         D.f[BS  ] = &DD[TN  *size_Mat];
-         D.f[TN  ] = &DD[BS  *size_Mat];
-         D.f[TS  ] = &DD[BN  *size_Mat];
-         D.f[BN  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[BSW *size_Mat];
-         D.f[TSW ] = &DD[BNE *size_Mat];
-         D.f[TSE ] = &DD[BNW *size_Mat];
-         D.f[TNW ] = &DD[BSE *size_Mat];
-         D.f[BNE ] = &DD[TSW *size_Mat];
-         D.f[BSW ] = &DD[TNE *size_Mat];
-         D.f[BSE ] = &DD[TNW *size_Mat];
-         D.f[BNW ] = &DD[TSE *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
-      //(D.f[REST])[k]=c1o10;
+      //(D.f[DIR_000])[k]=c1o10;
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  real fac = c1o1;//c99o100;
 	  real VeloX = fac*vx1;
@@ -283,8 +284,8 @@ extern "C" __global__ void QSlipDevice27(real* DD,
 	     VeloZ = fac*vx3;
 		 x = true;
          feq=c2o27* (drho+c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cu_sq); 
-         (D.f[W])[kw]=(c1o1-q)/(c1o1+q)*(f_E-feq*om1)/(c1o1-om1)+(q*(f_E+f_W)-c6o1*c2o27*( VeloX     ))/(c1o1+q);
-         //(D.f[W])[kw]=zero;
+         (D.f[DIR_M00])[kw]=(c1o1-q)/(c1o1+q)*(f_E-feq*om1)/(c1o1-om1)+(q*(f_E+f_W)-c6o1*c2o27*( VeloX     ))/(c1o1+q);
+         //(D.f[DIR_M00])[kw]=zero;
       }
 
       q = q_dirW[k];
@@ -295,8 +296,8 @@ extern "C" __global__ void QSlipDevice27(real* DD,
 	     VeloZ = fac*vx3;
 		 x = true;
          feq=c2o27* (drho+c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cu_sq); 
-         (D.f[E])[ke]=(c1o1-q)/(c1o1+q)*(f_W-feq*om1)/(c1o1-om1)+(q*(f_W+f_E)-c6o1*c2o27*(-VeloX     ))/(c1o1+q);
-         //(D.f[E])[ke]=zero;
+         (D.f[DIR_P00])[ke]=(c1o1-q)/(c1o1+q)*(f_W-feq*om1)/(c1o1-om1)+(q*(f_W+f_E)-c6o1*c2o27*(-VeloX     ))/(c1o1+q);
+         //(D.f[DIR_P00])[ke]=zero;
       }
 
       q = q_dirN[k];
@@ -307,8 +308,8 @@ extern "C" __global__ void QSlipDevice27(real* DD,
 	     VeloZ = fac*vx3;
 		 y = true;
          feq=c2o27* (drho+c3o1*(    vx2     )+c9o2*(     vx2    )*(     vx2    )-cu_sq); 
-         (D.f[S])[ks]=(c1o1-q)/(c1o1+q)*(f_N-feq*om1)/(c1o1-om1)+(q*(f_N+f_S)-c6o1*c2o27*( VeloY     ))/(c1o1+q);
-         //(D.f[S])[ks]=zero;
+         (D.f[DIR_0M0])[ks]=(c1o1-q)/(c1o1+q)*(f_N-feq*om1)/(c1o1-om1)+(q*(f_N+f_S)-c6o1*c2o27*( VeloY     ))/(c1o1+q);
+         //(D.f[DIR_0M0])[ks]=zero;
       }
 
       q = q_dirS[k];
@@ -319,8 +320,8 @@ extern "C" __global__ void QSlipDevice27(real* DD,
 	     VeloZ = fac*vx3;
 		 y = true;
          feq=c2o27* (drho+c3o1*(   -vx2     )+c9o2*(    -vx2    )*(    -vx2    )-cu_sq); 
-         (D.f[N])[kn]=(c1o1-q)/(c1o1+q)*(f_S-feq*om1)/(c1o1-om1)+(q*(f_S+f_N)-c6o1*c2o27*(-VeloY     ))/(c1o1+q);
-         //(D.f[N])[kn]=zero;
+         (D.f[DIR_0P0])[kn]=(c1o1-q)/(c1o1+q)*(f_S-feq*om1)/(c1o1-om1)+(q*(f_S+f_N)-c6o1*c2o27*(-VeloY     ))/(c1o1+q);
+         //(D.f[DIR_0P0])[kn]=zero;
       }
 
       q = q_dirT[k];
@@ -331,8 +332,8 @@ extern "C" __global__ void QSlipDevice27(real* DD,
 		 VeloZ = c0o1;
 		 z = true;
          feq=c2o27* (drho+c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cu_sq); 
-         (D.f[B])[kb]=(c1o1-q)/(c1o1+q)*(f_T-feq*om1)/(c1o1-om1)+(q*(f_T+f_B)-c6o1*c2o27*( VeloZ     ))/(c1o1+q);
-         //(D.f[B])[kb]=one;
+         (D.f[DIR_00M])[kb]=(c1o1-q)/(c1o1+q)*(f_T-feq*om1)/(c1o1-om1)+(q*(f_T+f_B)-c6o1*c2o27*( VeloZ     ))/(c1o1+q);
+         //(D.f[DIR_00M])[kb]=one;
       }
 
       q = q_dirB[k];
@@ -343,8 +344,8 @@ extern "C" __global__ void QSlipDevice27(real* DD,
 		 VeloZ = c0o1;
 		 z = true;
          feq=c2o27* (drho+c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cu_sq); 
-         (D.f[T])[kt]=(c1o1-q)/(c1o1+q)*(f_B-feq*om1)/(c1o1-om1)+(q*(f_B+f_T)-c6o1*c2o27*(-VeloZ     ))/(c1o1+q);
-         //(D.f[T])[kt]=zero;
+         (D.f[DIR_00P])[kt]=(c1o1-q)/(c1o1+q)*(f_B-feq*om1)/(c1o1-om1)+(q*(f_B+f_T)-c6o1*c2o27*(-VeloZ     ))/(c1o1+q);
+         //(D.f[DIR_00P])[kt]=zero;
       }
 
       q = q_dirNE[k];
@@ -356,8 +357,8 @@ extern "C" __global__ void QSlipDevice27(real* DD,
 		 if (x == true) VeloX = c0o1;
 		 if (y == true) VeloY = c0o1;
          feq=c1o54* (drho+c3o1*( vx1+vx2    )+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq); 
-         (D.f[SW])[ksw]=(c1o1-q)/(c1o1+q)*(f_NE-feq*om1)/(c1o1-om1)+(q*(f_NE+f_SW)-c6o1*c1o54*(VeloX+VeloY))/(c1o1+q);
-         //(D.f[SW])[ksw]=zero;
+         (D.f[DIR_MM0])[ksw]=(c1o1-q)/(c1o1+q)*(f_NE-feq*om1)/(c1o1-om1)+(q*(f_NE+f_SW)-c6o1*c1o54*(VeloX+VeloY))/(c1o1+q);
+         //(D.f[DIR_MM0])[ksw]=zero;
       }
 
       q = q_dirSW[k];
@@ -369,8 +370,8 @@ extern "C" __global__ void QSlipDevice27(real* DD,
 		 if (x == true) VeloX = c0o1;
 		 if (y == true) VeloY = c0o1;
          feq=c1o54* (drho+c3o1*(-vx1-vx2    )+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq); 
-         (D.f[NE])[kne]=(c1o1-q)/(c1o1+q)*(f_SW-feq*om1)/(c1o1-om1)+(q*(f_SW+f_NE)-c6o1*c1o54*(-VeloX-VeloY))/(c1o1+q);
-         //(D.f[NE])[kne]=zero;
+         (D.f[DIR_PP0])[kne]=(c1o1-q)/(c1o1+q)*(f_SW-feq*om1)/(c1o1-om1)+(q*(f_SW+f_NE)-c6o1*c1o54*(-VeloX-VeloY))/(c1o1+q);
+         //(D.f[DIR_PP0])[kne]=zero;
       }
 
       q = q_dirSE[k];
@@ -382,8 +383,8 @@ extern "C" __global__ void QSlipDevice27(real* DD,
 		 if (x == true) VeloX = c0o1;
 		 if (y == true) VeloY = c0o1;
          feq=c1o54* (drho+c3o1*( vx1-vx2    )+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq); 
-         (D.f[NW])[knw]=(c1o1-q)/(c1o1+q)*(f_SE-feq*om1)/(c1o1-om1)+(q*(f_SE+f_NW)-c6o1*c1o54*( VeloX-VeloY))/(c1o1+q);
-         //(D.f[NW])[knw]=zero;
+         (D.f[DIR_MP0])[knw]=(c1o1-q)/(c1o1+q)*(f_SE-feq*om1)/(c1o1-om1)+(q*(f_SE+f_NW)-c6o1*c1o54*( VeloX-VeloY))/(c1o1+q);
+         //(D.f[DIR_MP0])[knw]=zero;
       }
 
       q = q_dirNW[k];
@@ -395,8 +396,8 @@ extern "C" __global__ void QSlipDevice27(real* DD,
 		 if (x == true) VeloX = c0o1;
 		 if (y == true) VeloY = c0o1;
          feq=c1o54* (drho+c3o1*(-vx1+vx2    )+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq); 
-         (D.f[SE])[kse]=(c1o1-q)/(c1o1+q)*(f_NW-feq*om1)/(c1o1-om1)+(q*(f_NW+f_SE)-c6o1*c1o54*(-VeloX+VeloY))/(c1o1+q);
-         //(D.f[SE])[kse]=zero;
+         (D.f[DIR_PM0])[kse]=(c1o1-q)/(c1o1+q)*(f_NW-feq*om1)/(c1o1-om1)+(q*(f_NW+f_SE)-c6o1*c1o54*(-VeloX+VeloY))/(c1o1+q);
+         //(D.f[DIR_PM0])[kse]=zero;
       }
 
       q = q_dirTE[k];
@@ -408,8 +409,8 @@ extern "C" __global__ void QSlipDevice27(real* DD,
 		 if (x == true) VeloX = c0o1;
 		 if (z == true) VeloZ = c0o1;
          feq=c1o54* (drho+c3o1*( vx1    +vx3)+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq); 
-         (D.f[BW])[kbw]=(c1o1-q)/(c1o1+q)*(f_TE-feq*om1)/(c1o1-om1)+(q*(f_TE+f_BW)-c6o1*c1o54*( VeloX+VeloZ))/(c1o1+q);
-         //(D.f[BW])[kbw]=zero;
+         (D.f[DIR_M0M])[kbw]=(c1o1-q)/(c1o1+q)*(f_TE-feq*om1)/(c1o1-om1)+(q*(f_TE+f_BW)-c6o1*c1o54*( VeloX+VeloZ))/(c1o1+q);
+         //(D.f[DIR_M0M])[kbw]=zero;
       }
 
       q = q_dirBW[k];
@@ -421,8 +422,8 @@ extern "C" __global__ void QSlipDevice27(real* DD,
 		 if (x == true) VeloX = c0o1;
 		 if (z == true) VeloZ = c0o1;
          feq=c1o54* (drho+c3o1*(-vx1    -vx3)+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq); 
-         (D.f[TE])[kte]=(c1o1-q)/(c1o1+q)*(f_BW-feq*om1)/(c1o1-om1)+(q*(f_BW+f_TE)-c6o1*c1o54*(-VeloX-VeloZ))/(c1o1+q);
-         //(D.f[TE])[kte]=zero;
+         (D.f[DIR_P0P])[kte]=(c1o1-q)/(c1o1+q)*(f_BW-feq*om1)/(c1o1-om1)+(q*(f_BW+f_TE)-c6o1*c1o54*(-VeloX-VeloZ))/(c1o1+q);
+         //(D.f[DIR_P0P])[kte]=zero;
       }
 
       q = q_dirBE[k];
@@ -434,8 +435,8 @@ extern "C" __global__ void QSlipDevice27(real* DD,
 		 if (x == true) VeloX = c0o1;
 		 if (z == true) VeloZ = c0o1;
          feq=c1o54* (drho+c3o1*( vx1    -vx3)+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq); 
-         (D.f[TW])[ktw]=(c1o1-q)/(c1o1+q)*(f_BE-feq*om1)/(c1o1-om1)+(q*(f_BE+f_TW)-c6o1*c1o54*( VeloX-VeloZ))/(c1o1+q);
-         //(D.f[TW])[ktw]=zero;
+         (D.f[DIR_M0P])[ktw]=(c1o1-q)/(c1o1+q)*(f_BE-feq*om1)/(c1o1-om1)+(q*(f_BE+f_TW)-c6o1*c1o54*( VeloX-VeloZ))/(c1o1+q);
+         //(D.f[DIR_M0P])[ktw]=zero;
       }
 
       q = q_dirTW[k];
@@ -447,8 +448,8 @@ extern "C" __global__ void QSlipDevice27(real* DD,
 		 if (x == true) VeloX = c0o1;
 		 if (z == true) VeloZ = c0o1;
          feq=c1o54* (drho+c3o1*(-vx1    +vx3)+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq); 
-         (D.f[BE])[kbe]=(c1o1-q)/(c1o1+q)*(f_TW-feq*om1)/(c1o1-om1)+(q*(f_TW+f_BE)-c6o1*c1o54*(-VeloX+VeloZ))/(c1o1+q);
-         //(D.f[BE])[kbe]=zero;
+         (D.f[DIR_P0M])[kbe]=(c1o1-q)/(c1o1+q)*(f_TW-feq*om1)/(c1o1-om1)+(q*(f_TW+f_BE)-c6o1*c1o54*(-VeloX+VeloZ))/(c1o1+q);
+         //(D.f[DIR_P0M])[kbe]=zero;
       }
 
       q = q_dirTN[k];
@@ -460,8 +461,8 @@ extern "C" __global__ void QSlipDevice27(real* DD,
 		 if (y == true) VeloY = c0o1;
 		 if (z == true) VeloZ = c0o1;
          feq=c1o54* (drho+c3o1*(     vx2+vx3)+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq); 
-         (D.f[BS])[kbs]=(c1o1-q)/(c1o1+q)*(f_TN-feq*om1)/(c1o1-om1)+(q*(f_TN+f_BS)-c6o1*c1o54*( VeloY+VeloZ))/(c1o1+q);
-         //(D.f[BS])[kbs]=zero;
+         (D.f[DIR_0MM])[kbs]=(c1o1-q)/(c1o1+q)*(f_TN-feq*om1)/(c1o1-om1)+(q*(f_TN+f_BS)-c6o1*c1o54*( VeloY+VeloZ))/(c1o1+q);
+         //(D.f[DIR_0MM])[kbs]=zero;
       }
 
       q = q_dirBS[k];
@@ -473,8 +474,8 @@ extern "C" __global__ void QSlipDevice27(real* DD,
 		 if (y == true) VeloY = c0o1;
 		 if (z == true) VeloZ = c0o1;
          feq=c1o54* (drho+c3o1*(    -vx2-vx3)+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq); 
-         (D.f[TN])[ktn]=(c1o1-q)/(c1o1+q)*(f_BS-feq*om1)/(c1o1-om1)+(q*(f_BS+f_TN)-c6o1*c1o54*( -VeloY-VeloZ))/(c1o1+q);
-         //(D.f[TN])[ktn]=zero;
+         (D.f[DIR_0PP])[ktn]=(c1o1-q)/(c1o1+q)*(f_BS-feq*om1)/(c1o1-om1)+(q*(f_BS+f_TN)-c6o1*c1o54*( -VeloY-VeloZ))/(c1o1+q);
+         //(D.f[DIR_0PP])[ktn]=zero;
       }
 
       q = q_dirBN[k];
@@ -486,8 +487,8 @@ extern "C" __global__ void QSlipDevice27(real* DD,
 		 if (y == true) VeloY = c0o1;
 		 if (z == true) VeloZ = c0o1;
          feq=c1o54* (drho+c3o1*(     vx2-vx3)+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq); 
-         (D.f[TS])[kts]=(c1o1-q)/(c1o1+q)*(f_BN-feq*om1)/(c1o1-om1)+(q*(f_BN+f_TS)-c6o1*c1o54*( VeloY-VeloZ))/(c1o1+q);
-         //(D.f[TS])[kts]=zero;
+         (D.f[DIR_0MP])[kts]=(c1o1-q)/(c1o1+q)*(f_BN-feq*om1)/(c1o1-om1)+(q*(f_BN+f_TS)-c6o1*c1o54*( VeloY-VeloZ))/(c1o1+q);
+         //(D.f[DIR_0MP])[kts]=zero;
       }
 
       q = q_dirTS[k];
@@ -499,8 +500,8 @@ extern "C" __global__ void QSlipDevice27(real* DD,
 		 if (y == true) VeloY = c0o1;
 		 if (z == true) VeloZ = c0o1;
          feq=c1o54* (drho+c3o1*(    -vx2+vx3)+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq); 
-         (D.f[BN])[kbn]=(c1o1-q)/(c1o1+q)*(f_TS-feq*om1)/(c1o1-om1)+(q*(f_TS+f_BN)-c6o1*c1o54*( -VeloY+VeloZ))/(c1o1+q);
-         //(D.f[BN])[kbn]=zero;
+         (D.f[DIR_0PM])[kbn]=(c1o1-q)/(c1o1+q)*(f_TS-feq*om1)/(c1o1-om1)+(q*(f_TS+f_BN)-c6o1*c1o54*( -VeloY+VeloZ))/(c1o1+q);
+         //(D.f[DIR_0PM])[kbn]=zero;
       }
 
       q = q_dirTNE[k];
@@ -513,8 +514,8 @@ extern "C" __global__ void QSlipDevice27(real* DD,
 		 if (y == true) VeloY = c0o1;
 		 if (z == true) VeloZ = c0o1;
          feq=c1o216*(drho+c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq); 
-         (D.f[BSW])[kbsw]=(c1o1-q)/(c1o1+q)*(f_TNE-feq*om1)/(c1o1-om1)+(q*(f_TNE+f_BSW)-c6o1*c1o216*( VeloX+VeloY+VeloZ))/(c1o1+q);
-         //(D.f[BSW])[kbsw]=zero;
+         (D.f[DIR_MMM])[kbsw]=(c1o1-q)/(c1o1+q)*(f_TNE-feq*om1)/(c1o1-om1)+(q*(f_TNE+f_BSW)-c6o1*c1o216*( VeloX+VeloY+VeloZ))/(c1o1+q);
+         //(D.f[DIR_MMM])[kbsw]=zero;
       }
 
       q = q_dirBSW[k];
@@ -527,8 +528,8 @@ extern "C" __global__ void QSlipDevice27(real* DD,
 		 if (y == true) VeloY = c0o1;
 		 if (z == true) VeloZ = c0o1;
          feq=c1o216*(drho+c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq); 
-         (D.f[TNE])[ktne]=(c1o1-q)/(c1o1+q)*(f_BSW-feq*om1)/(c1o1-om1)+(q*(f_BSW+f_TNE)-c6o1*c1o216*(-VeloX-VeloY-VeloZ))/(c1o1+q);
-         //(D.f[TNE])[ktne]=zero;
+         (D.f[DIR_PPP])[ktne]=(c1o1-q)/(c1o1+q)*(f_BSW-feq*om1)/(c1o1-om1)+(q*(f_BSW+f_TNE)-c6o1*c1o216*(-VeloX-VeloY-VeloZ))/(c1o1+q);
+         //(D.f[DIR_PPP])[ktne]=zero;
       }
 
       q = q_dirBNE[k];
@@ -541,8 +542,8 @@ extern "C" __global__ void QSlipDevice27(real* DD,
 		 if (y == true) VeloY = c0o1;
 		 if (z == true) VeloZ = c0o1;
          feq=c1o216*(drho+c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq); 
-         (D.f[TSW])[ktsw]=(c1o1-q)/(c1o1+q)*(f_BNE-feq*om1)/(c1o1-om1)+(q*(f_BNE+f_TSW)-c6o1*c1o216*( VeloX+VeloY-VeloZ))/(c1o1+q);
-         //(D.f[TSW])[ktsw]=zero;
+         (D.f[DIR_MMP])[ktsw]=(c1o1-q)/(c1o1+q)*(f_BNE-feq*om1)/(c1o1-om1)+(q*(f_BNE+f_TSW)-c6o1*c1o216*( VeloX+VeloY-VeloZ))/(c1o1+q);
+         //(D.f[DIR_MMP])[ktsw]=zero;
       }
 
       q = q_dirTSW[k];
@@ -555,8 +556,8 @@ extern "C" __global__ void QSlipDevice27(real* DD,
 		 if (y == true) VeloY = c0o1;
 		 if (z == true) VeloZ = c0o1;
          feq=c1o216*(drho+c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq); 
-         (D.f[BNE])[kbne]=(c1o1-q)/(c1o1+q)*(f_TSW-feq*om1)/(c1o1-om1)+(q*(f_TSW+f_BNE)-c6o1*c1o216*(-VeloX-VeloY+VeloZ))/(c1o1+q);
-         //(D.f[BNE])[kbne]=zero;
+         (D.f[DIR_PPM])[kbne]=(c1o1-q)/(c1o1+q)*(f_TSW-feq*om1)/(c1o1-om1)+(q*(f_TSW+f_BNE)-c6o1*c1o216*(-VeloX-VeloY+VeloZ))/(c1o1+q);
+         //(D.f[DIR_PPM])[kbne]=zero;
       }
 
       q = q_dirTSE[k];
@@ -569,8 +570,8 @@ extern "C" __global__ void QSlipDevice27(real* DD,
 		 if (y == true) VeloY = c0o1;
 		 if (z == true) VeloZ = c0o1;
          feq=c1o216*(drho+c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq); 
-         (D.f[BNW])[kbnw]=(c1o1-q)/(c1o1+q)*(f_TSE-feq*om1)/(c1o1-om1)+(q*(f_TSE+f_BNW)-c6o1*c1o216*( VeloX-VeloY+VeloZ))/(c1o1+q);
-         //(D.f[BNW])[kbnw]=zero;
+         (D.f[DIR_MPM])[kbnw]=(c1o1-q)/(c1o1+q)*(f_TSE-feq*om1)/(c1o1-om1)+(q*(f_TSE+f_BNW)-c6o1*c1o216*( VeloX-VeloY+VeloZ))/(c1o1+q);
+         //(D.f[DIR_MPM])[kbnw]=zero;
       }
 
       q = q_dirBNW[k];
@@ -583,8 +584,8 @@ extern "C" __global__ void QSlipDevice27(real* DD,
 		 if (y == true) VeloY = c0o1;
 		 if (z == true) VeloZ = c0o1;
          feq=c1o216*(drho+c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq); 
-         (D.f[TSE])[ktse]=(c1o1-q)/(c1o1+q)*(f_BNW-feq*om1)/(c1o1-om1)+(q*(f_BNW+f_TSE)-c6o1*c1o216*(-VeloX+VeloY-VeloZ))/(c1o1+q);
-         //(D.f[TSE])[ktse]=zero;
+         (D.f[DIR_PMP])[ktse]=(c1o1-q)/(c1o1+q)*(f_BNW-feq*om1)/(c1o1-om1)+(q*(f_BNW+f_TSE)-c6o1*c1o216*(-VeloX+VeloY-VeloZ))/(c1o1+q);
+         //(D.f[DIR_PMP])[ktse]=zero;
       }
 
       q = q_dirBSE[k];
@@ -597,8 +598,8 @@ extern "C" __global__ void QSlipDevice27(real* DD,
 		 if (y == true) VeloY = c0o1;
 		 if (z == true) VeloZ = c0o1;
          feq=c1o216*(drho+c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq); 
-         (D.f[TNW])[ktnw]=(c1o1-q)/(c1o1+q)*(f_BSE-feq*om1)/(c1o1-om1)+(q*(f_BSE+f_TNW)-c6o1*c1o216*( VeloX-VeloY-VeloZ))/(c1o1+q);
-         //(D.f[TNW])[ktnw]=zero;
+         (D.f[DIR_MPP])[ktnw]=(c1o1-q)/(c1o1+q)*(f_BSE-feq*om1)/(c1o1-om1)+(q*(f_BSE+f_TNW)-c6o1*c1o216*( VeloX-VeloY-VeloZ))/(c1o1+q);
+         //(D.f[DIR_MPP])[ktnw]=zero;
       }
 
       q = q_dirTNW[k];
@@ -611,8 +612,8 @@ extern "C" __global__ void QSlipDevice27(real* DD,
 		 if (y == true) VeloY = c0o1;
 		 if (z == true) VeloZ = c0o1;
          feq=c1o216*(drho+c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq); 
-         (D.f[BSE])[kbse]=(c1o1-q)/(c1o1+q)*(f_TNW-feq*om1)/(c1o1-om1)+(q*(f_TNW+f_BSE)-c6o1*c1o216*(-VeloX+VeloY+VeloZ))/(c1o1+q);
-         //(D.f[BSE])[kbse]=zero;
+         (D.f[DIR_PMM])[kbse]=(c1o1-q)/(c1o1+q)*(f_TNW-feq*om1)/(c1o1-om1)+(q*(f_TNW+f_BSE)-c6o1*c1o216*(-VeloX+VeloY+VeloZ))/(c1o1+q);
+         //(D.f[DIR_PMM])[kbse]=zero;
       }
    }
 }
@@ -657,143 +658,68 @@ extern "C" __global__ void QSlipDevice27(real* DD,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QSlipDeviceComp27(real* DD, 
-											 int* k_Q, 
-											 real* QQ,
-											 unsigned int numberOfBCnodes,
-											 real om1, 
-											 unsigned int* neighborX,
-											 unsigned int* neighborY,
-											 unsigned int* neighborZ,
-											 unsigned int size_Mat, 
-											 bool isEvenTimestep)
+__global__ void QSlipDeviceComp27(
+                                    real* distributions, 
+                                    int* subgridDistanceIndices, 
+                                    real* subgridDistances,
+                                    unsigned int numberOfBCnodes,
+                                    real omega, 
+                                    unsigned int* neighborX,
+                                    unsigned int* neighborY,
+                                    unsigned int* neighborZ,
+                                    unsigned int numberOfLBnodes, 
+                                    bool isEvenTimestep)
 {
-   Distributions27 D;
-   if (isEvenTimestep==true)
-   {
-      D.f[E   ] = &DD[E   *size_Mat];
-      D.f[W   ] = &DD[W   *size_Mat];
-      D.f[N   ] = &DD[N   *size_Mat];
-      D.f[S   ] = &DD[S   *size_Mat];
-      D.f[T   ] = &DD[T   *size_Mat];
-      D.f[B   ] = &DD[B   *size_Mat];
-      D.f[NE  ] = &DD[NE  *size_Mat];
-      D.f[SW  ] = &DD[SW  *size_Mat];
-      D.f[SE  ] = &DD[SE  *size_Mat];
-      D.f[NW  ] = &DD[NW  *size_Mat];
-      D.f[TE  ] = &DD[TE  *size_Mat];
-      D.f[BW  ] = &DD[BW  *size_Mat];
-      D.f[BE  ] = &DD[BE  *size_Mat];
-      D.f[TW  ] = &DD[TW  *size_Mat];
-      D.f[TN  ] = &DD[TN  *size_Mat];
-      D.f[BS  ] = &DD[BS  *size_Mat];
-      D.f[BN  ] = &DD[BN  *size_Mat];
-      D.f[TS  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[TNE *size_Mat];
-      D.f[TSW ] = &DD[TSW *size_Mat];
-      D.f[TSE ] = &DD[TSE *size_Mat];
-      D.f[TNW ] = &DD[TNW *size_Mat];
-      D.f[BNE ] = &DD[BNE *size_Mat];
-      D.f[BSW ] = &DD[BSW *size_Mat];
-      D.f[BSE ] = &DD[BSE *size_Mat];
-      D.f[BNW ] = &DD[BNW *size_Mat];
-   } 
-   else
-   {
-      D.f[W   ] = &DD[E   *size_Mat];
-      D.f[E   ] = &DD[W   *size_Mat];
-      D.f[S   ] = &DD[N   *size_Mat];
-      D.f[N   ] = &DD[S   *size_Mat];
-      D.f[B   ] = &DD[T   *size_Mat];
-      D.f[T   ] = &DD[B   *size_Mat];
-      D.f[SW  ] = &DD[NE  *size_Mat];
-      D.f[NE  ] = &DD[SW  *size_Mat];
-      D.f[NW  ] = &DD[SE  *size_Mat];
-      D.f[SE  ] = &DD[NW  *size_Mat];
-      D.f[BW  ] = &DD[TE  *size_Mat];
-      D.f[TE  ] = &DD[BW  *size_Mat];
-      D.f[TW  ] = &DD[BE  *size_Mat];
-      D.f[BE  ] = &DD[TW  *size_Mat];
-      D.f[BS  ] = &DD[TN  *size_Mat];
-      D.f[TN  ] = &DD[BS  *size_Mat];
-      D.f[TS  ] = &DD[BN  *size_Mat];
-      D.f[BN  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[BSW *size_Mat];
-      D.f[TSW ] = &DD[BNE *size_Mat];
-      D.f[TSE ] = &DD[BNW *size_Mat];
-      D.f[TNW ] = &DD[BSE *size_Mat];
-      D.f[BNE ] = &DD[TSW *size_Mat];
-      D.f[BSW ] = &DD[TNE *size_Mat];
-      D.f[BSE ] = &DD[TNW *size_Mat];
-      D.f[BNW ] = &DD[TSE *size_Mat];
-   }
+   //! The slip boundary condition is executed in the following steps
+   //!
    ////////////////////////////////////////////////////////////////////////////////
-   const unsigned  x = threadIdx.x;  // Globaler x-Index 
-   const unsigned  y = blockIdx.x;   // Globaler y-Index 
-   const unsigned  z = blockIdx.y;   // Globaler z-Index 
+   //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+   //!
+   const unsigned  x = threadIdx.x;  // global x-index 
+   const unsigned  y = blockIdx.x;   // global y-index 
+   const unsigned  z = blockIdx.y;   // global z-index 
 
    const unsigned nx = blockDim.x;
    const unsigned ny = gridDim.x;
 
    const unsigned k = nx*(ny*z + y) + x;
-   //////////////////////////////////////////////////////////////////////////
 
    if(k < numberOfBCnodes)
    {
+      //////////////////////////////////////////////////////////////////////////
+      //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref
+      //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), DOI:10.3390/computation5020019 ]</b></a>
+      //!
+      Distributions27 dist;
+      getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep);
+
       ////////////////////////////////////////////////////////////////////////////////
-      real *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB, 
-            *q_dirNE,  *q_dirSW,  *q_dirSE,  *q_dirNW,  *q_dirTE,  *q_dirBW,
-            *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
-            *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
-            *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[E   * numberOfBCnodes];
-      q_dirW   = &QQ[W   * numberOfBCnodes];
-      q_dirN   = &QQ[N   * numberOfBCnodes];
-      q_dirS   = &QQ[S   * numberOfBCnodes];
-      q_dirT   = &QQ[T   * numberOfBCnodes];
-      q_dirB   = &QQ[B   * numberOfBCnodes];
-      q_dirNE  = &QQ[NE  * numberOfBCnodes];
-      q_dirSW  = &QQ[SW  * numberOfBCnodes];
-      q_dirSE  = &QQ[SE  * numberOfBCnodes];
-      q_dirNW  = &QQ[NW  * numberOfBCnodes];
-      q_dirTE  = &QQ[TE  * numberOfBCnodes];
-      q_dirBW  = &QQ[BW  * numberOfBCnodes];
-      q_dirBE  = &QQ[BE  * numberOfBCnodes];
-      q_dirTW  = &QQ[TW  * numberOfBCnodes];
-      q_dirTN  = &QQ[TN  * numberOfBCnodes];
-      q_dirBS  = &QQ[BS  * numberOfBCnodes];
-      q_dirBN  = &QQ[BN  * numberOfBCnodes];
-      q_dirTS  = &QQ[TS  * numberOfBCnodes];
-      q_dirTNE = &QQ[TNE * numberOfBCnodes];
-      q_dirTSW = &QQ[TSW * numberOfBCnodes];
-      q_dirTSE = &QQ[TSE * numberOfBCnodes];
-      q_dirTNW = &QQ[TNW * numberOfBCnodes];
-      q_dirBNE = &QQ[BNE * numberOfBCnodes];
-      q_dirBSW = &QQ[BSW * numberOfBCnodes];
-      q_dirBSE = &QQ[BSE * numberOfBCnodes];
-      q_dirBNW = &QQ[BNW * numberOfBCnodes];
+      //! - Set local subgrid distances (q's)
+      //!
+      SubgridDistances27 subgridD;
+      getPointersToSubgridDistances(subgridD, subgridDistances, numberOfBCnodes);
+      
       ////////////////////////////////////////////////////////////////////////////////
-      //index
-      unsigned int KQK  = k_Q[k];
-      unsigned int kzero= KQK;
-      unsigned int ke   = KQK;
-      unsigned int kw   = neighborX[KQK];
-      unsigned int kn   = KQK;
-      unsigned int ks   = neighborY[KQK];
-      unsigned int kt   = KQK;
-      unsigned int kb   = neighborZ[KQK];
+      //! - Set neighbor indices (necessary for indirect addressing)
+      //!
+      unsigned int indexOfBCnode  = subgridDistanceIndices[k];
+      unsigned int kzero= indexOfBCnode;
+      unsigned int ke   = indexOfBCnode;
+      unsigned int kw   = neighborX[indexOfBCnode];
+      unsigned int kn   = indexOfBCnode;
+      unsigned int ks   = neighborY[indexOfBCnode];
+      unsigned int kt   = indexOfBCnode;
+      unsigned int kb   = neighborZ[indexOfBCnode];
       unsigned int ksw  = neighborY[kw];
-      unsigned int kne  = KQK;
+      unsigned int kne  = indexOfBCnode;
       unsigned int kse  = ks;
       unsigned int knw  = kw;
       unsigned int kbw  = neighborZ[kw];
-      unsigned int kte  = KQK;
+      unsigned int kte  = indexOfBCnode;
       unsigned int kbe  = kb;
       unsigned int ktw  = kw;
       unsigned int kbs  = neighborZ[ks];
-      unsigned int ktn  = KQK;
+      unsigned int ktn  = indexOfBCnode;
       unsigned int kbn  = kb;
       unsigned int kts  = ks;
       unsigned int ktse = ks;
@@ -802,664 +728,1498 @@ extern "C" __global__ void QSlipDeviceComp27(real* DD,
       unsigned int kbse = kbs;
       unsigned int ktsw = ksw;
       unsigned int kbne = kb;
-      unsigned int ktne = KQK;
+      unsigned int ktne = indexOfBCnode;
       unsigned int kbsw = neighborZ[ksw];
       
       ////////////////////////////////////////////////////////////////////////////////
-      real f_W    = (D.f[E   ])[ke   ];
-      real f_E    = (D.f[W   ])[kw   ];
-      real f_S    = (D.f[N   ])[kn   ];
-      real f_N    = (D.f[S   ])[ks   ];
-      real f_B    = (D.f[T   ])[kt   ];
-      real f_T    = (D.f[B   ])[kb   ];
-      real f_SW   = (D.f[NE  ])[kne  ];
-      real f_NE   = (D.f[SW  ])[ksw  ];
-      real f_NW   = (D.f[SE  ])[kse  ];
-      real f_SE   = (D.f[NW  ])[knw  ];
-      real f_BW   = (D.f[TE  ])[kte  ];
-      real f_TE   = (D.f[BW  ])[kbw  ];
-      real f_TW   = (D.f[BE  ])[kbe  ];
-      real f_BE   = (D.f[TW  ])[ktw  ];
-      real f_BS   = (D.f[TN  ])[ktn  ];
-      real f_TN   = (D.f[BS  ])[kbs  ];
-      real f_TS   = (D.f[BN  ])[kbn  ];
-      real f_BN   = (D.f[TS  ])[kts  ];
-      real f_BSW  = (D.f[TNE ])[ktne ];
-      real f_BNE  = (D.f[TSW ])[ktsw ];
-      real f_BNW  = (D.f[TSE ])[ktse ];
-      real f_BSE  = (D.f[TNW ])[ktnw ];
-      real f_TSW  = (D.f[BNE ])[kbne ];
-      real f_TNE  = (D.f[BSW ])[kbsw ];
-      real f_TNW  = (D.f[BSE ])[kbse ];
-      real f_TSE  = (D.f[BNW ])[kbnw ];
+      //! - Set local distributions
+      //!
+      real f_W    = (dist.f[DIR_P00   ])[ke   ];
+      real f_E    = (dist.f[DIR_M00   ])[kw   ];
+      real f_S    = (dist.f[DIR_0P0   ])[kn   ];
+      real f_N    = (dist.f[DIR_0M0   ])[ks   ];
+      real f_B    = (dist.f[DIR_00P   ])[kt   ];
+      real f_T    = (dist.f[DIR_00M   ])[kb   ];
+      real f_SW   = (dist.f[DIR_PP0  ])[kne  ];
+      real f_NE   = (dist.f[DIR_MM0  ])[ksw  ];
+      real f_NW   = (dist.f[DIR_PM0  ])[kse  ];
+      real f_SE   = (dist.f[DIR_MP0  ])[knw  ];
+      real f_BW   = (dist.f[DIR_P0P  ])[kte  ];
+      real f_TE   = (dist.f[DIR_M0M  ])[kbw  ];
+      real f_TW   = (dist.f[DIR_P0M  ])[kbe  ];
+      real f_BE   = (dist.f[DIR_M0P  ])[ktw  ];
+      real f_BS   = (dist.f[DIR_0PP  ])[ktn  ];
+      real f_TN   = (dist.f[DIR_0MM  ])[kbs  ];
+      real f_TS   = (dist.f[DIR_0PM  ])[kbn  ];
+      real f_BN   = (dist.f[DIR_0MP  ])[kts  ];
+      real f_BSW  = (dist.f[DIR_PPP ])[ktne ];
+      real f_BNE  = (dist.f[DIR_MMP ])[ktsw ];
+      real f_BNW  = (dist.f[DIR_PMP ])[ktse ];
+      real f_BSE  = (dist.f[DIR_MPP ])[ktnw ];
+      real f_TSW  = (dist.f[DIR_PPM ])[kbne ];
+      real f_TNE  = (dist.f[DIR_MMM ])[kbsw ];
+      real f_TNW  = (dist.f[DIR_PMM ])[kbse ];
+      real f_TSE  = (dist.f[DIR_MPM ])[kbnw ];
+
       ////////////////////////////////////////////////////////////////////////////////
-      real vx1, vx2, vx3, drho, feq, q;
-      drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
-                f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + 
-                f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[REST])[kzero]); 
+      //! - Calculate macroscopic quantities
+      //!
+      real drho = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
+                  f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + 
+                  f_T + f_B + f_N + f_S + f_E + f_W + ((dist.f[DIR_000])[kzero]); 
 
-      vx1    =  (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
-                ((f_BE - f_TW)   + (f_TE - f_BW))   + ((f_SE - f_NW)   + (f_NE - f_SW)) +
-                (f_E - f_W)) / (c1o1 + drho); 
-         
+      real vx1  = (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
+                   ((f_BE - f_TW)   + (f_TE - f_BW))   + ((f_SE - f_NW)   + (f_NE - f_SW)) +
+                   (f_E - f_W)) / (c1o1 + drho);
 
-      vx2    =   ((-(f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
-                 ((f_BN - f_TS)   + (f_TN - f_BS))    + (-(f_SE - f_NW)  + (f_NE - f_SW)) +
-                 (f_N - f_S)) / (c1o1 + drho); 
+      real vx2  = ((-(f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
+                   ((f_BN - f_TS)   + (f_TN - f_BS))    + (-(f_SE - f_NW)  + (f_NE - f_SW)) +
+                   (f_N - f_S)) / (c1o1 + drho);
 
-      vx3    =   (((f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) + (f_TSW - f_BNE)) +
-                 (-(f_BN - f_TS)  + (f_TN - f_BS))   + ((f_TE - f_BW)   - (f_BE - f_TW)) +
-                 (f_T - f_B)) / (c1o1 + drho); 
+      real vx3  = (((f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) + (f_TSW - f_BNE)) +
+                   (-(f_BN - f_TS)  + (f_TN - f_BS))   + ((f_TE - f_BW)   - (f_BE - f_TW)) +
+                   (f_T - f_B)) / (c1o1 + drho);
 
-      real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3) * (c1o1 + drho);
+      real cu_sq = c3o2 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3) * (c1o1 + drho);
 
-      //////////////////////////////////////////////////////////////////////////
-      if (isEvenTimestep==false)
+      ////////////////////////////////////////////////////////////////////////////////
+      //! - change the pointer to write the results in the correct array
+      //!
+      getPointersToDistributions(dist, distributions, numberOfLBnodes, !isEvenTimestep);
+
+      ////////////////////////////////////////////////////////////////////////////////
+      //! - Multiply the local velocities by the slipLength
+      //!
+      real slipLength = c1o1;
+      real VeloX = slipLength*vx1;
+      real VeloY = slipLength*vx2;
+      real VeloZ = slipLength*vx3;
+
+      ////////////////////////////////////////////////////////////////////////////////
+      //! - Update distributions with subgrid distance (q) between zero and one
+      //!
+      real feq, q, velocityLB, velocityBC;
+
+      bool x = false;
+      bool y = false;
+      bool z = false;
+
+      q = (subgridD.q[DIR_P00])[k];
+      if (q>=c0o1 && q<=c1o1)  // only update distribution for q between zero and one
       {
-         D.f[E   ] = &DD[E   *size_Mat];
-         D.f[W   ] = &DD[W   *size_Mat];
-         D.f[N   ] = &DD[N   *size_Mat];
-         D.f[S   ] = &DD[S   *size_Mat];
-         D.f[T   ] = &DD[T   *size_Mat];
-         D.f[B   ] = &DD[B   *size_Mat];
-         D.f[NE  ] = &DD[NE  *size_Mat];
-         D.f[SW  ] = &DD[SW  *size_Mat];
-         D.f[SE  ] = &DD[SE  *size_Mat];
-         D.f[NW  ] = &DD[NW  *size_Mat];
-         D.f[TE  ] = &DD[TE  *size_Mat];
-         D.f[BW  ] = &DD[BW  *size_Mat];
-         D.f[BE  ] = &DD[BE  *size_Mat];
-         D.f[TW  ] = &DD[TW  *size_Mat];
-         D.f[TN  ] = &DD[TN  *size_Mat];
-         D.f[BS  ] = &DD[BS  *size_Mat];
-         D.f[BN  ] = &DD[BN  *size_Mat];
-         D.f[TS  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[TNE *size_Mat];
-         D.f[TSW ] = &DD[TSW *size_Mat];
-         D.f[TSE ] = &DD[TSE *size_Mat];
-         D.f[TNW ] = &DD[TNW *size_Mat];
-         D.f[BNE ] = &DD[BNE *size_Mat];
-         D.f[BSW ] = &DD[BSW *size_Mat];
-         D.f[BSE ] = &DD[BSE *size_Mat];
-         D.f[BNW ] = &DD[BNW *size_Mat];
-      } 
-      else
+         VeloX = c0o1;
+         x = true;
+
+         velocityLB = vx1;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
+         velocityBC = VeloX;
+         (dist.f[DIR_M00])[kw] = getInterpolatedDistributionForVeloBC(q, f_E, f_W, feq, omega, velocityBC, c2o27);
+      }
+
+      q = (subgridD.q[DIR_M00])[k];
+      if (q>=c0o1 && q<=c1o1)
       {
-         D.f[W   ] = &DD[E   *size_Mat];
-         D.f[E   ] = &DD[W   *size_Mat];
-         D.f[S   ] = &DD[N   *size_Mat];
-         D.f[N   ] = &DD[S   *size_Mat];
-         D.f[B   ] = &DD[T   *size_Mat];
-         D.f[T   ] = &DD[B   *size_Mat];
-         D.f[SW  ] = &DD[NE  *size_Mat];
-         D.f[NE  ] = &DD[SW  *size_Mat];
-         D.f[NW  ] = &DD[SE  *size_Mat];
-         D.f[SE  ] = &DD[NW  *size_Mat];
-         D.f[BW  ] = &DD[TE  *size_Mat];
-         D.f[TE  ] = &DD[BW  *size_Mat];
-         D.f[TW  ] = &DD[BE  *size_Mat];
-         D.f[BE  ] = &DD[TW  *size_Mat];
-         D.f[BS  ] = &DD[TN  *size_Mat];
-         D.f[TN  ] = &DD[BS  *size_Mat];
-         D.f[TS  ] = &DD[BN  *size_Mat];
-         D.f[BN  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[BSW *size_Mat];
-         D.f[TSW ] = &DD[BNE *size_Mat];
-         D.f[TSE ] = &DD[BNW *size_Mat];
-         D.f[TNW ] = &DD[BSE *size_Mat];
-         D.f[BNE ] = &DD[TSW *size_Mat];
-         D.f[BSW ] = &DD[TNE *size_Mat];
-         D.f[BSE ] = &DD[TNW *size_Mat];
-         D.f[BNW ] = &DD[TSE *size_Mat];
+         VeloX = c0o1;
+         x = true;
+
+         velocityLB = -vx1;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
+         velocityBC = -VeloX;
+         (dist.f[DIR_P00])[ke] = getInterpolatedDistributionForVeloBC(q, f_W, f_E, feq, omega, velocityBC, c2o27);
       }
-      ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      //Test
-      //(D.f[REST])[k]=c1o10;
-      ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-	  real fac = c1o1;//c99o100;
-	  real VeloX = fac*vx1;
-	  real VeloY = fac*vx2;
-	  real VeloZ = fac*vx3;
-	  bool x = false;
-	  bool y = false;
-	  bool z = false;
 
-      q = q_dirE[k];
+      q = (subgridD.q[DIR_0P0])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = c0o1;
-	     VeloY = fac*vx2;
-	     VeloZ = fac*vx3;
-		 x = true;
-         feq=c2o27* (drho/*+three*( vx1        )*/+c9o2*( vx1        )*( vx1        ) * (c1o1 + drho)-cu_sq); 
-         (D.f[W])[kw]=(c1o1-q)/(c1o1+q)*(f_E-f_W+(f_E+f_W-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_E+f_W)-c6o1*c2o27*( VeloX     ))/(c1o1+q) - c2o27 * drho;
-         //feq=c2over27* (drho+three*( vx1        )+c9over2*( vx1        )*( vx1        )-cu_sq); 
-         //(D.f[W])[kw]=(one-q)/(one+q)*(f_E-feq*om1)/(one-om1)+(q*(f_E+f_W)-six*c2over27*( VeloX     ))/(one+q);
-         //(D.f[W])[kw]=zero;
+         VeloY = c0o1;
+         y = true;
+
+         velocityLB = vx2;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
+         velocityBC = VeloY;
+         (dist.f[DIR_0M0])[ks] = getInterpolatedDistributionForVeloBC(q, f_N, f_S, feq, omega, velocityBC, c2o27);
       }
 
-      q = q_dirW[k];
+      q = (subgridD.q[DIR_0M0])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = c0o1;
-	     VeloY = fac*vx2;
-	     VeloZ = fac*vx3;
-		 x = true;
-         feq=c2o27* (drho/*+three*(-vx1        )*/+c9o2*(-vx1        )*(-vx1        ) * (c1o1 + drho)-cu_sq); 
-         (D.f[E])[ke]=(c1o1-q)/(c1o1+q)*(f_W-f_E+(f_W+f_E-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_W+f_E)-c6o1*c2o27*(-VeloX     ))/(c1o1+q) - c2o27 * drho;
-         //feq=c2over27* (drho+three*(-vx1        )+c9over2*(-vx1        )*(-vx1        )-cu_sq); 
-         //(D.f[E])[ke]=(one-q)/(one+q)*(f_W-feq*om1)/(one-om1)+(q*(f_W+f_E)-six*c2over27*(-VeloX     ))/(one+q);
-         //(D.f[E])[ke]=zero;
+         VeloY = c0o1;
+         y = true;
+
+         velocityLB = -vx2;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
+         velocityBC = -VeloY;
+         (dist.f[DIR_0P0])[kn] = getInterpolatedDistributionForVeloBC(q, f_S, f_N, feq, omega, velocityBC, c2o27);
       }
 
-      q = q_dirN[k];
+      q = (subgridD.q[DIR_00P])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-		 VeloY = c0o1;
-	     VeloZ = fac*vx3;
-		 y = true;
-         feq=c2o27* (drho/*+three*(    vx2     )*/+c9o2*(     vx2    )*(     vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[S])[ks]=(c1o1-q)/(c1o1+q)*(f_N-f_S+(f_N+f_S-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_N+f_S)-c6o1*c2o27*( VeloY     ))/(c1o1+q) - c2o27 * drho;
-         //feq=c2over27* (drho+three*(    vx2     )+c9over2*(     vx2    )*(     vx2    )-cu_sq); 
-         //(D.f[S])[ks]=(one-q)/(one+q)*(f_N-feq*om1)/(one-om1)+(q*(f_N+f_S)-six*c2over27*( VeloY     ))/(one+q);
-         //(D.f[S])[ks]=zero;
+         VeloZ = c0o1;
+         z = true;
+
+         velocityLB = vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
+         velocityBC = VeloZ;
+         (dist.f[DIR_00M])[kb] = getInterpolatedDistributionForVeloBC(q, f_T, f_B, feq, omega, velocityBC, c2o27);
       }
 
-      q = q_dirS[k];
+      q = (subgridD.q[DIR_00M])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-		 VeloY = c0o1;
-	     VeloZ = fac*vx3;
-		 y = true;
-         feq=c2o27* (drho/*+three*(   -vx2     )*/+c9o2*(    -vx2    )*(    -vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[N])[kn]=(c1o1-q)/(c1o1+q)*(f_S-f_N+(f_S+f_N-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_S+f_N)-c6o1*c2o27*(-VeloY     ))/(c1o1+q) - c2o27 * drho;
-         //feq=c2over27* (drho+three*(   -vx2     )+c9over2*(    -vx2    )*(    -vx2    )-cu_sq); 
-         //(D.f[N])[kn]=(one-q)/(one+q)*(f_S-feq*om1)/(one-om1)+(q*(f_S+f_N)-six*c2over27*(-VeloY     ))/(one+q);
-         //(D.f[N])[kn]=zero;
+         VeloZ = c0o1;
+         z = true;
+
+         velocityLB = -vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
+         velocityBC = -VeloZ;
+         (dist.f[DIR_00P])[kt] = getInterpolatedDistributionForVeloBC(q, f_B, f_T, feq, omega, velocityBC, c2o27);
       }
 
-      q = q_dirT[k];
+      q = (subgridD.q[DIR_PP0])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-	     VeloY = fac*vx2;
-		 VeloZ = c0o1;
-		 z = true;
-         feq=c2o27* (drho/*+three*(         vx3)*/+c9o2*(         vx3)*(         vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[B])[kb]=(c1o1-q)/(c1o1+q)*(f_T-f_B+(f_T+f_B-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_T+f_B)-c6o1*c2o27*( VeloZ     ))/(c1o1+q) - c2o27 * drho;
-         //feq=c2over27* (drho+three*(         vx3)+c9over2*(         vx3)*(         vx3)-cu_sq); 
-         //(D.f[B])[kb]=(one-q)/(one+q)*(f_T-feq*om1)/(one-om1)+(q*(f_T+f_B)-six*c2over27*( VeloZ     ))/(one+q);
-         //(D.f[B])[kb]=one;
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+
+         velocityLB = vx1 + vx2;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = VeloX + VeloY;
+         (dist.f[DIR_MM0])[ksw] = getInterpolatedDistributionForVeloBC(q, f_NE, f_SW, feq, omega, velocityBC, c1o54);
       }
 
-      q = q_dirB[k];
+      q = (subgridD.q[DIR_MM0])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-	     VeloY = fac*vx2;
-		 VeloZ = c0o1;
-		 z = true;
-         feq=c2o27* (drho/*+three*(        -vx3)*/+c9o2*(        -vx3)*(        -vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[T])[kt]=(c1o1-q)/(c1o1+q)*(f_B-f_T+(f_B+f_T-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_B+f_T)-c6o1*c2o27*(-VeloZ     ))/(c1o1+q) - c2o27 * drho;
-         //feq=c2over27* (drho+three*(        -vx3)+c9over2*(        -vx3)*(        -vx3)-cu_sq); 
-         //(D.f[T])[kt]=(one-q)/(one+q)*(f_B-feq*om1)/(one-om1)+(q*(f_B+f_T)-six*c2over27*(-VeloZ     ))/(one+q);
-         //(D.f[T])[kt]=zero;
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+
+         velocityLB = -vx1 - vx2;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = -VeloX - VeloY;
+         (dist.f[DIR_PP0])[kne] = getInterpolatedDistributionForVeloBC(q, f_SW, f_NE, feq, omega, velocityBC, c1o54);
       }
 
-      q = q_dirNE[k];
+      q = (subgridD.q[DIR_PM0])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-	     VeloY = fac*vx2;
-	     VeloZ = fac*vx3;
-		 if (x == true) VeloX = c0o1;
-		 if (y == true) VeloY = c0o1;
-         feq=c1o54* (drho/*+three*( vx1+vx2    )*/+c9o2*( vx1+vx2    )*( vx1+vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[SW])[ksw]=(c1o1-q)/(c1o1+q)*(f_NE-f_SW+(f_NE+f_SW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_NE+f_SW)-c6o1*c1o54*(VeloX+VeloY))/(c1o1+q) - c1o54 * drho;
-         //feq=c1over54* (drho+three*( vx1+vx2    )+c9over2*( vx1+vx2    )*( vx1+vx2    )-cu_sq); 
-         //(D.f[SW])[ksw]=(one-q)/(one+q)*(f_NE-feq*om1)/(one-om1)+(q*(f_NE+f_SW)-six*c1over54*(VeloX+VeloY))/(one+q);
-         //(D.f[SW])[ksw]=zero;
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+
+         velocityLB = vx1 - vx2;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = VeloX - VeloY;
+         (dist.f[DIR_MP0])[knw] = getInterpolatedDistributionForVeloBC(q, f_SE, f_NW, feq, omega, velocityBC, c1o54);
       }
 
-      q = q_dirSW[k];
+      q = (subgridD.q[DIR_MP0])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-	     VeloY = fac*vx2;
-	     VeloZ = fac*vx3;
-		 if (x == true) VeloX = c0o1;
-		 if (y == true) VeloY = c0o1;
-         feq=c1o54* (drho/*+three*(-vx1-vx2    )*/+c9o2*(-vx1-vx2    )*(-vx1-vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[NE])[kne]=(c1o1-q)/(c1o1+q)*(f_SW-f_NE+(f_SW+f_NE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_SW+f_NE)-c6o1*c1o54*(-VeloX-VeloY))/(c1o1+q) - c1o54 * drho;
-         //feq=c1over54* (drho+three*(-vx1-vx2    )+c9over2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq); 
-         //(D.f[NE])[kne]=(one-q)/(one+q)*(f_SW-feq*om1)/(one-om1)+(q*(f_SW+f_NE)-six*c1over54*(-VeloX-VeloY))/(one+q);
-         //(D.f[NE])[kne]=zero;
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+
+         velocityLB = -vx1 + vx2;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = -VeloX + VeloY;
+         (dist.f[DIR_PM0])[kse] = getInterpolatedDistributionForVeloBC(q, f_NW, f_SE, feq, omega, velocityBC, c1o54);
       }
 
-      q = q_dirSE[k];
+      q = (subgridD.q[DIR_P0P])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-	     VeloY = fac*vx2;
-	     VeloZ = fac*vx3;
-		 if (x == true) VeloX = c0o1;
-		 if (y == true) VeloY = c0o1;
-         feq=c1o54* (drho/*+three*( vx1-vx2    )*/+c9o2*( vx1-vx2    )*( vx1-vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[NW])[knw]=(c1o1-q)/(c1o1+q)*(f_SE-f_NW+(f_SE+f_NW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_SE+f_NW)-c6o1*c1o54*( VeloX-VeloY))/(c1o1+q) - c1o54 * drho;
-         //feq=c1over54* (drho+three*( vx1-vx2    )+c9over2*( vx1-vx2    )*( vx1-vx2    )-cu_sq); 
-         //(D.f[NW])[knw]=(one-q)/(one+q)*(f_SE-feq*om1)/(one-om1)+(q*(f_SE+f_NW)-six*c1over54*( VeloX-VeloY))/(one+q);
-         //(D.f[NW])[knw]=zero;
+         VeloX = slipLength*vx1;
+         VeloZ = slipLength*vx3;
+         if (x == true) VeloX = c0o1;
+         if (z == true) VeloZ = c0o1;
+
+         velocityLB = vx1 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = VeloX + VeloZ;
+         (dist.f[DIR_M0M])[kbw] = getInterpolatedDistributionForVeloBC(q, f_TE, f_BW, feq, omega, velocityBC, c1o54);
       }
 
-      q = q_dirNW[k];
+      q = (subgridD.q[DIR_M0M])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-	     VeloY = fac*vx2;
-	     VeloZ = fac*vx3;
-		 if (x == true) VeloX = c0o1;
-		 if (y == true) VeloY = c0o1;
-         feq=c1o54* (drho/*+three*(-vx1+vx2    )*/+c9o2*(-vx1+vx2    )*(-vx1+vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[SE])[kse]=(c1o1-q)/(c1o1+q)*(f_NW-f_SE+(f_NW+f_SE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_NW+f_SE)-c6o1*c1o54*(-VeloX+VeloY))/(c1o1+q) - c1o54 * drho;
-         //feq=c1over54* (drho+three*(-vx1+vx2    )+c9over2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq); 
-         //(D.f[SE])[kse]=(one-q)/(one+q)*(f_NW-feq*om1)/(one-om1)+(q*(f_NW+f_SE)-six*c1over54*(-VeloX+VeloY))/(one+q);
-         //(D.f[SE])[kse]=zero;
+        VeloX = slipLength*vx1;
+        VeloZ = slipLength*vx3;
+        if (x == true) VeloX = c0o1;
+        if (z == true) VeloZ = c0o1;
+
+         velocityLB = -vx1 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = -VeloX - VeloZ;
+         (dist.f[DIR_P0P])[kte] = getInterpolatedDistributionForVeloBC(q, f_BW, f_TE, feq, omega, velocityBC, c1o54);
       }
 
-      q = q_dirTE[k];
+      q = (subgridD.q[DIR_P0M])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-	     VeloY = fac*vx2;
-	     VeloZ = fac*vx3;
-		 if (x == true) VeloX = c0o1;
-		 if (z == true) VeloZ = c0o1;
-      //  if (k==10000) printf("AFTER x: %u \t  y: %u \t z: %u \n  VeloX: %f \t VeloY: %f \t VeloZ: %f \n\n", x,y,z, VeloX,VeloY,VeloZ);
-         feq=c1o54* (drho/*+three*( vx1    +vx3)*/+c9o2*( vx1    +vx3)*( vx1    +vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BW])[kbw]=(c1o1-q)/(c1o1+q)*(f_TE-f_BW+(f_TE+f_BW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TE+f_BW)-c6o1*c1o54*( VeloX+VeloZ))/(c1o1+q) - c1o54 * drho;
-         //feq=c1over54* (drho+three*( vx1    +vx3)+c9over2*( vx1    +vx3)*( vx1    +vx3)-cu_sq); 
-         //(D.f[BW])[kbw]=(one-q)/(one+q)*(f_TE-feq*om1)/(one-om1)+(q*(f_TE+f_BW)-six*c1over54*( VeloX+VeloZ))/(one+q);
-         //(D.f[BW])[kbw]=zero;
+         VeloX = slipLength*vx1;
+         VeloZ = slipLength*vx3;
+         if (x == true) VeloX = c0o1;
+         if (z == true) VeloZ = c0o1;
+
+         velocityLB = vx1 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = VeloX - VeloZ;
+         (dist.f[DIR_M0P])[ktw] = getInterpolatedDistributionForVeloBC(q, f_BE, f_TW, feq, omega, velocityBC, c1o54);
       }
 
-      q = q_dirBW[k];
+      q = (subgridD.q[DIR_M0P])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-	     VeloY = fac*vx2;
-	     VeloZ = fac*vx3;
-		 if (x == true) VeloX = c0o1;
-		 if (z == true) VeloZ = c0o1;
-         feq=c1o54* (drho/*+three*(-vx1    -vx3)*/+c9o2*(-vx1    -vx3)*(-vx1    -vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TE])[kte]=(c1o1-q)/(c1o1+q)*(f_BW-f_TE+(f_BW+f_TE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BW+f_TE)-c6o1*c1o54*(-VeloX-VeloZ))/(c1o1+q) - c1o54 * drho;
-         //feq=c1over54* (drho+three*(-vx1    -vx3)+c9over2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq); 
-         //(D.f[TE])[kte]=(one-q)/(one+q)*(f_BW-feq*om1)/(one-om1)+(q*(f_BW+f_TE)-six*c1over54*(-VeloX-VeloZ))/(one+q);
-         //(D.f[TE])[kte]=zero;
+         VeloX = slipLength*vx1;
+         VeloZ = slipLength*vx3;
+         if (x == true) VeloX = c0o1;
+         if (z == true) VeloZ = c0o1;
+
+         velocityLB = -vx1 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = -VeloX + VeloZ;
+         (dist.f[DIR_P0M])[kbe] = getInterpolatedDistributionForVeloBC(q, f_TW, f_BE, feq, omega, velocityBC, c1o54);
       }
 
-      q = q_dirBE[k];
+      q = (subgridD.q[DIR_0PP])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-	     VeloY = fac*vx2;
-	     VeloZ = fac*vx3;
-		 if (x == true) VeloX = c0o1;
-		 if (z == true) VeloZ = c0o1;
-         feq=c1o54* (drho/*+three*( vx1    -vx3)*/+c9o2*( vx1    -vx3)*( vx1    -vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TW])[ktw]=(c1o1-q)/(c1o1+q)*(f_BE-f_TW+(f_BE+f_TW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BE+f_TW)-c6o1*c1o54*( VeloX-VeloZ))/(c1o1+q) - c1o54 * drho;
-         //feq=c1over54* (drho+three*( vx1    -vx3)+c9over2*( vx1    -vx3)*( vx1    -vx3)-cu_sq); 
-         //(D.f[TW])[ktw]=(one-q)/(one+q)*(f_BE-feq*om1)/(one-om1)+(q*(f_BE+f_TW)-six*c1over54*( VeloX-VeloZ))/(one+q);
-         //(D.f[TW])[ktw]=zero;
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+
+         velocityLB = vx2 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = VeloY + VeloZ;
+         (dist.f[DIR_0MM])[kbs] = getInterpolatedDistributionForVeloBC(q, f_TN, f_BS, feq, omega, velocityBC, c1o54);
       }
 
-      q = q_dirTW[k];
+      q = (subgridD.q[DIR_0MM])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-	     VeloY = fac*vx2;
-	     VeloZ = fac*vx3;
-		 if (x == true) VeloX = c0o1;
-		 if (z == true) VeloZ = c0o1;
-         feq=c1o54* (drho/*+three*(-vx1    +vx3)*/+c9o2*(-vx1    +vx3)*(-vx1    +vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BE])[kbe]=(c1o1-q)/(c1o1+q)*(f_TW-f_BE+(f_TW+f_BE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TW+f_BE)-c6o1*c1o54*(-VeloX+VeloZ))/(c1o1+q) - c1o54 * drho;
-         //feq=c1over54* (drho+three*(-vx1    +vx3)+c9over2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq); 
-         //(D.f[BE])[kbe]=(one-q)/(one+q)*(f_TW-feq*om1)/(one-om1)+(q*(f_TW+f_BE)-six*c1over54*(-VeloX+VeloZ))/(one+q);
-         //(D.f[BE])[kbe]=zero;
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+
+         velocityLB = -vx2 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = -VeloY - VeloZ;
+         (dist.f[DIR_0PP])[ktn] = getInterpolatedDistributionForVeloBC(q, f_BS, f_TN, feq, omega, velocityBC, c1o54);
       }
 
-      q = q_dirTN[k];
+
+      q = (subgridD.q[DIR_0PM])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-	     VeloY = fac*vx2;
-	     VeloZ = fac*vx3;
-		 if (y == true) VeloY = c0o1;
-		 if (z == true) VeloZ = c0o1;
-         feq=c1o54* (drho/*+three*(     vx2+vx3)*/+c9o2*(     vx2+vx3)*(     vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BS])[kbs]=(c1o1-q)/(c1o1+q)*(f_TN-f_BS+(f_TN+f_BS-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TN+f_BS)-c6o1*c1o54*( VeloY+VeloZ))/(c1o1+q) - c1o54 * drho;
-         //feq=c1over54* (drho+three*(     vx2+vx3)+c9over2*(     vx2+vx3)*(     vx2+vx3)-cu_sq); 
-         //(D.f[BS])[kbs]=(one-q)/(one+q)*(f_TN-feq*om1)/(one-om1)+(q*(f_TN+f_BS)-six*c1over54*( VeloY+VeloZ))/(one+q);
-         //(D.f[BS])[kbs]=zero;
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+
+         velocityLB = vx2 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = VeloY - VeloZ;
+         (dist.f[DIR_0MP])[kts] = getInterpolatedDistributionForVeloBC(q, f_BN, f_TS, feq, omega, velocityBC, c1o54);
       }
 
-      q = q_dirBS[k];
+      q = (subgridD.q[DIR_0MP])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-	     VeloY = fac*vx2;
-	     VeloZ = fac*vx3;
-		 if (y == true) VeloY = c0o1;
-		 if (z == true) VeloZ = c0o1;
-         feq=c1o54* (drho/*+three*(    -vx2-vx3)*/+c9o2*(    -vx2-vx3)*(    -vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TN])[ktn]=(c1o1-q)/(c1o1+q)*(f_BS-f_TN+(f_BS+f_TN-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BS+f_TN)-c6o1*c1o54*( -VeloY-VeloZ))/(c1o1+q) - c1o54 * drho;
-         //feq=c1over54* (drho+three*(    -vx2-vx3)+c9over2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq); 
-         //(D.f[TN])[ktn]=(one-q)/(one+q)*(f_BS-feq*om1)/(one-om1)+(q*(f_BS+f_TN)-six*c1over54*( -VeloY-VeloZ))/(one+q);
-         //(D.f[TN])[ktn]=zero;
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+
+         velocityLB = -vx2 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = -VeloY + VeloZ;
+         (dist.f[DIR_0PM])[kbn] = getInterpolatedDistributionForVeloBC(q, f_TS, f_BN, feq, omega, velocityBC, c1o54);
       }
 
-      q = q_dirBN[k];
+      q = (subgridD.q[DIR_PPP])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-	     VeloY = fac*vx2;
-	     VeloZ = fac*vx3;
-		 if (y == true) VeloY = c0o1;
-		 if (z == true) VeloZ = c0o1;
-         feq=c1o54* (drho/*+three*(     vx2-vx3)*/+c9o2*(     vx2-vx3)*(     vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TS])[kts]=(c1o1-q)/(c1o1+q)*(f_BN-f_TS+(f_BN+f_TS-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BN+f_TS)-c6o1*c1o54*( VeloY-VeloZ))/(c1o1+q) - c1o54 * drho;
-         //feq=c1over54* (drho+three*(     vx2-vx3)+c9over2*(     vx2-vx3)*(     vx2-vx3)-cu_sq); 
-         //(D.f[TS])[kts]=(one-q)/(one+q)*(f_BN-feq*om1)/(one-om1)+(q*(f_BN+f_TS)-six*c1over54*( VeloY-VeloZ))/(one+q);
-         //(D.f[TS])[kts]=zero;
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+         velocityLB = vx1 + vx2 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         velocityBC = VeloX + VeloY + VeloZ;
+         (dist.f[DIR_MMM])[kbsw] = getInterpolatedDistributionForVeloBC(q, f_TNE, f_BSW, feq, omega, velocityBC, c1o216);
       }
 
-      q = q_dirTS[k];
+      q = (subgridD.q[DIR_MMM])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-	     VeloY = fac*vx2;
-	     VeloZ = fac*vx3;
-		 if (y == true) VeloY = c0o1;
-		 if (z == true) VeloZ = c0o1;
-         feq=c1o54* (drho/*+three*(    -vx2+vx3)*/+c9o2*(    -vx2+vx3)*(    -vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BN])[kbn]=(c1o1-q)/(c1o1+q)*(f_TS-f_BN+(f_TS+f_BN-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TS+f_BN)-c6o1*c1o54*( -VeloY+VeloZ))/(c1o1+q) - c1o54 * drho;
-         //feq=c1over54* (drho+three*(    -vx2+vx3)+c9over2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq); 
-         //(D.f[BN])[kbn]=(one-q)/(one+q)*(f_TS-feq*om1)/(one-om1)+(q*(f_TS+f_BN)-six*c1over54*( -VeloY+VeloZ))/(one+q);
-         //(D.f[BN])[kbn]=zero;
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+         velocityLB = -vx1 - vx2 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         velocityBC = -VeloX - VeloY - VeloZ;
+         (dist.f[DIR_PPP])[ktne] = getInterpolatedDistributionForVeloBC(q, f_BSW, f_TNE, feq, omega, velocityBC, c1o216);
       }
 
-      q = q_dirTNE[k];
+
+      q = (subgridD.q[DIR_PPM])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-	     VeloY = fac*vx2;
-	     VeloZ = fac*vx3;
-		 if (x == true) VeloX = c0o1;
-		 if (y == true) VeloY = c0o1;
-		 if (z == true) VeloZ = c0o1;
-         feq=c1o216*(drho/*+three*( vx1+vx2+vx3)*/+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BSW])[kbsw]=(c1o1-q)/(c1o1+q)*(f_TNE-f_BSW+(f_TNE+f_BSW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TNE+f_BSW)-c6o1*c1o216*( VeloX+VeloY+VeloZ))/(c1o1+q) - c1o216 * drho;
-         //feq=c1over216*(drho+three*( vx1+vx2+vx3)+c9over2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq); 
-         //(D.f[BSW])[kbsw]=(one-q)/(one+q)*(f_TNE-feq*om1)/(one-om1)+(q*(f_TNE+f_BSW)-six*c1over216*( VeloX+VeloY+VeloZ))/(one+q);
-         //(D.f[BSW])[kbsw]=zero;
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+         velocityLB = vx1 + vx2 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         velocityBC = VeloX + VeloY - VeloZ;
+         (dist.f[DIR_MMP])[ktsw] = getInterpolatedDistributionForVeloBC(q, f_BNE, f_TSW, feq, omega, velocityBC, c1o216);
       }
 
-      q = q_dirBSW[k];
+      q = (subgridD.q[DIR_MMP])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-	     VeloY = fac*vx2;
-	     VeloZ = fac*vx3;
-		 if (x == true) VeloX = c0o1;
-		 if (y == true) VeloY = c0o1;
-		 if (z == true) VeloZ = c0o1;
-         feq=c1o216*(drho/*+three*(-vx1-vx2-vx3)*/+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TNE])[ktne]=(c1o1-q)/(c1o1+q)*(f_BSW-f_TNE+(f_BSW+f_TNE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BSW+f_TNE)-c6o1*c1o216*(-VeloX-VeloY-VeloZ))/(c1o1+q) - c1o216 * drho;
-         //feq=c1over216*(drho+three*(-vx1-vx2-vx3)+c9over2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq); 
-         //(D.f[TNE])[ktne]=(one-q)/(one+q)*(f_BSW-feq*om1)/(one-om1)+(q*(f_BSW+f_TNE)-six*c1over216*(-VeloX-VeloY-VeloZ))/(one+q);
-         //(D.f[TNE])[ktne]=zero;
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+         velocityLB = -vx1 - vx2 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         velocityBC = -VeloX - VeloY + VeloZ;
+         (dist.f[DIR_PPM])[kbne] = getInterpolatedDistributionForVeloBC(q, f_TSW, f_BNE, feq, omega, velocityBC, c1o216);
       }
 
-      q = q_dirBNE[k];
+      q = (subgridD.q[DIR_PMP])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-	     VeloY = fac*vx2;
-	     VeloZ = fac*vx3;
-		 if (x == true) VeloX = c0o1;
-		 if (y == true) VeloY = c0o1;
-		 if (z == true) VeloZ = c0o1;
-         feq=c1o216*(drho/*+three*( vx1+vx2-vx3)*/+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TSW])[ktsw]=(c1o1-q)/(c1o1+q)*(f_BNE-f_TSW+(f_BNE+f_TSW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BNE+f_TSW)-c6o1*c1o216*( VeloX+VeloY-VeloZ))/(c1o1+q) - c1o216 * drho;
-         //feq=c1over216*(drho+three*( vx1+vx2-vx3)+c9over2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq); 
-         //(D.f[TSW])[ktsw]=(one-q)/(one+q)*(f_BNE-feq*om1)/(one-om1)+(q*(f_BNE+f_TSW)-six*c1over216*( VeloX+VeloY-VeloZ))/(one+q);
-         //(D.f[TSW])[ktsw]=zero;
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+         velocityLB = vx1 - vx2 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         velocityBC = VeloX - VeloY + VeloZ;
+         (dist.f[DIR_MPM])[kbnw] = getInterpolatedDistributionForVeloBC(q, f_TSE, f_BNW, feq, omega, velocityBC, c1o216);
       }
 
-      q = q_dirTSW[k];
+      q = (subgridD.q[DIR_MPM])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-	     VeloY = fac*vx2;
-	     VeloZ = fac*vx3;
-		 if (x == true) VeloX = c0o1;
-		 if (y == true) VeloY = c0o1;
-		 if (z == true) VeloZ = c0o1;
-         feq=c1o216*(drho/*+three*(-vx1-vx2+vx3)*/+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BNE])[kbne]=(c1o1-q)/(c1o1+q)*(f_TSW-f_BNE+(f_TSW+f_BNE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TSW+f_BNE)-c6o1*c1o216*(-VeloX-VeloY+VeloZ))/(c1o1+q) - c1o216 * drho;
-         //feq=c1over216*(drho+three*(-vx1-vx2+vx3)+c9over2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq); 
-         //(D.f[BNE])[kbne]=(one-q)/(one+q)*(f_TSW-feq*om1)/(one-om1)+(q*(f_TSW+f_BNE)-six*c1over216*(-VeloX-VeloY+VeloZ))/(one+q);
-         //(D.f[BNE])[kbne]=zero;
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+         velocityLB = -vx1 + vx2 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         velocityBC = -VeloX + VeloY - VeloZ;
+         (dist.f[DIR_PMP])[ktse] = getInterpolatedDistributionForVeloBC(q, f_BNW, f_TSE, feq, omega, velocityBC, c1o216);
       }
 
-      q = q_dirTSE[k];
+      q = (subgridD.q[DIR_PMM])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-	     VeloY = fac*vx2;
-	     VeloZ = fac*vx3;
-		 if (x == true) VeloX = c0o1;
-		 if (y == true) VeloY = c0o1;
-		 if (z == true) VeloZ = c0o1;
-         feq=c1o216*(drho/*+three*( vx1-vx2+vx3)*/+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BNW])[kbnw]=(c1o1-q)/(c1o1+q)*(f_TSE-f_BNW+(f_TSE+f_BNW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TSE+f_BNW)-c6o1*c1o216*( VeloX-VeloY+VeloZ))/(c1o1+q) - c1o216 * drho;
-         //feq=c1over216*(drho+three*( vx1-vx2+vx3)+c9over2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq); 
-         //(D.f[BNW])[kbnw]=(one-q)/(one+q)*(f_TSE-feq*om1)/(one-om1)+(q*(f_TSE+f_BNW)-six*c1over216*( VeloX-VeloY+VeloZ))/(one+q);
-         //(D.f[BNW])[kbnw]=zero;
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+         velocityLB = vx1 - vx2 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         velocityBC = VeloX - VeloY - VeloZ;
+         (dist.f[DIR_MPP])[ktnw] = getInterpolatedDistributionForVeloBC(q, f_BSE, f_TNW, feq, omega, velocityBC, c1o216);
       }
 
-      q = q_dirBNW[k];
+      q = (subgridD.q[DIR_MPP])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-	     VeloY = fac*vx2;
-	     VeloZ = fac*vx3;
-		 if (x == true) VeloX = c0o1;
-		 if (y == true) VeloY = c0o1;
-		 if (z == true) VeloZ = c0o1;
-         feq=c1o216*(drho/*+three*(-vx1+vx2-vx3)*/+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TSE])[ktse]=(c1o1-q)/(c1o1+q)*(f_BNW-f_TSE+(f_BNW+f_TSE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BNW+f_TSE)-c6o1*c1o216*(-VeloX+VeloY-VeloZ))/(c1o1+q) - c1o216 * drho;
-         //feq=c1over216*(drho+three*(-vx1+vx2-vx3)+c9over2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq); 
-         //(D.f[TSE])[ktse]=(one-q)/(one+q)*(f_BNW-feq*om1)/(one-om1)+(q*(f_BNW+f_TSE)-six*c1over216*(-VeloX+VeloY-VeloZ))/(one+q);
-         //(D.f[TSE])[ktse]=zero;
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+         velocityLB = -vx1 + vx2 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         velocityBC = -VeloX + VeloY + VeloZ;
+         (dist.f[DIR_PMM])[kbse] = getInterpolatedDistributionForVeloBC(q, f_TNW, f_BSE, feq, omega, velocityBC, c1o216);
       }
+   }
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-      q = q_dirBSE[k];
+//////////////////////////////////////////////////////////////////////////////
+__global__ void BBSlipDeviceComp27(
+                                    real* distributions,
+                                    int* subgridDistanceIndices,
+                                    real* subgridDistances,
+                                    unsigned int numberOfBCnodes,
+                                    real omega,
+                                    unsigned int* neighborX,
+                                    unsigned int* neighborY,
+                                    unsigned int* neighborZ,
+                                    unsigned int numberOfLBnodes,
+                                    bool isEvenTimestep)
+{
+   //! The bounce-back slip boundary condition is executed in the following steps
+   //! (note: the parameter omega is not used by this kernel; each q is only tested for 0 <= q <= 1)
+   ////////////////////////////////////////////////////////////////////////////////
+   //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+   //!
+   const unsigned  x = threadIdx.x;  // global x-index
+   const unsigned  y = blockIdx.x;   // global y-index
+   const unsigned  z = blockIdx.y;   // global z-index
+
+   const unsigned nx = blockDim.x;
+   const unsigned ny = gridDim.x;
+
+   const unsigned k = nx*(ny*z + y) + x;  // linear index of this thread into the list of boundary nodes
+
+   if(k < numberOfBCnodes)  // threads beyond the last boundary node do nothing
+   {
+      //////////////////////////////////////////////////////////////////////////
+      //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref
+      //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), DOI:10.3390/computation5020019 ]</b></a>
+      //!
+      Distributions27 dist;
+      getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep);
+
+      ////////////////////////////////////////////////////////////////////////////////
+      //! - Set local subgrid distances (q's)
+      //!
+      SubgridDistances27 subgridD;
+      getPointersToSubgridDistances(subgridD, subgridDistances, numberOfBCnodes);
+
+      ////////////////////////////////////////////////////////////////////////////////
+      //! - Set neighbor indices (necessary for indirect addressing)
+      //!
+      unsigned int indexOfBCnode  = subgridDistanceIndices[k];
+      unsigned int kzero= indexOfBCnode;
+      unsigned int ke   = indexOfBCnode;
+      unsigned int kw   = neighborX[indexOfBCnode];
+      unsigned int kn   = indexOfBCnode;
+      unsigned int ks   = neighborY[indexOfBCnode];
+      unsigned int kt   = indexOfBCnode;
+      unsigned int kb   = neighborZ[indexOfBCnode];
+      unsigned int ksw  = neighborY[kw];
+      unsigned int kne  = indexOfBCnode;
+      unsigned int kse  = ks;
+      unsigned int knw  = kw;
+      unsigned int kbw  = neighborZ[kw];
+      unsigned int kte  = indexOfBCnode;
+      unsigned int kbe  = kb;
+      unsigned int ktw  = kw;
+      unsigned int kbs  = neighborZ[ks];
+      unsigned int ktn  = indexOfBCnode;
+      unsigned int kbn  = kb;
+      unsigned int kts  = ks;
+      unsigned int ktse = ks;
+      unsigned int kbnw = kbw;
+      unsigned int ktnw = kw;
+      unsigned int kbse = kbs;
+      unsigned int ktsw = ksw;
+      unsigned int kbne = kb;
+      unsigned int ktne = indexOfBCnode;
+      unsigned int kbsw = neighborZ[ksw];
+
+      ////////////////////////////////////////////////////////////////////////////////
+      //! - Set local distributions (each f_<compass> is read from the array of the opposite direction, e.g. f_W from DIR_P00)
+      //!
+      real f_W    = (dist.f[DIR_P00])[ke   ];
+      real f_E    = (dist.f[DIR_M00])[kw   ];
+      real f_S    = (dist.f[DIR_0P0])[kn   ];
+      real f_N    = (dist.f[DIR_0M0])[ks   ];
+      real f_B    = (dist.f[DIR_00P])[kt   ];
+      real f_T    = (dist.f[DIR_00M])[kb   ];
+      real f_SW   = (dist.f[DIR_PP0])[kne  ];
+      real f_NE   = (dist.f[DIR_MM0])[ksw  ];
+      real f_NW   = (dist.f[DIR_PM0])[kse  ];
+      real f_SE   = (dist.f[DIR_MP0])[knw  ];
+      real f_BW   = (dist.f[DIR_P0P])[kte  ];
+      real f_TE   = (dist.f[DIR_M0M])[kbw  ];
+      real f_TW   = (dist.f[DIR_P0M])[kbe  ];
+      real f_BE   = (dist.f[DIR_M0P])[ktw  ];
+      real f_BS   = (dist.f[DIR_0PP])[ktn  ];
+      real f_TN   = (dist.f[DIR_0MM])[kbs  ];
+      real f_TS   = (dist.f[DIR_0PM])[kbn  ];
+      real f_BN   = (dist.f[DIR_0MP])[kts  ];
+      real f_BSW  = (dist.f[DIR_PPP])[ktne ];
+      real f_BNE  = (dist.f[DIR_MMP])[ktsw ];
+      real f_BNW  = (dist.f[DIR_PMP])[ktse ];
+      real f_BSE  = (dist.f[DIR_MPP])[ktnw ];
+      real f_TSW  = (dist.f[DIR_PPM])[kbne ];
+      real f_TNE  = (dist.f[DIR_MMM])[kbsw ];
+      real f_TNW  = (dist.f[DIR_PMM])[kbse ];
+      real f_TSE  = (dist.f[DIR_MPM])[kbnw ];
+
+      ////////////////////////////////////////////////////////////////////////////////
+      //! - Calculate macroscopic quantities
+      //!
+      real drho = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
+                  f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW +
+                  f_T + f_B + f_N + f_S + f_E + f_W + ((dist.f[DIR_000])[kzero]);
+
+      real vx1  = (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
+                   ((f_BE - f_TW)   + (f_TE - f_BW))   + ((f_SE - f_NW)   + (f_NE - f_SW)) +
+                   (f_E - f_W)) / (c1o1 + drho);
+
+      real vx2  = ((-(f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
+                   ((f_BN - f_TS)   + (f_TN - f_BS))    + (-(f_SE - f_NW)  + (f_NE - f_SW)) +
+                   (f_N - f_S)) / (c1o1 + drho);
+
+      real vx3  = (((f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) + (f_TSW - f_BNE)) +
+                   (-(f_BN - f_TS)  + (f_TN - f_BS))   + ((f_TE - f_BW)   - (f_BE - f_TW)) +
+                   (f_T - f_B)) / (c1o1 + drho);
+
+      real cu_sq = c3o2 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3) * (c1o1 + drho);  // NOTE(review): not read anywhere below in this kernel
+
+      ////////////////////////////////////////////////////////////////////////////////
+      //! - change the pointer to write the results in the correct array
+      //!
+      getPointersToDistributions(dist, distributions, numberOfLBnodes, !isEvenTimestep);
+
+      ////////////////////////////////////////////////////////////////////////////////
+      //! - Multiply the local velocities by the slipLength
+      //!
+      real slipLength = c1o1;  // fixed to one here, i.e. the local velocity is used unscaled
+      real VeloX = slipLength*vx1;
+      real VeloY = slipLength*vx2;
+      real VeloZ = slipLength*vx3;
+
+      ////////////////////////////////////////////////////////////////////////////////
+      //! - Update distributions with subgrid distance (q) between zero and one
+      //!
+      real q, velocityBC;
+
+      bool x = false;  // these flags shadow the thread indices x, y, z declared above (k is already computed);
+      bool y = false;  // each flag is set once an axis-aligned link of that axis has a q in [0, 1] and
+      bool z = false;  // then forces the matching velocity component to zero in all later branches
+
+      q = (subgridD.q[DIR_P00])[k];
+      if (q>=c0o1 && q<=c1o1)  // only update distribution for q between zero and one
+      {
+         VeloX = c0o1;
+         x = true;
+
+         velocityBC = VeloX;
+         (dist.f[DIR_M00])[kw] = getBounceBackDistributionForVeloBC(f_W, velocityBC, c2o27);
+      }
+
+      q = (subgridD.q[DIR_M00])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-	     VeloY = fac*vx2;
-	     VeloZ = fac*vx3;
-		 if (x == true) VeloX = c0o1;
-		 if (y == true) VeloY = c0o1;
-		 if (z == true) VeloZ = c0o1;
-         feq=c1o216*(drho/*+three*( vx1-vx2-vx3)*/+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TNW])[ktnw]=(c1o1-q)/(c1o1+q)*(f_BSE-f_TNW+(f_BSE+f_TNW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BSE+f_TNW)-c6o1*c1o216*( VeloX-VeloY-VeloZ))/(c1o1+q) - c1o216 * drho;
-         //feq=c1over216*(drho+three*( vx1-vx2-vx3)+c9over2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq); 
-         //(D.f[TNW])[ktnw]=(one-q)/(one+q)*(f_BSE-feq*om1)/(one-om1)+(q*(f_BSE+f_TNW)-six*c1over216*( VeloX-VeloY-VeloZ))/(one+q);
-         //(D.f[TNW])[ktnw]=zero;
+         VeloX = c0o1;
+         x = true;
+
+         velocityBC = -VeloX;
+         (dist.f[DIR_P00])[ke] = getBounceBackDistributionForVeloBC(f_E, velocityBC, c2o27);
       }
 
-      q = q_dirTNW[k];
+      q = (subgridD.q[DIR_0P0])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-	     VeloY = fac*vx2;
-	     VeloZ = fac*vx3;
-		 if (x == true) VeloX = c0o1;
-		 if (y == true) VeloY = c0o1;
-		 if (z == true) VeloZ = c0o1;
-         feq=c1o216*(drho/*+three*(-vx1+vx2+vx3)*/+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BSE])[kbse]=(c1o1-q)/(c1o1+q)*(f_TNW-f_BSE+(f_TNW+f_BSE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TNW+f_BSE)-c6o1*c1o216*(-VeloX+VeloY+VeloZ))/(c1o1+q) - c1o216 * drho;
-         //feq=c1over216*(drho+three*(-vx1+vx2+vx3)+c9over2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq); 
-         //(D.f[BSE])[kbse]=(one-q)/(one+q)*(f_TNW-feq*om1)/(one-om1)+(q*(f_TNW+f_BSE)-six*c1over216*(-VeloX+VeloY+VeloZ))/(one+q);
-         //(D.f[BSE])[kbse]=zero;
+         VeloY = c0o1;
+         y = true;
+
+         velocityBC = VeloY;
+         (dist.f[DIR_0M0])[ks] = getBounceBackDistributionForVeloBC(f_S, velocityBC, c2o27);
+      }
+
+      q = (subgridD.q[DIR_0M0])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         VeloY = c0o1;
+         y = true;
+
+         velocityBC = -VeloY;
+         (dist.f[DIR_0P0])[kn] = getBounceBackDistributionForVeloBC(f_N, velocityBC, c2o27);
+      }
+
+      q = (subgridD.q[DIR_00P])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         VeloZ = c0o1;
+         z = true;
+
+         velocityBC = VeloZ;
+         (dist.f[DIR_00M])[kb] = getBounceBackDistributionForVeloBC(f_B, velocityBC, c2o27);
+      }
+
+      q = (subgridD.q[DIR_00M])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         VeloZ = c0o1;
+         z = true;
+
+         velocityBC = -VeloZ;
+         (dist.f[DIR_00P])[kt] = getBounceBackDistributionForVeloBC(f_T, velocityBC, c2o27);
+      }
+
+      q = (subgridD.q[DIR_PP0])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+
+         velocityBC = VeloX + VeloY;
+         (dist.f[DIR_MM0])[ksw] = getBounceBackDistributionForVeloBC(f_SW, velocityBC, c1o54);
+      }
+
+      q = (subgridD.q[DIR_MM0])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+
+         velocityBC = -VeloX - VeloY;
+         (dist.f[DIR_PP0])[kne] = getBounceBackDistributionForVeloBC(f_NE, velocityBC, c1o54);
+      }
+
+      q = (subgridD.q[DIR_PM0])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+
+         velocityBC = VeloX - VeloY;
+         (dist.f[DIR_MP0])[knw] = getBounceBackDistributionForVeloBC(f_NW, velocityBC, c1o54);
+      }
+
+      q = (subgridD.q[DIR_MP0])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+
+         velocityBC = -VeloX + VeloY;
+         (dist.f[DIR_PM0])[kse] = getBounceBackDistributionForVeloBC(f_SE, velocityBC, c1o54);
+      }
+
+      q = (subgridD.q[DIR_P0P])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         VeloX = slipLength*vx1;
+         VeloZ = slipLength*vx3;
+         if (x == true) VeloX = c0o1;
+         if (z == true) VeloZ = c0o1;
+
+         velocityBC = VeloX + VeloZ;
+         (dist.f[DIR_M0M])[kbw] = getBounceBackDistributionForVeloBC(f_BW, velocityBC, c1o54);
+      }
+
+      q = (subgridD.q[DIR_M0M])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         VeloX = slipLength*vx1;
+         VeloZ = slipLength*vx3;
+         if (x == true) VeloX = c0o1;
+         if (z == true) VeloZ = c0o1;
+
+         velocityBC = -VeloX - VeloZ;
+         (dist.f[DIR_P0P])[kte] = getBounceBackDistributionForVeloBC(f_TE, velocityBC, c1o54);
+      }
+
+      q = (subgridD.q[DIR_P0M])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         VeloX = slipLength*vx1;
+         VeloZ = slipLength*vx3;
+         if (x == true) VeloX = c0o1;
+         if (z == true) VeloZ = c0o1;
+
+         velocityBC = VeloX - VeloZ;
+         (dist.f[DIR_M0P])[ktw] = getBounceBackDistributionForVeloBC(f_TW, velocityBC, c1o54);
+      }
+
+      q = (subgridD.q[DIR_M0P])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         VeloX = slipLength*vx1;
+         VeloZ = slipLength*vx3;
+         if (x == true) VeloX = c0o1;
+         if (z == true) VeloZ = c0o1;
+
+         velocityBC = -VeloX + VeloZ;
+         (dist.f[DIR_P0M])[kbe] = getBounceBackDistributionForVeloBC(f_BE, velocityBC, c1o54);
+      }
+
+      q = (subgridD.q[DIR_0PP])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+
+         velocityBC = VeloY + VeloZ;
+         (dist.f[DIR_0MM])[kbs] = getBounceBackDistributionForVeloBC(f_BS, velocityBC, c1o54);
+      }
+
+      q = (subgridD.q[DIR_0MM])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+
+         velocityBC = -VeloY - VeloZ;
+         (dist.f[DIR_0PP])[ktn] = getBounceBackDistributionForVeloBC(f_TN, velocityBC, c1o54);
+      }
+
+
+      q = (subgridD.q[DIR_0PM])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+
+         velocityBC = VeloY - VeloZ;
+         (dist.f[DIR_0MP])[kts] = getBounceBackDistributionForVeloBC(f_TS, velocityBC, c1o54);
+      }
+
+      q = (subgridD.q[DIR_0MP])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+
+         velocityBC = -VeloY + VeloZ;
+         (dist.f[DIR_0PM])[kbn] = getBounceBackDistributionForVeloBC(f_BN, velocityBC, c1o54);
+      }
+
+      q = (subgridD.q[DIR_PPP])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+
+         velocityBC = VeloX + VeloY + VeloZ;
+         (dist.f[DIR_MMM])[kbsw] = getBounceBackDistributionForVeloBC(f_BSW, velocityBC, c1o216);  // f_BSW (read from DIR_PPP), as every other link uses the value read from its q direction; was f_TNE
+      }
+
+      q = (subgridD.q[DIR_MMM])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+
+         velocityBC = -VeloX - VeloY - VeloZ;
+         (dist.f[DIR_PPP])[ktne] = getBounceBackDistributionForVeloBC(f_TNE, velocityBC, c1o216);
+      }
+
+
+      q = (subgridD.q[DIR_PPM])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+
+         velocityBC = VeloX + VeloY - VeloZ;
+         (dist.f[DIR_MMP])[ktsw] = getBounceBackDistributionForVeloBC(f_TSW, velocityBC, c1o216);
+      }
+
+      q = (subgridD.q[DIR_MMP])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+
+         velocityBC = -VeloX - VeloY + VeloZ;
+         (dist.f[DIR_PPM])[kbne] = getBounceBackDistributionForVeloBC(f_BNE, velocityBC, c1o216);
+      }
+
+      q = (subgridD.q[DIR_PMP])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+
+         velocityBC = VeloX - VeloY + VeloZ;
+         (dist.f[DIR_MPM])[kbnw] = getBounceBackDistributionForVeloBC(f_BNW, velocityBC, c1o216);
+      }
+
+      q = (subgridD.q[DIR_MPM])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+
+         velocityBC = -VeloX + VeloY - VeloZ;
+         (dist.f[DIR_PMP])[ktse] = getBounceBackDistributionForVeloBC(f_TSE, velocityBC, c1o216);
+      }
+
+      q = (subgridD.q[DIR_PMM])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+
+         velocityBC = VeloX - VeloY - VeloZ;
+         (dist.f[DIR_MPP])[ktnw] = getBounceBackDistributionForVeloBC(f_TNW, velocityBC, c1o216);
+      }
+
+      q = (subgridD.q[DIR_MPP])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+
+         velocityBC = -VeloX + VeloY + VeloZ;
+         (dist.f[DIR_PMM])[kbse] = getBounceBackDistributionForVeloBC(f_BSE, velocityBC, c1o216);
       }
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QSlipDeviceComp27TurbViscosity(real* DD, 
-											 int* k_Q, 
-											 real* QQ,
-											 unsigned int numberOfBCnodes,
-											 real om1, 
-											 unsigned int* neighborX,
-											 unsigned int* neighborY,
-											 unsigned int* neighborZ,
-                                  real* turbViscosity,
-											 unsigned int size_Mat, 
-											 bool isEvenTimestep)
+
+
+////////////////////////////////////////////////////////////////////////////
+__global__ void QSlipDeviceComp27TurbViscosity(
+                                    real* distributions, 
+                                    int* subgridDistanceIndices, 
+                                    real* subgridDistances,
+                                    unsigned int numberOfBCnodes,
+                                    real omega, 
+                                    unsigned int* neighborX,
+                                    unsigned int* neighborY,
+                                    unsigned int* neighborZ,
+                                    real* turbViscosity,
+                                    unsigned int numberOfLBnodes, 
+                                    bool isEvenTimestep)
 {
-   Distributions27 D;
-   if (isEvenTimestep==true)
-   {
-      D.f[E   ] = &DD[E   *size_Mat];
-      D.f[W   ] = &DD[W   *size_Mat];
-      D.f[N   ] = &DD[N   *size_Mat];
-      D.f[S   ] = &DD[S   *size_Mat];
-      D.f[T   ] = &DD[T   *size_Mat];
-      D.f[B   ] = &DD[B   *size_Mat];
-      D.f[NE  ] = &DD[NE  *size_Mat];
-      D.f[SW  ] = &DD[SW  *size_Mat];
-      D.f[SE  ] = &DD[SE  *size_Mat];
-      D.f[NW  ] = &DD[NW  *size_Mat];
-      D.f[TE  ] = &DD[TE  *size_Mat];
-      D.f[BW  ] = &DD[BW  *size_Mat];
-      D.f[BE  ] = &DD[BE  *size_Mat];
-      D.f[TW  ] = &DD[TW  *size_Mat];
-      D.f[TN  ] = &DD[TN  *size_Mat];
-      D.f[BS  ] = &DD[BS  *size_Mat];
-      D.f[BN  ] = &DD[BN  *size_Mat];
-      D.f[TS  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[TNE *size_Mat];
-      D.f[TSW ] = &DD[TSW *size_Mat];
-      D.f[TSE ] = &DD[TSE *size_Mat];
-      D.f[TNW ] = &DD[TNW *size_Mat];
-      D.f[BNE ] = &DD[BNE *size_Mat];
-      D.f[BSW ] = &DD[BSW *size_Mat];
-      D.f[BSE ] = &DD[BSE *size_Mat];
-      D.f[BNW ] = &DD[BNW *size_Mat];
-   } 
-   else
+   //! The slip boundary condition is executed in the following steps
+   //!
+   ////////////////////////////////////////////////////////////////////////////////
+   //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+   //!
+   const unsigned  x = threadIdx.x;  // global x-index 
+   const unsigned  y = blockIdx.x;   // global y-index 
+   const unsigned  z = blockIdx.y;   // global z-index 
+
+   const unsigned nx = blockDim.x;
+   const unsigned ny = gridDim.x;
+
+   const unsigned k = nx*(ny*z + y) + x;
+
+   if(k < numberOfBCnodes)
    {
-      D.f[W   ] = &DD[E   *size_Mat];
-      D.f[E   ] = &DD[W   *size_Mat];
-      D.f[S   ] = &DD[N   *size_Mat];
-      D.f[N   ] = &DD[S   *size_Mat];
-      D.f[B   ] = &DD[T   *size_Mat];
-      D.f[T   ] = &DD[B   *size_Mat];
-      D.f[SW  ] = &DD[NE  *size_Mat];
-      D.f[NE  ] = &DD[SW  *size_Mat];
-      D.f[NW  ] = &DD[SE  *size_Mat];
-      D.f[SE  ] = &DD[NW  *size_Mat];
-      D.f[BW  ] = &DD[TE  *size_Mat];
-      D.f[TE  ] = &DD[BW  *size_Mat];
-      D.f[TW  ] = &DD[BE  *size_Mat];
-      D.f[BE  ] = &DD[TW  *size_Mat];
-      D.f[BS  ] = &DD[TN  *size_Mat];
-      D.f[TN  ] = &DD[BS  *size_Mat];
-      D.f[TS  ] = &DD[BN  *size_Mat];
-      D.f[BN  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[BSW *size_Mat];
-      D.f[TSW ] = &DD[BNE *size_Mat];
-      D.f[TSE ] = &DD[BNW *size_Mat];
-      D.f[TNW ] = &DD[BSE *size_Mat];
-      D.f[BNE ] = &DD[TSW *size_Mat];
-      D.f[BSW ] = &DD[TNE *size_Mat];
-      D.f[BSE ] = &DD[TNW *size_Mat];
-      D.f[BNW ] = &DD[TSE *size_Mat];
+      //////////////////////////////////////////////////////////////////////////
+      //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref
+      //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), DOI:10.3390/computation5020019 ]</b></a>
+      //!
+      Distributions27 dist;
+      getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep);
+      
+      ////////////////////////////////////////////////////////////////////////////////
+      //! - Set local subgrid distances (q's)
+      //!
+      SubgridDistances27 subgridD;
+      getPointersToSubgridDistances(subgridD, subgridDistances, numberOfBCnodes);
+      
+      ////////////////////////////////////////////////////////////////////////////////
+      //! - Set neighbor indices (necessary for indirect addressing)
+      //!
+      unsigned int indexOfBCnode  = subgridDistanceIndices[k];
+      unsigned int kzero= indexOfBCnode;
+      unsigned int ke   = indexOfBCnode;
+      unsigned int kw   = neighborX[indexOfBCnode];
+      unsigned int kn   = indexOfBCnode;
+      unsigned int ks   = neighborY[indexOfBCnode];
+      unsigned int kt   = indexOfBCnode;
+      unsigned int kb   = neighborZ[indexOfBCnode];
+      unsigned int ksw  = neighborY[kw];
+      unsigned int kne  = indexOfBCnode;
+      unsigned int kse  = ks;
+      unsigned int knw  = kw;
+      unsigned int kbw  = neighborZ[kw];
+      unsigned int kte  = indexOfBCnode;
+      unsigned int kbe  = kb;
+      unsigned int ktw  = kw;
+      unsigned int kbs  = neighborZ[ks];
+      unsigned int ktn  = indexOfBCnode;
+      unsigned int kbn  = kb;
+      unsigned int kts  = ks;
+      unsigned int ktse = ks;
+      unsigned int kbnw = kbw;
+      unsigned int ktnw = kw;
+      unsigned int kbse = kbs;
+      unsigned int ktsw = ksw;
+      unsigned int kbne = kb;
+      unsigned int ktne = indexOfBCnode;
+      unsigned int kbsw = neighborZ[ksw];
+      
+      ////////////////////////////////////////////////////////////////////////////////
+      //! - Set local distributions
+      //!
+      real f_W    = (dist.f[DIR_P00   ])[ke   ];
+      real f_E    = (dist.f[DIR_M00   ])[kw   ];
+      real f_S    = (dist.f[DIR_0P0   ])[kn   ];
+      real f_N    = (dist.f[DIR_0M0   ])[ks   ];
+      real f_B    = (dist.f[DIR_00P   ])[kt   ];
+      real f_T    = (dist.f[DIR_00M   ])[kb   ];
+      real f_SW   = (dist.f[DIR_PP0  ])[kne  ];
+      real f_NE   = (dist.f[DIR_MM0  ])[ksw  ];
+      real f_NW   = (dist.f[DIR_PM0  ])[kse  ];
+      real f_SE   = (dist.f[DIR_MP0  ])[knw  ];
+      real f_BW   = (dist.f[DIR_P0P  ])[kte  ];
+      real f_TE   = (dist.f[DIR_M0M  ])[kbw  ];
+      real f_TW   = (dist.f[DIR_P0M  ])[kbe  ];
+      real f_BE   = (dist.f[DIR_M0P  ])[ktw  ];
+      real f_BS   = (dist.f[DIR_0PP  ])[ktn  ];
+      real f_TN   = (dist.f[DIR_0MM  ])[kbs  ];
+      real f_TS   = (dist.f[DIR_0PM  ])[kbn  ];
+      real f_BN   = (dist.f[DIR_0MP  ])[kts  ];
+      real f_BSW  = (dist.f[DIR_PPP ])[ktne ];
+      real f_BNE  = (dist.f[DIR_MMP ])[ktsw ];
+      real f_BNW  = (dist.f[DIR_PMP ])[ktse ];
+      real f_BSE  = (dist.f[DIR_MPP ])[ktnw ];
+      real f_TSW  = (dist.f[DIR_PPM ])[kbne ];
+      real f_TNE  = (dist.f[DIR_MMM ])[kbsw ];
+      real f_TNW  = (dist.f[DIR_PMM ])[kbse ];
+      real f_TSE  = (dist.f[DIR_MPM ])[kbnw ];
+
+      ////////////////////////////////////////////////////////////////////////////////
+      //! - Calculate macroscopic quantities
+      //!
+      real drho = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
+                  f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + 
+                  f_T + f_B + f_N + f_S + f_E + f_W + ((dist.f[DIR_000])[kzero]); 
+
+      real vx1  = (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
+                   ((f_BE - f_TW)   + (f_TE - f_BW))   + ((f_SE - f_NW)   + (f_NE - f_SW)) +
+                   (f_E - f_W)) / (c1o1 + drho);
+
+      real vx2  = ((-(f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
+                   ((f_BN - f_TS)   + (f_TN - f_BS))    + (-(f_SE - f_NW)  + (f_NE - f_SW)) +
+                   (f_N - f_S)) / (c1o1 + drho);
+
+      real vx3  = (((f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) + (f_TSW - f_BNE)) +
+                   (-(f_BN - f_TS)  + (f_TN - f_BS))   + ((f_TE - f_BW)   - (f_BE - f_TW)) +
+                   (f_T - f_B)) / (c1o1 + drho);
+
+      real cu_sq = c3o2 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3) * (c1o1 + drho);
+
+      ////////////////////////////////////////////////////////////////////////////////
+      //! - change the pointer to write the results in the correct array
+      //!
+      getPointersToDistributions(dist, distributions, numberOfLBnodes, !isEvenTimestep);
+
+      ////////////////////////////////////////////////////////////////////////////////
+      //! - compute local relaxation rate
+      //!
+      real om_turb = omega / (c1o1 + c3o1* omega* max(c0o1, turbViscosity[indexOfBCnode]) );
+
+      ////////////////////////////////////////////////////////////////////////////////
+      //! - Multiply the local velocities by the slipLength
+      //!
+      real slipLength = c1o1;
+      real VeloX = slipLength*vx1;
+      real VeloY = slipLength*vx2;
+      real VeloZ = slipLength*vx3;
+
+      ////////////////////////////////////////////////////////////////////////////////
+      //! - Update distributions with subgrid distance (q) between zero and one
+      //!
+      real feq, q, velocityLB, velocityBC;
+
+      bool x = false;
+      bool y = false;
+      bool z = false;
+
+      q = (subgridD.q[DIR_P00])[k];
+      if (q>=c0o1 && q<=c1o1)  // only update distribution for q between zero and one
+      {
+         VeloX = c0o1;
+         x = true;
+
+         velocityLB = vx1;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
+         velocityBC = VeloX;
+         (dist.f[DIR_M00])[kw] = getInterpolatedDistributionForVeloBC(q, f_E, f_W, feq, om_turb, velocityBC, c2o27);
+      }
+
+      q = (subgridD.q[DIR_M00])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         VeloX = c0o1;
+         x = true;
+
+         velocityLB = -vx1;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
+         velocityBC = -VeloX;
+         (dist.f[DIR_P00])[ke] = getInterpolatedDistributionForVeloBC(q, f_W, f_E, feq, om_turb, velocityBC, c2o27);
+      }
+
+      q = (subgridD.q[DIR_0P0])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         VeloY = c0o1;
+         y = true;
+
+         velocityLB = vx2;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
+         velocityBC = VeloY;
+         (dist.f[DIR_0M0])[ks] = getInterpolatedDistributionForVeloBC(q, f_N, f_S, feq, om_turb, velocityBC, c2o27);
+      }
+
+      q = (subgridD.q[DIR_0M0])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         VeloY = c0o1;
+         y = true;
+
+         velocityLB = -vx2;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
+         velocityBC = -VeloY;
+         (dist.f[DIR_0P0])[kn] = getInterpolatedDistributionForVeloBC(q, f_S, f_N, feq, om_turb, velocityBC, c2o27);
+      }
+
+      q = (subgridD.q[DIR_00P])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         VeloZ = c0o1;
+         z = true;
+
+         velocityLB = vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
+         velocityBC = VeloZ;
+         (dist.f[DIR_00M])[kb] = getInterpolatedDistributionForVeloBC(q, f_T, f_B, feq, om_turb, velocityBC, c2o27);
+      }
+
+      q = (subgridD.q[DIR_00M])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         VeloZ = c0o1;
+         z = true;
+
+         velocityLB = -vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
+         velocityBC = -VeloZ;
+         (dist.f[DIR_00P])[kt] = getInterpolatedDistributionForVeloBC(q, f_B, f_T, feq, om_turb, velocityBC, c2o27);
+      }
+
+      q = (subgridD.q[DIR_PP0])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+
+         velocityLB = vx1 + vx2;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = VeloX + VeloY;
+         (dist.f[DIR_MM0])[ksw] = getInterpolatedDistributionForVeloBC(q, f_NE, f_SW, feq, om_turb, velocityBC, c1o54);
+      }
+
+      q = (subgridD.q[DIR_MM0])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+
+         velocityLB = -vx1 - vx2;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = -VeloX - VeloY;
+         (dist.f[DIR_PP0])[kne] = getInterpolatedDistributionForVeloBC(q, f_SW, f_NE, feq, om_turb, velocityBC, c1o54);
+      }
+
+      q = (subgridD.q[DIR_PM0])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+
+         velocityLB = vx1 - vx2;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = VeloX - VeloY;
+         (dist.f[DIR_MP0])[knw] = getInterpolatedDistributionForVeloBC(q, f_SE, f_NW, feq, om_turb, velocityBC, c1o54);
+      }
+
+      q = (subgridD.q[DIR_MP0])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+
+         velocityLB = -vx1 + vx2;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = -VeloX + VeloY;
+         (dist.f[DIR_PM0])[kse] = getInterpolatedDistributionForVeloBC(q, f_NW, f_SE, feq, om_turb, velocityBC, c1o54);
+      }
+
+      q = (subgridD.q[DIR_P0P])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         VeloX = slipLength*vx1;
+         VeloZ = slipLength*vx3;
+         if (x == true) VeloX = c0o1;
+         if (z == true) VeloZ = c0o1;
+
+         velocityLB = vx1 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = VeloX + VeloZ;
+         (dist.f[DIR_M0M])[kbw] = getInterpolatedDistributionForVeloBC(q, f_TE, f_BW, feq, om_turb, velocityBC, c1o54);
+      }
+
+      q = (subgridD.q[DIR_M0M])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+        VeloX = slipLength*vx1;
+        VeloZ = slipLength*vx3;
+        if (x == true) VeloX = c0o1;
+        if (z == true) VeloZ = c0o1;
+
+         velocityLB = -vx1 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = -VeloX - VeloZ;
+         (dist.f[DIR_P0P])[kte] = getInterpolatedDistributionForVeloBC(q, f_BW, f_TE, feq, om_turb, velocityBC, c1o54);
+      }
+
+      q = (subgridD.q[DIR_P0M])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         VeloX = slipLength*vx1;
+         VeloZ = slipLength*vx3;
+         if (x == true) VeloX = c0o1;
+         if (z == true) VeloZ = c0o1;
+
+         velocityLB = vx1 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = VeloX - VeloZ;
+         (dist.f[DIR_M0P])[ktw] = getInterpolatedDistributionForVeloBC(q, f_BE, f_TW, feq, om_turb, velocityBC, c1o54);
+      }
+
+      q = (subgridD.q[DIR_M0P])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         VeloX = slipLength*vx1;
+         VeloZ = slipLength*vx3;
+         if (x == true) VeloX = c0o1;
+         if (z == true) VeloZ = c0o1;
+
+         velocityLB = -vx1 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = -VeloX + VeloZ;
+         (dist.f[DIR_P0M])[kbe] = getInterpolatedDistributionForVeloBC(q, f_TW, f_BE, feq, om_turb, velocityBC, c1o54);
+      }
+
+      q = (subgridD.q[DIR_0PP])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+
+         velocityLB = vx2 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = VeloY + VeloZ;
+         (dist.f[DIR_0MM])[kbs] = getInterpolatedDistributionForVeloBC(q, f_TN, f_BS, feq, om_turb, velocityBC, c1o54);
+      }
+
+      q = (subgridD.q[DIR_0MM])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+
+         velocityLB = -vx2 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = -VeloY - VeloZ;
+         (dist.f[DIR_0PP])[ktn] = getInterpolatedDistributionForVeloBC(q, f_BS, f_TN, feq, om_turb, velocityBC, c1o54);
+      }
+
+
+      q = (subgridD.q[DIR_0PM])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+
+         velocityLB = vx2 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = VeloY - VeloZ;
+         (dist.f[DIR_0MP])[kts] = getInterpolatedDistributionForVeloBC(q, f_BN, f_TS, feq, om_turb, velocityBC, c1o54);
+      }
+
+      q = (subgridD.q[DIR_0MP])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+
+         velocityLB = -vx2 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = -VeloY + VeloZ;
+         (dist.f[DIR_0PM])[kbn] = getInterpolatedDistributionForVeloBC(q, f_TS, f_BN, feq, om_turb, velocityBC, c1o54);
+      }
+
+      q = (subgridD.q[DIR_PPP])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+         velocityLB = vx1 + vx2 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         velocityBC = VeloX + VeloY + VeloZ;
+         (dist.f[DIR_MMM])[kbsw] = getInterpolatedDistributionForVeloBC(q, f_TNE, f_BSW, feq, om_turb, velocityBC, c1o216);
+      }
+
+      q = (subgridD.q[DIR_MMM])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+         velocityLB = -vx1 - vx2 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         velocityBC = -VeloX - VeloY - VeloZ;
+         (dist.f[DIR_PPP])[ktne] = getInterpolatedDistributionForVeloBC(q, f_BSW, f_TNE, feq, om_turb, velocityBC, c1o216);
+      }
+
+
+      q = (subgridD.q[DIR_PPM])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+         velocityLB = vx1 + vx2 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         velocityBC = VeloX + VeloY - VeloZ;
+         (dist.f[DIR_MMP])[ktsw] = getInterpolatedDistributionForVeloBC(q, f_BNE, f_TSW, feq, om_turb, velocityBC, c1o216);
+      }
+
+      q = (subgridD.q[DIR_MMP])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+         velocityLB = -vx1 - vx2 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         velocityBC = -VeloX - VeloY + VeloZ;
+         (dist.f[DIR_PPM])[kbne] = getInterpolatedDistributionForVeloBC(q, f_TSW, f_BNE, feq, om_turb, velocityBC, c1o216);
+      }
+
+      q = (subgridD.q[DIR_PMP])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+         velocityLB = vx1 - vx2 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         velocityBC = VeloX - VeloY + VeloZ;
+         (dist.f[DIR_MPM])[kbnw] = getInterpolatedDistributionForVeloBC(q, f_TSE, f_BNW, feq, om_turb, velocityBC, c1o216);
+      }
+
+      q = (subgridD.q[DIR_MPM])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+         velocityLB = -vx1 + vx2 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         velocityBC = -VeloX + VeloY - VeloZ;
+         (dist.f[DIR_PMP])[ktse] = getInterpolatedDistributionForVeloBC(q, f_BNW, f_TSE, feq, om_turb, velocityBC, c1o216);
+      }
+
+      q = (subgridD.q[DIR_PMM])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+         velocityLB = vx1 - vx2 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         velocityBC = VeloX - VeloY - VeloZ;
+         (dist.f[DIR_MPP])[ktnw] = getInterpolatedDistributionForVeloBC(q, f_BSE, f_TNW, feq, om_turb, velocityBC, c1o216);
+      }
+
+      q = (subgridD.q[DIR_MPP])[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+         velocityLB = -vx1 + vx2 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         velocityBC = -VeloX + VeloY + VeloZ;
+         (dist.f[DIR_PMM])[kbse] = getInterpolatedDistributionForVeloBC(q, f_TNW, f_BSE, feq, om_turb, velocityBC, c1o216);
+      }
    }
+}
+
+
+////////////////////////////////////////////////////////////////////////////
+__global__ void QSlipPressureDeviceComp27TurbViscosity(
+                                    real* distributions, 
+                                    int* subgridDistanceIndices, 
+                                    real* subgridDistances,
+                                    unsigned int numberOfBCnodes,
+                                    real omega, 
+                                    unsigned int* neighborX,
+                                    unsigned int* neighborY,
+                                    unsigned int* neighborZ,
+                                    real* turbViscosity,
+                                    unsigned int numberOfLBnodes, 
+                                    bool isEvenTimestep)
+{
+   //! The slip boundary condition is executed in the following steps
+   //!
    ////////////////////////////////////////////////////////////////////////////////
-   const unsigned  x = threadIdx.x;  // Globaler x-Index 
-   const unsigned  y = blockIdx.x;   // Globaler y-Index 
-   const unsigned  z = blockIdx.y;   // Globaler z-Index 
+   //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+   //!
+   const unsigned  x = threadIdx.x;  // global x-index 
+   const unsigned  y = blockIdx.x;   // global y-index 
+   const unsigned  z = blockIdx.y;   // global z-index 
 
    const unsigned nx = blockDim.x;
    const unsigned ny = gridDim.x;
 
    const unsigned k = nx*(ny*z + y) + x;
-   //////////////////////////////////////////////////////////////////////////
 
-   if(k<numberOfBCnodes)
+   if(k < numberOfBCnodes)
    {
+      //////////////////////////////////////////////////////////////////////////
+      //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on timestep is based on the esoteric twist algorithm \ref
+      //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017), DOI:10.3390/computation5020019 ]</b></a>
+      //!
+      Distributions27 dist;
+      getPointersToDistributions(dist, distributions, numberOfLBnodes, isEvenTimestep);
+      
       ////////////////////////////////////////////////////////////////////////////////
-      real *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB, 
-            *q_dirNE,  *q_dirSW,  *q_dirSE,  *q_dirNW,  *q_dirTE,  *q_dirBW,
-            *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
-            *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
-            *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[E   * numberOfBCnodes];
-      q_dirW   = &QQ[W   * numberOfBCnodes];
-      q_dirN   = &QQ[N   * numberOfBCnodes];
-      q_dirS   = &QQ[S   * numberOfBCnodes];
-      q_dirT   = &QQ[T   * numberOfBCnodes];
-      q_dirB   = &QQ[B   * numberOfBCnodes];
-      q_dirNE  = &QQ[NE  * numberOfBCnodes];
-      q_dirSW  = &QQ[SW  * numberOfBCnodes];
-      q_dirSE  = &QQ[SE  * numberOfBCnodes];
-      q_dirNW  = &QQ[NW  * numberOfBCnodes];
-      q_dirTE  = &QQ[TE  * numberOfBCnodes];
-      q_dirBW  = &QQ[BW  * numberOfBCnodes];
-      q_dirBE  = &QQ[BE  * numberOfBCnodes];
-      q_dirTW  = &QQ[TW  * numberOfBCnodes];
-      q_dirTN  = &QQ[TN  * numberOfBCnodes];
-      q_dirBS  = &QQ[BS  * numberOfBCnodes];
-      q_dirBN  = &QQ[BN  * numberOfBCnodes];
-      q_dirTS  = &QQ[TS  * numberOfBCnodes];
-      q_dirTNE = &QQ[TNE * numberOfBCnodes];
-      q_dirTSW = &QQ[TSW * numberOfBCnodes];
-      q_dirTSE = &QQ[TSE * numberOfBCnodes];
-      q_dirTNW = &QQ[TNW * numberOfBCnodes];
-      q_dirBNE = &QQ[BNE * numberOfBCnodes];
-      q_dirBSW = &QQ[BSW * numberOfBCnodes];
-      q_dirBSE = &QQ[BSE * numberOfBCnodes];
-      q_dirBNW = &QQ[BNW * numberOfBCnodes];
+      //! - Set local subgrid distances (q's)
+      //!
+      SubgridDistances27 subgridD;
+      getPointersToSubgridDistances(subgridD, subgridDistances, numberOfBCnodes);
+      
       ////////////////////////////////////////////////////////////////////////////////
-      //index
-      unsigned int KQK  = k_Q[k];
-      unsigned int kzero= KQK;
-      unsigned int ke   = KQK;
-      unsigned int kw   = neighborX[KQK];
-      unsigned int kn   = KQK;
-      unsigned int ks   = neighborY[KQK];
-      unsigned int kt   = KQK;
-      unsigned int kb   = neighborZ[KQK];
+      //! - Set neighbor indices (necessary for indirect addressing)
+      //!
+      unsigned int indexOfBCnode  = subgridDistanceIndices[k];
+      unsigned int kzero= indexOfBCnode;
+      unsigned int ke   = indexOfBCnode;
+      unsigned int kw   = neighborX[indexOfBCnode];
+      unsigned int kn   = indexOfBCnode;
+      unsigned int ks   = neighborY[indexOfBCnode];
+      unsigned int kt   = indexOfBCnode;
+      unsigned int kb   = neighborZ[indexOfBCnode];
       unsigned int ksw  = neighborY[kw];
-      unsigned int kne  = KQK;
+      unsigned int kne  = indexOfBCnode;
       unsigned int kse  = ks;
       unsigned int knw  = kw;
       unsigned int kbw  = neighborZ[kw];
-      unsigned int kte  = KQK;
+      unsigned int kte  = indexOfBCnode;
       unsigned int kbe  = kb;
       unsigned int ktw  = kw;
       unsigned int kbs  = neighborZ[ks];
-      unsigned int ktn  = KQK;
+      unsigned int ktn  = indexOfBCnode;
       unsigned int kbn  = kb;
       unsigned int kts  = ks;
       unsigned int ktse = ks;
@@ -1468,527 +2228,1118 @@ extern "C" __global__ void QSlipDeviceComp27TurbViscosity(real* DD,
       unsigned int kbse = kbs;
       unsigned int ktsw = ksw;
       unsigned int kbne = kb;
-      unsigned int ktne = KQK;
+      unsigned int ktne = indexOfBCnode;
       unsigned int kbsw = neighborZ[ksw];
       
       ////////////////////////////////////////////////////////////////////////////////
-      real f_W    = (D.f[E   ])[ke   ];
-      real f_E    = (D.f[W   ])[kw   ];
-      real f_S    = (D.f[N   ])[kn   ];
-      real f_N    = (D.f[S   ])[ks   ];
-      real f_B    = (D.f[T   ])[kt   ];
-      real f_T    = (D.f[B   ])[kb   ];
-      real f_SW   = (D.f[NE  ])[kne  ];
-      real f_NE   = (D.f[SW  ])[ksw  ];
-      real f_NW   = (D.f[SE  ])[kse  ];
-      real f_SE   = (D.f[NW  ])[knw  ];
-      real f_BW   = (D.f[TE  ])[kte  ];
-      real f_TE   = (D.f[BW  ])[kbw  ];
-      real f_TW   = (D.f[BE  ])[kbe  ];
-      real f_BE   = (D.f[TW  ])[ktw  ];
-      real f_BS   = (D.f[TN  ])[ktn  ];
-      real f_TN   = (D.f[BS  ])[kbs  ];
-      real f_TS   = (D.f[BN  ])[kbn  ];
-      real f_BN   = (D.f[TS  ])[kts  ];
-      real f_BSW  = (D.f[TNE ])[ktne ];
-      real f_BNE  = (D.f[TSW ])[ktsw ];
-      real f_BNW  = (D.f[TSE ])[ktse ];
-      real f_BSE  = (D.f[TNW ])[ktnw ];
-      real f_TSW  = (D.f[BNE ])[kbne ];
-      real f_TNE  = (D.f[BSW ])[kbsw ];
-      real f_TNW  = (D.f[BSE ])[kbse ];
-      real f_TSE  = (D.f[BNW ])[kbnw ];
+      //! - Set local distributions
+      //!
+      real f_W    = (dist.f[DIR_P00   ])[ke   ];
+      real f_E    = (dist.f[DIR_M00   ])[kw   ];
+      real f_S    = (dist.f[DIR_0P0   ])[kn   ];
+      real f_N    = (dist.f[DIR_0M0   ])[ks   ];
+      real f_B    = (dist.f[DIR_00P   ])[kt   ];
+      real f_T    = (dist.f[DIR_00M   ])[kb   ];
+      real f_SW   = (dist.f[DIR_PP0  ])[kne  ];
+      real f_NE   = (dist.f[DIR_MM0  ])[ksw  ];
+      real f_NW   = (dist.f[DIR_PM0  ])[kse  ];
+      real f_SE   = (dist.f[DIR_MP0  ])[knw  ];
+      real f_BW   = (dist.f[DIR_P0P  ])[kte  ];
+      real f_TE   = (dist.f[DIR_M0M  ])[kbw  ];
+      real f_TW   = (dist.f[DIR_P0M  ])[kbe  ];
+      real f_BE   = (dist.f[DIR_M0P  ])[ktw  ];
+      real f_BS   = (dist.f[DIR_0PP  ])[ktn  ];
+      real f_TN   = (dist.f[DIR_0MM  ])[kbs  ];
+      real f_TS   = (dist.f[DIR_0PM  ])[kbn  ];
+      real f_BN   = (dist.f[DIR_0MP  ])[kts  ];
+      real f_BSW  = (dist.f[DIR_PPP ])[ktne ];
+      real f_BNE  = (dist.f[DIR_MMP ])[ktsw ];
+      real f_BNW  = (dist.f[DIR_PMP ])[ktse ];
+      real f_BSE  = (dist.f[DIR_MPP ])[ktnw ];
+      real f_TSW  = (dist.f[DIR_PPM ])[kbne ];
+      real f_TNE  = (dist.f[DIR_MMM ])[kbsw ];
+      real f_TNW  = (dist.f[DIR_PMM ])[kbse ];
+      real f_TSE  = (dist.f[DIR_MPM ])[kbnw ];
+
       ////////////////////////////////////////////////////////////////////////////////
-      real vx1, vx2, vx3, drho, feq, q;
-      drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
-                f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + 
-                f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[REST])[kzero]); 
+      //! - Calculate macroscopic quantities
+      //!
+      real drho = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
+                  f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + 
+                  f_T + f_B + f_N + f_S + f_E + f_W + ((dist.f[DIR_000])[kzero]); 
 
-      vx1    =  (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
-                ((f_BE - f_TW)   + (f_TE - f_BW))   + ((f_SE - f_NW)   + (f_NE - f_SW)) +
-                (f_E - f_W)) / (c1o1 + drho); 
-         
+      real vx1  = (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
+                   ((f_BE - f_TW)   + (f_TE - f_BW))   + ((f_SE - f_NW)   + (f_NE - f_SW)) +
+                   (f_E - f_W)) / (c1o1 + drho);
 
-      vx2    =   ((-(f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
-                 ((f_BN - f_TS)   + (f_TN - f_BS))    + (-(f_SE - f_NW)  + (f_NE - f_SW)) +
-                 (f_N - f_S)) / (c1o1 + drho); 
+      real vx2  = ((-(f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
+                   ((f_BN - f_TS)   + (f_TN - f_BS))    + (-(f_SE - f_NW)  + (f_NE - f_SW)) +
+                   (f_N - f_S)) / (c1o1 + drho);
 
-      vx3    =   (((f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) + (f_TSW - f_BNE)) +
-                 (-(f_BN - f_TS)  + (f_TN - f_BS))   + ((f_TE - f_BW)   - (f_BE - f_TW)) +
-                 (f_T - f_B)) / (c1o1 + drho); 
+      real vx3  = (((f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) + (f_TSW - f_BNE)) +
+                   (-(f_BN - f_TS)  + (f_TN - f_BS))   + ((f_TE - f_BW)   - (f_BE - f_TW)) +
+                   (f_T - f_B)) / (c1o1 + drho);
 
-      real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3) * (c1o1 + drho);
+      real cu_sq = c3o2 * (vx1 * vx1 + vx2 * vx2 + vx3 * vx3) * (c1o1 + drho);
 
-      //////////////////////////////////////////////////////////////////////////
-      if (isEvenTimestep==false)
-      {
-         D.f[E   ] = &DD[E   *size_Mat];
-         D.f[W   ] = &DD[W   *size_Mat];
-         D.f[N   ] = &DD[N   *size_Mat];
-         D.f[S   ] = &DD[S   *size_Mat];
-         D.f[T   ] = &DD[T   *size_Mat];
-         D.f[B   ] = &DD[B   *size_Mat];
-         D.f[NE  ] = &DD[NE  *size_Mat];
-         D.f[SW  ] = &DD[SW  *size_Mat];
-         D.f[SE  ] = &DD[SE  *size_Mat];
-         D.f[NW  ] = &DD[NW  *size_Mat];
-         D.f[TE  ] = &DD[TE  *size_Mat];
-         D.f[BW  ] = &DD[BW  *size_Mat];
-         D.f[BE  ] = &DD[BE  *size_Mat];
-         D.f[TW  ] = &DD[TW  *size_Mat];
-         D.f[TN  ] = &DD[TN  *size_Mat];
-         D.f[BS  ] = &DD[BS  *size_Mat];
-         D.f[BN  ] = &DD[BN  *size_Mat];
-         D.f[TS  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[TNE *size_Mat];
-         D.f[TSW ] = &DD[TSW *size_Mat];
-         D.f[TSE ] = &DD[TSE *size_Mat];
-         D.f[TNW ] = &DD[TNW *size_Mat];
-         D.f[BNE ] = &DD[BNE *size_Mat];
-         D.f[BSW ] = &DD[BSW *size_Mat];
-         D.f[BSE ] = &DD[BSE *size_Mat];
-         D.f[BNW ] = &DD[BNW *size_Mat];
-      } 
-      else
-      {
-         D.f[W   ] = &DD[E   *size_Mat];
-         D.f[E   ] = &DD[W   *size_Mat];
-         D.f[S   ] = &DD[N   *size_Mat];
-         D.f[N   ] = &DD[S   *size_Mat];
-         D.f[B   ] = &DD[T   *size_Mat];
-         D.f[T   ] = &DD[B   *size_Mat];
-         D.f[SW  ] = &DD[NE  *size_Mat];
-         D.f[NE  ] = &DD[SW  *size_Mat];
-         D.f[NW  ] = &DD[SE  *size_Mat];
-         D.f[SE  ] = &DD[NW  *size_Mat];
-         D.f[BW  ] = &DD[TE  *size_Mat];
-         D.f[TE  ] = &DD[BW  *size_Mat];
-         D.f[TW  ] = &DD[BE  *size_Mat];
-         D.f[BE  ] = &DD[TW  *size_Mat];
-         D.f[BS  ] = &DD[TN  *size_Mat];
-         D.f[TN  ] = &DD[BS  *size_Mat];
-         D.f[TS  ] = &DD[BN  *size_Mat];
-         D.f[BN  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[BSW *size_Mat];
-         D.f[TSW ] = &DD[BNE *size_Mat];
-         D.f[TSE ] = &DD[BNW *size_Mat];
-         D.f[TNW ] = &DD[BSE *size_Mat];
-         D.f[BNE ] = &DD[TSW *size_Mat];
-         D.f[BSW ] = &DD[TNE *size_Mat];
-         D.f[BSE ] = &DD[TNW *size_Mat];
-         D.f[BNW ] = &DD[TSE *size_Mat];
-      }
-      ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-      //Test
-      //(D.f[REST])[k]=c1o10;
-      ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-	  real om_turb = om1 / (c1o1 + c3o1*om1*max(c0o1, turbViscosity[k_Q[k]]));
-     
-     real fac = c1o1;//c99o100;
-	  real VeloX = fac*vx1;
-	  real VeloY = fac*vx2;
-	  real VeloZ = fac*vx3;
-	  bool x = false;
-	  bool y = false;
-	  bool z = false;
+      ////////////////////////////////////////////////////////////////////////////////
+      //! - change the pointer to write the results in the correct array
+      //!
+      getPointersToDistributions(dist, distributions, numberOfLBnodes, !isEvenTimestep);
 
-      q = q_dirE[k];
-      if (q>=c0o1 && q<=c1o1)
+      ////////////////////////////////////////////////////////////////////////////////
+      //! - compute local relaxation rate
+      //!
+      real om_turb = omega / (c1o1 + c3o1* omega* max(c0o1, turbViscosity[indexOfBCnode]) );
+
+      ////////////////////////////////////////////////////////////////////////////////
+      //! - Multiply the local velocities by the slipLength
+      //!
+      real slipLength = c1o1;
+      real VeloX = slipLength*vx1;
+      real VeloY = slipLength*vx2;
+      real VeloZ = slipLength*vx3;
+
+      ////////////////////////////////////////////////////////////////////////////////
+      //! - Update distributions with subgrid distance (q) between zero and one
+      //!
+      real feq, q, velocityLB, velocityBC;
+
+      bool x = false;
+      bool y = false;
+      bool z = false;
+
+      q = (subgridD.q[DIR_P00])[k];
+      if (q>=c0o1 && q<=c1o1)  // only update distribution for q between zero and one
       {
-		 VeloX = c0o1;
-	     VeloY = fac*vx2;
-	     VeloZ = fac*vx3;
-		 x = true;
-         feq=c2o27* (drho/*+three*( vx1        )*/+c9o2*( vx1        )*( vx1        ) * (c1o1 + drho)-cu_sq); 
-         (D.f[W])[kw]=(c1o1-q)/(c1o1+q)*(f_E-f_W+(f_E+f_W-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_E+f_W)-c6o1*c2o27*( VeloX     ))/(c1o1+q) - c2o27 * drho;
-         //feq=c2over27* (drho+three*( vx1        )+c9over2*( vx1        )*( vx1        )-cu_sq); 
-         //(D.f[W])[kw]=(one-q)/(one+q)*(f_E-feq*om1)/(one-om1)+(q*(f_E+f_W)-six*c2over27*( VeloX     ))/(one+q);
-         //(D.f[W])[kw]=zero;
+         VeloX = c0o1;
+         x = true;
+
+         velocityLB = vx1;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
+         velocityBC = VeloX;
+         (dist.f[DIR_M00])[kw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_E, f_W, feq, om_turb, drho, velocityBC, c2o27);
       }
 
-      q = q_dirW[k];
+      q = (subgridD.q[DIR_M00])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = c0o1;
-	     VeloY = fac*vx2;
-	     VeloZ = fac*vx3;
-		 x = true;
-         feq=c2o27* (drho/*+three*(-vx1        )*/+c9o2*(-vx1        )*(-vx1        ) * (c1o1 + drho)-cu_sq); 
-         (D.f[E])[ke]=(c1o1-q)/(c1o1+q)*(f_W-f_E+(f_W+f_E-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_W+f_E)-c6o1*c2o27*(-VeloX     ))/(c1o1+q) - c2o27 * drho;
-         //feq=c2over27* (drho+three*(-vx1        )+c9over2*(-vx1        )*(-vx1        )-cu_sq); 
-         //(D.f[E])[ke]=(one-q)/(one+q)*(f_W-feq*om_turb)/(one-om_turb)+(q*(f_W+f_E)-six*c2over27*(-VeloX     ))/(one+q);
-         //(D.f[E])[ke]=zero;
+         VeloX = c0o1;
+         x = true;
+
+         velocityLB = -vx1;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
+         velocityBC = -VeloX;
+         (dist.f[DIR_P00])[ke] = getInterpolatedDistributionForVeloWithPressureBC(q, f_W, f_E, feq, om_turb, drho, velocityBC, c2o27);
       }
 
-      q = q_dirN[k];
+      q = (subgridD.q[DIR_0P0])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-		 VeloY = c0o1;
-	     VeloZ = fac*vx3;
-		 y = true;
-         feq=c2o27* (drho/*+three*(    vx2     )*/+c9o2*(     vx2    )*(     vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[S])[ks]=(c1o1-q)/(c1o1+q)*(f_N-f_S+(f_N+f_S-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_N+f_S)-c6o1*c2o27*( VeloY     ))/(c1o1+q) - c2o27 * drho;
-         //feq=c2over27* (drho+three*(    vx2     )+c9over2*(     vx2    )*(     vx2    )-cu_sq); 
-         //(D.f[S])[ks]=(one-q)/(one+q)*(f_N-feq*om_turb)/(one-om_turb)+(q*(f_N+f_S)-six*c2over27*( VeloY     ))/(one+q);
-         //(D.f[S])[ks]=zero;
+         VeloY = c0o1;
+         y = true;
+
+         velocityLB = vx2;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
+         velocityBC = VeloY;
+         (dist.f[DIR_0M0])[ks] = getInterpolatedDistributionForVeloWithPressureBC(q, f_N, f_S, feq, om_turb, drho, velocityBC, c2o27);
       }
 
-      q = q_dirS[k];
+      q = (subgridD.q[DIR_0M0])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-		 VeloY = c0o1;
-	     VeloZ = fac*vx3;
-		 y = true;
-         feq=c2o27* (drho/*+three*(   -vx2     )*/+c9o2*(    -vx2    )*(    -vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[N])[kn]=(c1o1-q)/(c1o1+q)*(f_S-f_N+(f_S+f_N-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_S+f_N)-c6o1*c2o27*(-VeloY     ))/(c1o1+q) - c2o27 * drho;
-         //feq=c2over27* (drho+three*(   -vx2     )+c9over2*(    -vx2    )*(    -vx2    )-cu_sq); 
-         //(D.f[N])[kn]=(one-q)/(one+q)*(f_S-feq*om_turb)/(one-om_turb)+(q*(f_S+f_N)-six*c2over27*(-VeloY     ))/(one+q);
-         //(D.f[N])[kn]=zero;
+         VeloY = c0o1;
+         y = true;
+
+         velocityLB = -vx2;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
+         velocityBC = -VeloY;
+         (dist.f[DIR_0P0])[kn] = getInterpolatedDistributionForVeloWithPressureBC(q, f_S, f_N, feq, om_turb, drho, velocityBC, c2o27);
       }
 
-      q = q_dirT[k];
+      q = (subgridD.q[DIR_00P])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-	     VeloY = fac*vx2;
-		 VeloZ = c0o1;
-		 z = true;
-         feq=c2o27* (drho/*+three*(         vx3)*/+c9o2*(         vx3)*(         vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[B])[kb]=(c1o1-q)/(c1o1+q)*(f_T-f_B+(f_T+f_B-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_T+f_B)-c6o1*c2o27*( VeloZ     ))/(c1o1+q) - c2o27 * drho;
-         //feq=c2over27* (drho+three*(         vx3)+c9over2*(         vx3)*(         vx3)-cu_sq); 
-         //(D.f[B])[kb]=(one-q)/(one+q)*(f_T-feq*om_turb)/(one-om_turb)+(q*(f_T+f_B)-six*c2over27*( VeloZ     ))/(one+q);
-         //(D.f[B])[kb]=one;
+         VeloZ = c0o1;
+         z = true;
+
+         velocityLB = vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
+         velocityBC = VeloZ;
+         (dist.f[DIR_00M])[kb] = getInterpolatedDistributionForVeloWithPressureBC(q, f_T, f_B, feq, om_turb, drho, velocityBC, c2o27);
       }
 
-      q = q_dirB[k];
+      q = (subgridD.q[DIR_00M])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-	     VeloY = fac*vx2;
-		 VeloZ = c0o1;
-		 z = true;
-         feq=c2o27* (drho/*+three*(        -vx3)*/+c9o2*(        -vx3)*(        -vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[T])[kt]=(c1o1-q)/(c1o1+q)*(f_B-f_T+(f_B+f_T-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_B+f_T)-c6o1*c2o27*(-VeloZ     ))/(c1o1+q) - c2o27 * drho;
-         //feq=c2over27* (drho+three*(        -vx3)+c9over2*(        -vx3)*(        -vx3)-cu_sq); 
-         //(D.f[T])[kt]=(one-q)/(one+q)*(f_B-feq*om_turb)/(one-om_turb)+(q*(f_B+f_T)-six*c2over27*(-VeloZ     ))/(one+q);
-         //(D.f[T])[kt]=zero;
+         VeloZ = c0o1;
+         z = true;
+
+         velocityLB = -vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
+         velocityBC = -VeloZ;
+         (dist.f[DIR_00P])[kt] = getInterpolatedDistributionForVeloWithPressureBC(q, f_B, f_T, feq, om_turb, drho, velocityBC, c2o27);
       }
 
-      q = q_dirNE[k];
+      q = (subgridD.q[DIR_PP0])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-	     VeloY = fac*vx2;
-	     VeloZ = fac*vx3;
-		 if (x == true) VeloX = c0o1;
-		 if (y == true) VeloY = c0o1;
-         feq=c1o54* (drho/*+three*( vx1+vx2    )*/+c9o2*( vx1+vx2    )*( vx1+vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[SW])[ksw]=(c1o1-q)/(c1o1+q)*(f_NE-f_SW+(f_NE+f_SW-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_NE+f_SW)-c6o1*c1o54*(VeloX+VeloY))/(c1o1+q) - c1o54 * drho;
-         //feq=c1over54* (drho+three*( vx1+vx2    )+c9over2*( vx1+vx2    )*( vx1+vx2    )-cu_sq); 
-         //(D.f[SW])[ksw]=(one-q)/(one+q)*(f_NE-feq*om_turb)/(one-om_turb)+(q*(f_NE+f_SW)-six*c1over54*(VeloX+VeloY))/(one+q);
-         //(D.f[SW])[ksw]=zero;
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+
+         velocityLB = vx1 + vx2;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = VeloX + VeloY;
+         (dist.f[DIR_MM0])[ksw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_NE, f_SW, feq, om_turb, drho, velocityBC, c1o54);
       }
 
-      q = q_dirSW[k];
+      q = (subgridD.q[DIR_MM0])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-	     VeloY = fac*vx2;
-	     VeloZ = fac*vx3;
-		 if (x == true) VeloX = c0o1;
-		 if (y == true) VeloY = c0o1;
-         feq=c1o54* (drho/*+three*(-vx1-vx2    )*/+c9o2*(-vx1-vx2    )*(-vx1-vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[NE])[kne]=(c1o1-q)/(c1o1+q)*(f_SW-f_NE+(f_SW+f_NE-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_SW+f_NE)-c6o1*c1o54*(-VeloX-VeloY))/(c1o1+q) - c1o54 * drho;
-         //feq=c1over54* (drho+three*(-vx1-vx2    )+c9over2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq); 
-         //(D.f[NE])[kne]=(one-q)/(one+q)*(f_SW-feq*om_turb)/(one-om_turb)+(q*(f_SW+f_NE)-six*c1over54*(-VeloX-VeloY))/(one+q);
-         //(D.f[NE])[kne]=zero;
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+
+         velocityLB = -vx1 - vx2;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = -VeloX - VeloY;
+         (dist.f[DIR_PP0])[kne] = getInterpolatedDistributionForVeloWithPressureBC(q, f_SW, f_NE, feq, om_turb, drho, velocityBC, c1o54);
       }
 
-      q = q_dirSE[k];
+      q = (subgridD.q[DIR_PM0])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-	     VeloY = fac*vx2;
-	     VeloZ = fac*vx3;
-		 if (x == true) VeloX = c0o1;
-		 if (y == true) VeloY = c0o1;
-         feq=c1o54* (drho/*+three*( vx1-vx2    )*/+c9o2*( vx1-vx2    )*( vx1-vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[NW])[knw]=(c1o1-q)/(c1o1+q)*(f_SE-f_NW+(f_SE+f_NW-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_SE+f_NW)-c6o1*c1o54*( VeloX-VeloY))/(c1o1+q) - c1o54 * drho;
-         //feq=c1over54* (drho+three*( vx1-vx2    )+c9over2*( vx1-vx2    )*( vx1-vx2    )-cu_sq); 
-         //(D.f[NW])[knw]=(one-q)/(one+q)*(f_SE-feq*om_turb)/(one-om_turb)+(q*(f_SE+f_NW)-six*c1over54*( VeloX-VeloY))/(one+q);
-         //(D.f[NW])[knw]=zero;
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+
+         velocityLB = vx1 - vx2;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = VeloX - VeloY;
+         (dist.f[DIR_MP0])[knw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_SE, f_NW, feq, om_turb, drho, velocityBC, c1o54);
       }
 
-      q = q_dirNW[k];
+      q = (subgridD.q[DIR_MP0])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-	     VeloY = fac*vx2;
-	     VeloZ = fac*vx3;
-		 if (x == true) VeloX = c0o1;
-		 if (y == true) VeloY = c0o1;
-         feq=c1o54* (drho/*+three*(-vx1+vx2    )*/+c9o2*(-vx1+vx2    )*(-vx1+vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[SE])[kse]=(c1o1-q)/(c1o1+q)*(f_NW-f_SE+(f_NW+f_SE-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_NW+f_SE)-c6o1*c1o54*(-VeloX+VeloY))/(c1o1+q) - c1o54 * drho;
-         //feq=c1over54* (drho+three*(-vx1+vx2    )+c9over2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq); 
-         //(D.f[SE])[kse]=(one-q)/(one+q)*(f_NW-feq*om_turb)/(one-om_turb)+(q*(f_NW+f_SE)-six*c1over54*(-VeloX+VeloY))/(one+q);
-         //(D.f[SE])[kse]=zero;
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+
+         velocityLB = -vx1 + vx2;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = -VeloX + VeloY;
+         (dist.f[DIR_PM0])[kse] = getInterpolatedDistributionForVeloWithPressureBC(q, f_NW, f_SE, feq, om_turb, drho, velocityBC, c1o54);
       }
 
-      q = q_dirTE[k];
+      q = (subgridD.q[DIR_P0P])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-	     VeloY = fac*vx2;
-	     VeloZ = fac*vx3;
-		 if (x == true) VeloX = c0o1;
-		 if (z == true) VeloZ = c0o1;
-      //  if (k==10000) printf("AFTER x: %u \t  y: %u \t z: %u \n  VeloX: %f \t VeloY: %f \t VeloZ: %f \n\n", x,y,z, VeloX,VeloY,VeloZ);
-         feq=c1o54* (drho/*+three*( vx1    +vx3)*/+c9o2*( vx1    +vx3)*( vx1    +vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BW])[kbw]=(c1o1-q)/(c1o1+q)*(f_TE-f_BW+(f_TE+f_BW-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_TE+f_BW)-c6o1*c1o54*( VeloX+VeloZ))/(c1o1+q) - c1o54 * drho;
-         //feq=c1over54* (drho+three*( vx1    +vx3)+c9over2*( vx1    +vx3)*( vx1    +vx3)-cu_sq); 
-         //(D.f[BW])[kbw]=(one-q)/(one+q)*(f_TE-feq*om_turb)/(one-om_turb)+(q*(f_TE+f_BW)-six*c1over54*( VeloX+VeloZ))/(one+q);
-         //(D.f[BW])[kbw]=zero;
+         VeloX = slipLength*vx1;
+         VeloZ = slipLength*vx3;
+         if (x == true) VeloX = c0o1;
+         if (z == true) VeloZ = c0o1;
+
+         velocityLB = vx1 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = VeloX + VeloZ;
+         (dist.f[DIR_M0M])[kbw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TE, f_BW, feq, om_turb, drho, velocityBC, c1o54);
       }
 
-      q = q_dirBW[k];
+      q = (subgridD.q[DIR_M0M])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-	     VeloY = fac*vx2;
-	     VeloZ = fac*vx3;
-		 if (x == true) VeloX = c0o1;
-		 if (z == true) VeloZ = c0o1;
-         feq=c1o54* (drho/*+three*(-vx1    -vx3)*/+c9o2*(-vx1    -vx3)*(-vx1    -vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TE])[kte]=(c1o1-q)/(c1o1+q)*(f_BW-f_TE+(f_BW+f_TE-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_BW+f_TE)-c6o1*c1o54*(-VeloX-VeloZ))/(c1o1+q) - c1o54 * drho;
-         //feq=c1over54* (drho+three*(-vx1    -vx3)+c9over2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq); 
-         //(D.f[TE])[kte]=(one-q)/(one+q)*(f_BW-feq*om_turb)/(one-om_turb)+(q*(f_BW+f_TE)-six*c1over54*(-VeloX-VeloZ))/(one+q);
-         //(D.f[TE])[kte]=zero;
+        VeloX = slipLength*vx1;
+        VeloZ = slipLength*vx3;
+        if (x == true) VeloX = c0o1;
+        if (z == true) VeloZ = c0o1;
+
+         velocityLB = -vx1 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = -VeloX - VeloZ;
+         (dist.f[DIR_P0P])[kte] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BW, f_TE, feq, om_turb, drho, velocityBC, c1o54);
       }
 
-      q = q_dirBE[k];
+      q = (subgridD.q[DIR_P0M])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-	     VeloY = fac*vx2;
-	     VeloZ = fac*vx3;
-		 if (x == true) VeloX = c0o1;
-		 if (z == true) VeloZ = c0o1;
-         feq=c1o54* (drho/*+three*( vx1    -vx3)*/+c9o2*( vx1    -vx3)*( vx1    -vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TW])[ktw]=(c1o1-q)/(c1o1+q)*(f_BE-f_TW+(f_BE+f_TW-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_BE+f_TW)-c6o1*c1o54*( VeloX-VeloZ))/(c1o1+q) - c1o54 * drho;
-         //feq=c1over54* (drho+three*( vx1    -vx3)+c9over2*( vx1    -vx3)*( vx1    -vx3)-cu_sq); 
-         //(D.f[TW])[ktw]=(one-q)/(one+q)*(f_BE-feq*om_turb)/(one-om_turb)+(q*(f_BE+f_TW)-six*c1over54*( VeloX-VeloZ))/(one+q);
-         //(D.f[TW])[ktw]=zero;
+         VeloX = slipLength*vx1;
+         VeloZ = slipLength*vx3;
+         if (x == true) VeloX = c0o1;
+         if (z == true) VeloZ = c0o1;
+
+         velocityLB = vx1 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = VeloX - VeloZ;
+         (dist.f[DIR_M0P])[ktw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BE, f_TW, feq, om_turb, drho, velocityBC, c1o54);
       }
 
-      q = q_dirTW[k];
+      q = (subgridD.q[DIR_M0P])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-	     VeloY = fac*vx2;
-	     VeloZ = fac*vx3;
-		 if (x == true) VeloX = c0o1;
-		 if (z == true) VeloZ = c0o1;
-         feq=c1o54* (drho/*+three*(-vx1    +vx3)*/+c9o2*(-vx1    +vx3)*(-vx1    +vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BE])[kbe]=(c1o1-q)/(c1o1+q)*(f_TW-f_BE+(f_TW+f_BE-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_TW+f_BE)-c6o1*c1o54*(-VeloX+VeloZ))/(c1o1+q) - c1o54 * drho;
-         //feq=c1over54* (drho+three*(-vx1    +vx3)+c9over2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq); 
-         //(D.f[BE])[kbe]=(one-q)/(one+q)*(f_TW-feq*om_turb)/(one-om_turb)+(q*(f_TW+f_BE)-six*c1over54*(-VeloX+VeloZ))/(one+q);
-         //(D.f[BE])[kbe]=zero;
+         VeloX = slipLength*vx1;
+         VeloZ = slipLength*vx3;
+         if (x == true) VeloX = c0o1;
+         if (z == true) VeloZ = c0o1;
+
+         velocityLB = -vx1 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = -VeloX + VeloZ;
+         (dist.f[DIR_P0M])[kbe] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TW, f_BE, feq, om_turb, drho, velocityBC, c1o54);
       }
 
-      q = q_dirTN[k];
+      q = (subgridD.q[DIR_0PP])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-	     VeloY = fac*vx2;
-	     VeloZ = fac*vx3;
-		 if (y == true) VeloY = c0o1;
-		 if (z == true) VeloZ = c0o1;
-         feq=c1o54* (drho/*+three*(     vx2+vx3)*/+c9o2*(     vx2+vx3)*(     vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BS])[kbs]=(c1o1-q)/(c1o1+q)*(f_TN-f_BS+(f_TN+f_BS-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_TN+f_BS)-c6o1*c1o54*( VeloY+VeloZ))/(c1o1+q) - c1o54 * drho;
-         //feq=c1over54* (drho+three*(     vx2+vx3)+c9over2*(     vx2+vx3)*(     vx2+vx3)-cu_sq); 
-         //(D.f[BS])[kbs]=(one-q)/(one+q)*(f_TN-feq*om_turb)/(one-om_turb)+(q*(f_TN+f_BS)-six*c1over54*( VeloY+VeloZ))/(one+q);
-         //(D.f[BS])[kbs]=zero;
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+
+         velocityLB = vx2 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = VeloY + VeloZ;
+         (dist.f[DIR_0MM])[kbs] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TN, f_BS, feq, om_turb, drho, velocityBC, c1o54);
       }
 
-      q = q_dirBS[k];
+      q = (subgridD.q[DIR_0MM])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-	     VeloY = fac*vx2;
-	     VeloZ = fac*vx3;
-		 if (y == true) VeloY = c0o1;
-		 if (z == true) VeloZ = c0o1;
-         feq=c1o54* (drho/*+three*(    -vx2-vx3)*/+c9o2*(    -vx2-vx3)*(    -vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TN])[ktn]=(c1o1-q)/(c1o1+q)*(f_BS-f_TN+(f_BS+f_TN-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_BS+f_TN)-c6o1*c1o54*( -VeloY-VeloZ))/(c1o1+q) - c1o54 * drho;
-         //feq=c1over54* (drho+three*(    -vx2-vx3)+c9over2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq); 
-         //(D.f[TN])[ktn]=(one-q)/(one+q)*(f_BS-feq*om_turb)/(one-om_turb)+(q*(f_BS+f_TN)-six*c1over54*( -VeloY-VeloZ))/(one+q);
-         //(D.f[TN])[ktn]=zero;
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+
+         velocityLB = -vx2 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = -VeloY - VeloZ;
+         (dist.f[DIR_0PP])[ktn] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BS, f_TN, feq, om_turb, drho, velocityBC, c1o54);
       }
 
-      q = q_dirBN[k];
+
+      q = (subgridD.q[DIR_0PM])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-	     VeloY = fac*vx2;
-	     VeloZ = fac*vx3;
-		 if (y == true) VeloY = c0o1;
-		 if (z == true) VeloZ = c0o1;
-         feq=c1o54* (drho/*+three*(     vx2-vx3)*/+c9o2*(     vx2-vx3)*(     vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TS])[kts]=(c1o1-q)/(c1o1+q)*(f_BN-f_TS+(f_BN+f_TS-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_BN+f_TS)-c6o1*c1o54*( VeloY-VeloZ))/(c1o1+q) - c1o54 * drho;
-         //feq=c1over54* (drho+three*(     vx2-vx3)+c9over2*(     vx2-vx3)*(     vx2-vx3)-cu_sq); 
-         //(D.f[TS])[kts]=(one-q)/(one+q)*(f_BN-feq*om_turb)/(one-om_turb)+(q*(f_BN+f_TS)-six*c1over54*( VeloY-VeloZ))/(one+q);
-         //(D.f[TS])[kts]=zero;
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+
+         velocityLB = vx2 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = VeloY - VeloZ;
+         (dist.f[DIR_0MP])[kts] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BN, f_TS, feq, om_turb, drho, velocityBC, c1o54);
       }
 
-      q = q_dirTS[k];
+      q = (subgridD.q[DIR_0MP])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-	     VeloY = fac*vx2;
-	     VeloZ = fac*vx3;
-		 if (y == true) VeloY = c0o1;
-		 if (z == true) VeloZ = c0o1;
-         feq=c1o54* (drho/*+three*(    -vx2+vx3)*/+c9o2*(    -vx2+vx3)*(    -vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BN])[kbn]=(c1o1-q)/(c1o1+q)*(f_TS-f_BN+(f_TS+f_BN-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_TS+f_BN)-c6o1*c1o54*( -VeloY+VeloZ))/(c1o1+q) - c1o54 * drho;
-         //feq=c1over54* (drho+three*(    -vx2+vx3)+c9over2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq); 
-         //(D.f[BN])[kbn]=(one-q)/(one+q)*(f_TS-feq*om_turb)/(one-om_turb)+(q*(f_TS+f_BN)-six*c1over54*( -VeloY+VeloZ))/(one+q);
-         //(D.f[BN])[kbn]=zero;
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+
+         velocityLB = -vx2 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         velocityBC = -VeloY + VeloZ;
+         (dist.f[DIR_0PM])[kbn] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TS, f_BN, feq, om_turb, drho, velocityBC, c1o54);
       }
 
-      q = q_dirTNE[k];
+      q = (subgridD.q[DIR_PPP])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-	     VeloY = fac*vx2;
-	     VeloZ = fac*vx3;
-		 if (x == true) VeloX = c0o1;
-		 if (y == true) VeloY = c0o1;
-		 if (z == true) VeloZ = c0o1;
-         feq=c1o216*(drho/*+three*( vx1+vx2+vx3)*/+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BSW])[kbsw]=(c1o1-q)/(c1o1+q)*(f_TNE-f_BSW+(f_TNE+f_BSW-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_TNE+f_BSW)-c6o1*c1o216*( VeloX+VeloY+VeloZ))/(c1o1+q) - c1o216 * drho;
-         //feq=c1over216*(drho+three*( vx1+vx2+vx3)+c9over2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq); 
-         //(D.f[BSW])[kbsw]=(one-q)/(one+q)*(f_TNE-feq*om_turb)/(one-om_turb)+(q*(f_TNE+f_BSW)-six*c1over216*( VeloX+VeloY+VeloZ))/(one+q);
-         //(D.f[BSW])[kbsw]=zero;
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+         velocityLB = vx1 + vx2 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         velocityBC = VeloX + VeloY + VeloZ;
+         (dist.f[DIR_MMM])[kbsw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TNE, f_BSW, feq, om_turb, drho, velocityBC, c1o216);
       }
 
-      q = q_dirBSW[k];
+      q = (subgridD.q[DIR_MMM])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-	     VeloY = fac*vx2;
-	     VeloZ = fac*vx3;
-		 if (x == true) VeloX = c0o1;
-		 if (y == true) VeloY = c0o1;
-		 if (z == true) VeloZ = c0o1;
-         feq=c1o216*(drho/*+three*(-vx1-vx2-vx3)*/+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TNE])[ktne]=(c1o1-q)/(c1o1+q)*(f_BSW-f_TNE+(f_BSW+f_TNE-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_BSW+f_TNE)-c6o1*c1o216*(-VeloX-VeloY-VeloZ))/(c1o1+q) - c1o216 * drho;
-         //feq=c1over216*(drho+three*(-vx1-vx2-vx3)+c9over2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq); 
-         //(D.f[TNE])[ktne]=(one-q)/(one+q)*(f_BSW-feq*om_turb)/(one-om_turb)+(q*(f_BSW+f_TNE)-six*c1over216*(-VeloX-VeloY-VeloZ))/(one+q);
-         //(D.f[TNE])[ktne]=zero;
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+         velocityLB = -vx1 - vx2 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         velocityBC = -VeloX - VeloY - VeloZ;
+         (dist.f[DIR_PPP])[ktne] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BSW, f_TNE, feq, om_turb, drho, velocityBC, c1o216);
       }
 
-      q = q_dirBNE[k];
+
+      q = (subgridD.q[DIR_PPM])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-	     VeloY = fac*vx2;
-	     VeloZ = fac*vx3;
-		 if (x == true) VeloX = c0o1;
-		 if (y == true) VeloY = c0o1;
-		 if (z == true) VeloZ = c0o1;
-         feq=c1o216*(drho/*+three*( vx1+vx2-vx3)*/+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TSW])[ktsw]=(c1o1-q)/(c1o1+q)*(f_BNE-f_TSW+(f_BNE+f_TSW-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_BNE+f_TSW)-c6o1*c1o216*( VeloX+VeloY-VeloZ))/(c1o1+q) - c1o216 * drho;
-         //feq=c1over216*(drho+three*( vx1+vx2-vx3)+c9over2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq); 
-         //(D.f[TSW])[ktsw]=(one-q)/(one+q)*(f_BNE-feq*om_turb)/(one-om_turb)+(q*(f_BNE+f_TSW)-six*c1over216*( VeloX+VeloY-VeloZ))/(one+q);
-         //(D.f[TSW])[ktsw]=zero;
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+         velocityLB = vx1 + vx2 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         velocityBC = VeloX + VeloY - VeloZ;
+         (dist.f[DIR_MMP])[ktsw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BNE, f_TSW, feq, om_turb, drho, velocityBC, c1o216);
       }
 
-      q = q_dirTSW[k];
+      q = (subgridD.q[DIR_MMP])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-	     VeloY = fac*vx2;
-	     VeloZ = fac*vx3;
-		 if (x == true) VeloX = c0o1;
-		 if (y == true) VeloY = c0o1;
-		 if (z == true) VeloZ = c0o1;
-         feq=c1o216*(drho/*+three*(-vx1-vx2+vx3)*/+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BNE])[kbne]=(c1o1-q)/(c1o1+q)*(f_TSW-f_BNE+(f_TSW+f_BNE-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_TSW+f_BNE)-c6o1*c1o216*(-VeloX-VeloY+VeloZ))/(c1o1+q) - c1o216 * drho;
-         //feq=c1over216*(drho+three*(-vx1-vx2+vx3)+c9over2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq); 
-         //(D.f[BNE])[kbne]=(one-q)/(one+q)*(f_TSW-feq*om_turb)/(one-om_turb)+(q*(f_TSW+f_BNE)-six*c1over216*(-VeloX-VeloY+VeloZ))/(one+q);
-         //(D.f[BNE])[kbne]=zero;
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+         velocityLB = -vx1 - vx2 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         velocityBC = -VeloX - VeloY + VeloZ;
+         (dist.f[DIR_PPM])[kbne] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TSW, f_BNE, feq, om_turb, drho, velocityBC, c1o216);
       }
 
-      q = q_dirTSE[k];
+      q = (subgridD.q[DIR_PMP])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-	     VeloY = fac*vx2;
-	     VeloZ = fac*vx3;
-		 if (x == true) VeloX = c0o1;
-		 if (y == true) VeloY = c0o1;
-		 if (z == true) VeloZ = c0o1;
-         feq=c1o216*(drho/*+three*( vx1-vx2+vx3)*/+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BNW])[kbnw]=(c1o1-q)/(c1o1+q)*(f_TSE-f_BNW+(f_TSE+f_BNW-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_TSE+f_BNW)-c6o1*c1o216*( VeloX-VeloY+VeloZ))/(c1o1+q) - c1o216 * drho;
-         //feq=c1over216*(drho+three*( vx1-vx2+vx3)+c9over2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq); 
-         //(D.f[BNW])[kbnw]=(one-q)/(one+q)*(f_TSE-feq*om_turb)/(one-om_turb)+(q*(f_TSE+f_BNW)-six*c1over216*( VeloX-VeloY+VeloZ))/(one+q);
-         //(D.f[BNW])[kbnw]=zero;
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+         velocityLB = vx1 - vx2 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         velocityBC = VeloX - VeloY + VeloZ;
+         (dist.f[DIR_MPM])[kbnw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TSE, f_BNW, feq, om_turb, drho, velocityBC, c1o216);
       }
 
-      q = q_dirBNW[k];
+      q = (subgridD.q[DIR_MPM])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-	     VeloY = fac*vx2;
-	     VeloZ = fac*vx3;
-		 if (x == true) VeloX = c0o1;
-		 if (y == true) VeloY = c0o1;
-		 if (z == true) VeloZ = c0o1;
-         feq=c1o216*(drho/*+three*(-vx1+vx2-vx3)*/+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TSE])[ktse]=(c1o1-q)/(c1o1+q)*(f_BNW-f_TSE+(f_BNW+f_TSE-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_BNW+f_TSE)-c6o1*c1o216*(-VeloX+VeloY-VeloZ))/(c1o1+q) - c1o216 * drho;
-         //feq=c1over216*(drho+three*(-vx1+vx2-vx3)+c9over2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq); 
-         //(D.f[TSE])[ktse]=(one-q)/(one+q)*(f_BNW-feq*om_turb)/(one-om_turb)+(q*(f_BNW+f_TSE)-six*c1over216*(-VeloX+VeloY-VeloZ))/(one+q);
-         //(D.f[TSE])[ktse]=zero;
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+         velocityLB = -vx1 + vx2 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         velocityBC = -VeloX + VeloY - VeloZ;
+         (dist.f[DIR_PMP])[ktse] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BNW, f_TSE, feq, om_turb, drho, velocityBC, c1o216);
       }
 
-      q = q_dirBSE[k];
+      q = (subgridD.q[DIR_PMM])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-	     VeloY = fac*vx2;
-	     VeloZ = fac*vx3;
-		 if (x == true) VeloX = c0o1;
-		 if (y == true) VeloY = c0o1;
-		 if (z == true) VeloZ = c0o1;
-         feq=c1o216*(drho/*+three*( vx1-vx2-vx3)*/+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TNW])[ktnw]=(c1o1-q)/(c1o1+q)*(f_BSE-f_TNW+(f_BSE+f_TNW-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_BSE+f_TNW)-c6o1*c1o216*( VeloX-VeloY-VeloZ))/(c1o1+q) - c1o216 * drho;
-         //feq=c1over216*(drho+three*( vx1-vx2-vx3)+c9over2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq); 
-         //(D.f[TNW])[ktnw]=(one-q)/(one+q)*(f_BSE-feq*om_turb)/(one-om_turb)+(q*(f_BSE+f_TNW)-six*c1over216*( VeloX-VeloY-VeloZ))/(one+q);
-         //(D.f[TNW])[ktnw]=zero;
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+         velocityLB = vx1 - vx2 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         velocityBC = VeloX - VeloY - VeloZ;
+         (dist.f[DIR_MPP])[ktnw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BSE, f_TNW, feq, om_turb, drho, velocityBC, c1o216);
       }
 
-      q = q_dirTNW[k];
+      q = (subgridD.q[DIR_MPP])[k];
       if (q>=c0o1 && q<=c1o1)
       {
-		 VeloX = fac*vx1;
-	     VeloY = fac*vx2;
-	     VeloZ = fac*vx3;
-		 if (x == true) VeloX = c0o1;
-		 if (y == true) VeloY = c0o1;
-		 if (z == true) VeloZ = c0o1;
-         feq=c1o216*(drho/*+three*(-vx1+vx2+vx3)*/+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BSE])[kbse]=(c1o1-q)/(c1o1+q)*(f_TNW-f_BSE+(f_TNW+f_BSE-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_TNW+f_BSE)-c6o1*c1o216*(-VeloX+VeloY+VeloZ))/(c1o1+q) - c1o216 * drho;
-         //feq=c1over216*(drho+three*(-vx1+vx2+vx3)+c9over2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq); 
-         //(D.f[BSE])[kbse]=(one-q)/(one+q)*(f_TNW-feq*om_turb)/(one-om_turb)+(q*(f_TNW+f_BSE)-six*c1over216*(-VeloX+VeloY+VeloZ))/(one+q);
-         //(D.f[BSE])[kbse]=zero;
+         VeloX = slipLength*vx1;
+         VeloY = slipLength*vx2;
+         VeloZ = slipLength*vx3;
+         if (x == true) VeloX = c0o1;
+         if (y == true) VeloY = c0o1;
+         if (z == true) VeloZ = c0o1;
+         velocityLB = -vx1 + vx2 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         velocityBC = -VeloX + VeloY + VeloZ;
+         (dist.f[DIR_PMM])[kbse] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TNW, f_BSE, feq, om_turb, drho, velocityBC, c1o216);
       }
    }
 }
 
+// __global__ void QSlipDeviceComp27TurbViscosity(real* DD, 
+// 											 int* k_Q, 
+// 											 real* QQ,
+// 											 unsigned int numberOfBCnodes,
+// 											 real om1, 
+// 											 unsigned int* neighborX,
+// 											 unsigned int* neighborY,
+// 											 unsigned int* neighborZ,
+//                                   real* turbViscosity,
+// 											 unsigned int size_Mat, 
+// 											 bool isEvenTimestep)
+// {
+//    Distributions27 D;
+//    if (isEvenTimestep==true)
+//    {
+//       D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+//       D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+//       D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+//       D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+//       D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+//       D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+//       D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+//       D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+//       D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+//       D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+//       D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+//       D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+//       D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+//       D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+//       D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+//       D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+//       D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+//       D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+//       D.f[DIR_000] = &DD[DIR_000*size_Mat];
+//       D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+//       D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+//       D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+//       D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+//       D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+//       D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+//       D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+//       D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+//    } 
+//    else
+//    {
+//       D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+//       D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+//       D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+//       D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+//       D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+//       D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+//       D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+//       D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+//       D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+//       D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+//       D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+//       D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+//       D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+//       D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+//       D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+//       D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+//       D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+//       D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+//       D.f[DIR_000] = &DD[DIR_000*size_Mat];
+//       D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+//       D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+//       D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+//       D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+//       D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+//       D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+//       D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+//       D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+//    }
+//    ////////////////////////////////////////////////////////////////////////////////
+//    const unsigned  x = threadIdx.x;  // global x-index
+//    const unsigned  y = blockIdx.x;   // global y-index
+//    const unsigned  z = blockIdx.y;   // global z-index
+
+//    const unsigned nx = blockDim.x;
+//    const unsigned ny = gridDim.x;
+
+//    const unsigned k = nx*(ny*z + y) + x;
+//    //////////////////////////////////////////////////////////////////////////
+
+//    if(k<numberOfBCnodes)
+//    {
+//       ////////////////////////////////////////////////////////////////////////////////
+//       real *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB, 
+//             *q_dirNE,  *q_dirSW,  *q_dirSE,  *q_dirNW,  *q_dirTE,  *q_dirBW,
+//             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
+//             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
+//             *q_dirBSE, *q_dirBNW; 
+//       q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
+//       q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
+//       q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
+//       q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
+//       q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
+//       q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
+//       q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
+//       q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
+//       q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
+//       q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
+//       q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
+//       q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
+//       q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
+//       q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
+//       q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
+//       q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
+//       q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
+//       q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+//       q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
+//       q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
+//       q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
+//       q_dirTNW = &QQ[DIR_MPP * numberOfBCnodes];
+//       q_dirBNE = &QQ[DIR_PPM * numberOfBCnodes];
+//       q_dirBSW = &QQ[DIR_MMM * numberOfBCnodes];
+//       q_dirBSE = &QQ[DIR_PMM * numberOfBCnodes];
+//       q_dirBNW = &QQ[DIR_MPM * numberOfBCnodes];
+//       ////////////////////////////////////////////////////////////////////////////////
+//       //index
+//       unsigned int KQK  = k_Q[k];
+//       unsigned int kzero= KQK;
+//       unsigned int ke   = KQK;
+//       unsigned int kw   = neighborX[KQK];
+//       unsigned int kn   = KQK;
+//       unsigned int ks   = neighborY[KQK];
+//       unsigned int kt   = KQK;
+//       unsigned int kb   = neighborZ[KQK];
+//       unsigned int ksw  = neighborY[kw];
+//       unsigned int kne  = KQK;
+//       unsigned int kse  = ks;
+//       unsigned int knw  = kw;
+//       unsigned int kbw  = neighborZ[kw];
+//       unsigned int kte  = KQK;
+//       unsigned int kbe  = kb;
+//       unsigned int ktw  = kw;
+//       unsigned int kbs  = neighborZ[ks];
+//       unsigned int ktn  = KQK;
+//       unsigned int kbn  = kb;
+//       unsigned int kts  = ks;
+//       unsigned int ktse = ks;
+//       unsigned int kbnw = kbw;
+//       unsigned int ktnw = kw;
+//       unsigned int kbse = kbs;
+//       unsigned int ktsw = ksw;
+//       unsigned int kbne = kb;
+//       unsigned int ktne = KQK;
+//       unsigned int kbsw = neighborZ[ksw];
+      
+//       ////////////////////////////////////////////////////////////////////////////////
+//       real f_W    = (D.f[DIR_P00   ])[ke   ];
+//       real f_E    = (D.f[DIR_M00   ])[kw   ];
+//       real f_S    = (D.f[DIR_0P0   ])[kn   ];
+//       real f_N    = (D.f[DIR_0M0   ])[ks   ];
+//       real f_B    = (D.f[DIR_00P   ])[kt   ];
+//       real f_T    = (D.f[DIR_00M   ])[kb   ];
+//       real f_SW   = (D.f[DIR_PP0  ])[kne  ];
+//       real f_NE   = (D.f[DIR_MM0  ])[ksw  ];
+//       real f_NW   = (D.f[DIR_PM0  ])[kse  ];
+//       real f_SE   = (D.f[DIR_MP0  ])[knw  ];
+//       real f_BW   = (D.f[DIR_P0P  ])[kte  ];
+//       real f_TE   = (D.f[DIR_M0M  ])[kbw  ];
+//       real f_TW   = (D.f[DIR_P0M  ])[kbe  ];
+//       real f_BE   = (D.f[DIR_M0P  ])[ktw  ];
+//       real f_BS   = (D.f[DIR_0PP  ])[ktn  ];
+//       real f_TN   = (D.f[DIR_0MM  ])[kbs  ];
+//       real f_TS   = (D.f[DIR_0PM  ])[kbn  ];
+//       real f_BN   = (D.f[DIR_0MP  ])[kts  ];
+//       real f_BSW  = (D.f[DIR_PPP ])[ktne ];
+//       real f_BNE  = (D.f[DIR_MMP ])[ktsw ];
+//       real f_BNW  = (D.f[DIR_PMP ])[ktse ];
+//       real f_BSE  = (D.f[DIR_MPP ])[ktnw ];
+//       real f_TSW  = (D.f[DIR_PPM ])[kbne ];
+//       real f_TNE  = (D.f[DIR_MMM ])[kbsw ];
+//       real f_TNW  = (D.f[DIR_PMM ])[kbse ];
+//       real f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+//       ////////////////////////////////////////////////////////////////////////////////
+//       real vx1, vx2, vx3, drho, feq, q;
+//       drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
+//                 f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + 
+//                 f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[DIR_000])[kzero]); 
+
+//       vx1    =  (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
+//                 ((f_BE - f_TW)   + (f_TE - f_BW))   + ((f_SE - f_NW)   + (f_NE - f_SW)) +
+//                 (f_E - f_W)) / (c1o1 + drho); 
+         
+
+//       vx2    =   ((-(f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
+//                  ((f_BN - f_TS)   + (f_TN - f_BS))    + (-(f_SE - f_NW)  + (f_NE - f_SW)) +
+//                  (f_N - f_S)) / (c1o1 + drho); 
+
+//       vx3    =   (((f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) + (f_TSW - f_BNE)) +
+//                  (-(f_BN - f_TS)  + (f_TN - f_BS))   + ((f_TE - f_BW)   - (f_BE - f_TW)) +
+//                  (f_T - f_B)) / (c1o1 + drho); 
+
+//       real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3) * (c1o1 + drho);
+
+//       //////////////////////////////////////////////////////////////////////////
+//       if (isEvenTimestep==false)
+//       {
+//          D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+//          D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+//          D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+//          D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+//          D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+//          D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+//          D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+//          D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+//          D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+//          D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+//          D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+//          D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+//          D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+//          D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+//          D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+//          D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+//          D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+//          D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+//          D.f[DIR_000] = &DD[DIR_000*size_Mat];
+//          D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+//          D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+//          D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+//          D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+//          D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+//          D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+//          D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+//          D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+//       } 
+//       else
+//       {
+//          D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+//          D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+//          D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+//          D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+//          D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+//          D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+//          D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+//          D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+//          D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+//          D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+//          D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+//          D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+//          D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+//          D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+//          D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+//          D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+//          D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+//          D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+//          D.f[DIR_000] = &DD[DIR_000*size_Mat];
+//          D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+//          D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+//          D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+//          D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+//          D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+//          D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+//          D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+//          D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+//       }
+//       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//       //Test
+//       //(D.f[DIR_000])[k]=c1o10;
+//       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// 	  real om_turb = om1 / (c1o1 + c3o1*om1*max(c0o1, turbViscosity[k_Q[k]]));
+     
+//      real fac = c1o1;//c99o100;
+// 	  real VeloX = fac*vx1;
+// 	  real VeloY = fac*vx2;
+// 	  real VeloZ = fac*vx3;
+// 	  bool x = false;
+// 	  bool y = false;
+// 	  bool z = false;
+
+//       q = q_dirE[k];
+//       if (q>=c0o1 && q<=c1o1)
+//       {
+// 		 VeloX = c0o1;
+// 	     VeloY = fac*vx2;
+// 	     VeloZ = fac*vx3;
+// 		 x = true;
+//          feq=c2o27* (drho/*+three*( vx1        )*/+c9o2*( vx1        )*( vx1        ) * (c1o1 + drho)-cu_sq); 
+//          (D.f[DIR_M00])[kw]=(c1o1-q)/(c1o1+q)*(f_E-f_W+(f_E+f_W-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_E+f_W)-c6o1*c2o27*( VeloX     ))/(c1o1+q) - c2o27 * drho;
+//          //feq=c2over27* (drho+three*( vx1        )+c9over2*( vx1        )*( vx1        )-cu_sq); 
+//          //(D.f[DIR_M00])[kw]=(one-q)/(one+q)*(f_E-feq*om1)/(one-om1)+(q*(f_E+f_W)-six*c2over27*( VeloX     ))/(one+q);
+//          //(D.f[DIR_M00])[kw]=zero;
+//       }
+
+//       q = q_dirW[k];
+//       if (q>=c0o1 && q<=c1o1)
+//       {
+// 		 VeloX = c0o1;
+// 	     VeloY = fac*vx2;
+// 	     VeloZ = fac*vx3;
+// 		 x = true;
+//          feq=c2o27* (drho/*+three*(-vx1        )*/+c9o2*(-vx1        )*(-vx1        ) * (c1o1 + drho)-cu_sq); 
+//          (D.f[DIR_P00])[ke]=(c1o1-q)/(c1o1+q)*(f_W-f_E+(f_W+f_E-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_W+f_E)-c6o1*c2o27*(-VeloX     ))/(c1o1+q) - c2o27 * drho;
+//          //feq=c2over27* (drho+three*(-vx1        )+c9over2*(-vx1        )*(-vx1        )-cu_sq); 
+//          //(D.f[DIR_P00])[ke]=(one-q)/(one+q)*(f_W-feq*om_turb)/(one-om_turb)+(q*(f_W+f_E)-six*c2over27*(-VeloX     ))/(one+q);
+//          //(D.f[DIR_P00])[ke]=zero;
+//       }
+
+//       q = q_dirN[k];
+//       if (q>=c0o1 && q<=c1o1)
+//       {
+// 		 VeloX = fac*vx1;
+// 		 VeloY = c0o1;
+// 	     VeloZ = fac*vx3;
+// 		 y = true;
+//          feq=c2o27* (drho/*+three*(    vx2     )*/+c9o2*(     vx2    )*(     vx2    ) * (c1o1 + drho)-cu_sq); 
+//          (D.f[DIR_0M0])[ks]=(c1o1-q)/(c1o1+q)*(f_N-f_S+(f_N+f_S-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_N+f_S)-c6o1*c2o27*( VeloY     ))/(c1o1+q) - c2o27 * drho;
+//          //feq=c2over27* (drho+three*(    vx2     )+c9over2*(     vx2    )*(     vx2    )-cu_sq); 
+//          //(D.f[DIR_0M0])[ks]=(one-q)/(one+q)*(f_N-feq*om_turb)/(one-om_turb)+(q*(f_N+f_S)-six*c2over27*( VeloY     ))/(one+q);
+//          //(D.f[DIR_0M0])[ks]=zero;
+//       }
+
+//       q = q_dirS[k];
+//       if (q>=c0o1 && q<=c1o1)
+//       {
+// 		 VeloX = fac*vx1;
+// 		 VeloY = c0o1;
+// 	     VeloZ = fac*vx3;
+// 		 y = true;
+//          feq=c2o27* (drho/*+three*(   -vx2     )*/+c9o2*(    -vx2    )*(    -vx2    ) * (c1o1 + drho)-cu_sq); 
+//          (D.f[DIR_0P0])[kn]=(c1o1-q)/(c1o1+q)*(f_S-f_N+(f_S+f_N-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_S+f_N)-c6o1*c2o27*(-VeloY     ))/(c1o1+q) - c2o27 * drho;
+//          //feq=c2over27* (drho+three*(   -vx2     )+c9over2*(    -vx2    )*(    -vx2    )-cu_sq); 
+//          //(D.f[DIR_0P0])[kn]=(one-q)/(one+q)*(f_S-feq*om_turb)/(one-om_turb)+(q*(f_S+f_N)-six*c2over27*(-VeloY     ))/(one+q);
+//          //(D.f[DIR_0P0])[kn]=zero;
+//       }
+
+//       q = q_dirT[k];
+//       if (q>=c0o1 && q<=c1o1)
+//       {
+// 		 VeloX = fac*vx1;
+// 	     VeloY = fac*vx2;
+// 		 VeloZ = c0o1;
+// 		 z = true;
+//          feq=c2o27* (drho/*+three*(         vx3)*/+c9o2*(         vx3)*(         vx3) * (c1o1 + drho)-cu_sq); 
+//          (D.f[DIR_00M])[kb]=(c1o1-q)/(c1o1+q)*(f_T-f_B+(f_T+f_B-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_T+f_B)-c6o1*c2o27*( VeloZ     ))/(c1o1+q) - c2o27 * drho;
+//          //feq=c2over27* (drho+three*(         vx3)+c9over2*(         vx3)*(         vx3)-cu_sq); 
+//          //(D.f[DIR_00M])[kb]=(one-q)/(one+q)*(f_T-feq*om_turb)/(one-om_turb)+(q*(f_T+f_B)-six*c2over27*( VeloZ     ))/(one+q);
+//          //(D.f[DIR_00M])[kb]=one;
+//       }
+
+//       q = q_dirB[k];
+//       if (q>=c0o1 && q<=c1o1)
+//       {
+// 		 VeloX = fac*vx1;
+// 	     VeloY = fac*vx2;
+// 		 VeloZ = c0o1;
+// 		 z = true;
+//          feq=c2o27* (drho/*+three*(        -vx3)*/+c9o2*(        -vx3)*(        -vx3) * (c1o1 + drho)-cu_sq); 
+//          (D.f[DIR_00P])[kt]=(c1o1-q)/(c1o1+q)*(f_B-f_T+(f_B+f_T-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_B+f_T)-c6o1*c2o27*(-VeloZ     ))/(c1o1+q) - c2o27 * drho;
+//          //feq=c2over27* (drho+three*(        -vx3)+c9over2*(        -vx3)*(        -vx3)-cu_sq); 
+//          //(D.f[DIR_00P])[kt]=(one-q)/(one+q)*(f_B-feq*om_turb)/(one-om_turb)+(q*(f_B+f_T)-six*c2over27*(-VeloZ     ))/(one+q);
+//          //(D.f[DIR_00P])[kt]=zero;
+//       }
+
+//       q = q_dirNE[k];
+//       if (q>=c0o1 && q<=c1o1)
+//       {
+// 		 VeloX = fac*vx1;
+// 	     VeloY = fac*vx2;
+// 	     VeloZ = fac*vx3;
+// 		 if (x == true) VeloX = c0o1;
+// 		 if (y == true) VeloY = c0o1;
+//          feq=c1o54* (drho/*+three*( vx1+vx2    )*/+c9o2*( vx1+vx2    )*( vx1+vx2    ) * (c1o1 + drho)-cu_sq); 
+//          (D.f[DIR_MM0])[ksw]=(c1o1-q)/(c1o1+q)*(f_NE-f_SW+(f_NE+f_SW-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_NE+f_SW)-c6o1*c1o54*(VeloX+VeloY))/(c1o1+q) - c1o54 * drho;
+//          //feq=c1over54* (drho+three*( vx1+vx2    )+c9over2*( vx1+vx2    )*( vx1+vx2    )-cu_sq); 
+//          //(D.f[DIR_MM0])[ksw]=(one-q)/(one+q)*(f_NE-feq*om_turb)/(one-om_turb)+(q*(f_NE+f_SW)-six*c1over54*(VeloX+VeloY))/(one+q);
+//          //(D.f[DIR_MM0])[ksw]=zero;
+//       }
+
+//       q = q_dirSW[k];
+//       if (q>=c0o1 && q<=c1o1)
+//       {
+// 		 VeloX = fac*vx1;
+// 	     VeloY = fac*vx2;
+// 	     VeloZ = fac*vx3;
+// 		 if (x == true) VeloX = c0o1;
+// 		 if (y == true) VeloY = c0o1;
+//          feq=c1o54* (drho/*+three*(-vx1-vx2    )*/+c9o2*(-vx1-vx2    )*(-vx1-vx2    ) * (c1o1 + drho)-cu_sq); 
+//          (D.f[DIR_PP0])[kne]=(c1o1-q)/(c1o1+q)*(f_SW-f_NE+(f_SW+f_NE-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_SW+f_NE)-c6o1*c1o54*(-VeloX-VeloY))/(c1o1+q) - c1o54 * drho;
+//          //feq=c1over54* (drho+three*(-vx1-vx2    )+c9over2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq); 
+//          //(D.f[DIR_PP0])[kne]=(one-q)/(one+q)*(f_SW-feq*om_turb)/(one-om_turb)+(q*(f_SW+f_NE)-six*c1over54*(-VeloX-VeloY))/(one+q);
+//          //(D.f[DIR_PP0])[kne]=zero;
+//       }
+
+//       q = q_dirSE[k];
+//       if (q>=c0o1 && q<=c1o1)
+//       {
+// 		 VeloX = fac*vx1;
+// 	     VeloY = fac*vx2;
+// 	     VeloZ = fac*vx3;
+// 		 if (x == true) VeloX = c0o1;
+// 		 if (y == true) VeloY = c0o1;
+//          feq=c1o54* (drho/*+three*( vx1-vx2    )*/+c9o2*( vx1-vx2    )*( vx1-vx2    ) * (c1o1 + drho)-cu_sq); 
+//          (D.f[DIR_MP0])[knw]=(c1o1-q)/(c1o1+q)*(f_SE-f_NW+(f_SE+f_NW-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_SE+f_NW)-c6o1*c1o54*( VeloX-VeloY))/(c1o1+q) - c1o54 * drho;
+//          //feq=c1over54* (drho+three*( vx1-vx2    )+c9over2*( vx1-vx2    )*( vx1-vx2    )-cu_sq); 
+//          //(D.f[DIR_MP0])[knw]=(one-q)/(one+q)*(f_SE-feq*om_turb)/(one-om_turb)+(q*(f_SE+f_NW)-six*c1over54*( VeloX-VeloY))/(one+q);
+//          //(D.f[DIR_MP0])[knw]=zero;
+//       }
+
+//       q = q_dirNW[k];
+//       if (q>=c0o1 && q<=c1o1)
+//       {
+// 		 VeloX = fac*vx1;
+// 	     VeloY = fac*vx2;
+// 	     VeloZ = fac*vx3;
+// 		 if (x == true) VeloX = c0o1;
+// 		 if (y == true) VeloY = c0o1;
+//          feq=c1o54* (drho/*+three*(-vx1+vx2    )*/+c9o2*(-vx1+vx2    )*(-vx1+vx2    ) * (c1o1 + drho)-cu_sq); 
+//          (D.f[DIR_PM0])[kse]=(c1o1-q)/(c1o1+q)*(f_NW-f_SE+(f_NW+f_SE-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_NW+f_SE)-c6o1*c1o54*(-VeloX+VeloY))/(c1o1+q) - c1o54 * drho;
+//          //feq=c1over54* (drho+three*(-vx1+vx2    )+c9over2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq); 
+//          //(D.f[DIR_PM0])[kse]=(one-q)/(one+q)*(f_NW-feq*om_turb)/(one-om_turb)+(q*(f_NW+f_SE)-six*c1over54*(-VeloX+VeloY))/(one+q);
+//          //(D.f[DIR_PM0])[kse]=zero;
+//       }
+
+//       q = q_dirTE[k];
+//       if (q>=c0o1 && q<=c1o1)
+//       {
+// 		 VeloX = fac*vx1;
+// 	     VeloY = fac*vx2;
+// 	     VeloZ = fac*vx3;
+// 		 if (x == true) VeloX = c0o1;
+// 		 if (z == true) VeloZ = c0o1;
+//       //  if (k==10000) printf("AFTER x: %u \t  y: %u \t z: %u \n  VeloX: %f \t VeloY: %f \t VeloZ: %f \n\n", x,y,z, VeloX,VeloY,VeloZ);
+//          feq=c1o54* (drho/*+three*( vx1    +vx3)*/+c9o2*( vx1    +vx3)*( vx1    +vx3) * (c1o1 + drho)-cu_sq); 
+//          (D.f[DIR_M0M])[kbw]=(c1o1-q)/(c1o1+q)*(f_TE-f_BW+(f_TE+f_BW-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_TE+f_BW)-c6o1*c1o54*( VeloX+VeloZ))/(c1o1+q) - c1o54 * drho;
+//          //feq=c1over54* (drho+three*( vx1    +vx3)+c9over2*( vx1    +vx3)*( vx1    +vx3)-cu_sq); 
+//          //(D.f[DIR_M0M])[kbw]=(one-q)/(one+q)*(f_TE-feq*om_turb)/(one-om_turb)+(q*(f_TE+f_BW)-six*c1over54*( VeloX+VeloZ))/(one+q);
+//          //(D.f[DIR_M0M])[kbw]=zero;
+//       }
+
+//       q = q_dirBW[k];
+//       if (q>=c0o1 && q<=c1o1)
+//       {
+// 		 VeloX = fac*vx1;
+// 	     VeloY = fac*vx2;
+// 	     VeloZ = fac*vx3;
+// 		 if (x == true) VeloX = c0o1;
+// 		 if (z == true) VeloZ = c0o1;
+//          feq=c1o54* (drho/*+three*(-vx1    -vx3)*/+c9o2*(-vx1    -vx3)*(-vx1    -vx3) * (c1o1 + drho)-cu_sq); 
+//          (D.f[DIR_P0P])[kte]=(c1o1-q)/(c1o1+q)*(f_BW-f_TE+(f_BW+f_TE-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_BW+f_TE)-c6o1*c1o54*(-VeloX-VeloZ))/(c1o1+q) - c1o54 * drho;
+//          //feq=c1over54* (drho+three*(-vx1    -vx3)+c9over2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq); 
+//          //(D.f[DIR_P0P])[kte]=(one-q)/(one+q)*(f_BW-feq*om_turb)/(one-om_turb)+(q*(f_BW+f_TE)-six*c1over54*(-VeloX-VeloZ))/(one+q);
+//          //(D.f[DIR_P0P])[kte]=zero;
+//       }
+
+//       q = q_dirBE[k];
+//       if (q>=c0o1 && q<=c1o1)
+//       {
+// 		 VeloX = fac*vx1;
+// 	     VeloY = fac*vx2;
+// 	     VeloZ = fac*vx3;
+// 		 if (x == true) VeloX = c0o1;
+// 		 if (z == true) VeloZ = c0o1;
+//          feq=c1o54* (drho/*+three*( vx1    -vx3)*/+c9o2*( vx1    -vx3)*( vx1    -vx3) * (c1o1 + drho)-cu_sq); 
+//          (D.f[DIR_M0P])[ktw]=(c1o1-q)/(c1o1+q)*(f_BE-f_TW+(f_BE+f_TW-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_BE+f_TW)-c6o1*c1o54*( VeloX-VeloZ))/(c1o1+q) - c1o54 * drho;
+//          //feq=c1over54* (drho+three*( vx1    -vx3)+c9over2*( vx1    -vx3)*( vx1    -vx3)-cu_sq); 
+//          //(D.f[DIR_M0P])[ktw]=(one-q)/(one+q)*(f_BE-feq*om_turb)/(one-om_turb)+(q*(f_BE+f_TW)-six*c1over54*( VeloX-VeloZ))/(one+q);
+//          //(D.f[DIR_M0P])[ktw]=zero;
+//       }
+
+//       q = q_dirTW[k];
+//       if (q>=c0o1 && q<=c1o1)
+//       {
+// 		 VeloX = fac*vx1;
+// 	     VeloY = fac*vx2;
+// 	     VeloZ = fac*vx3;
+// 		 if (x == true) VeloX = c0o1;
+// 		 if (z == true) VeloZ = c0o1;
+//          feq=c1o54* (drho/*+three*(-vx1    +vx3)*/+c9o2*(-vx1    +vx3)*(-vx1    +vx3) * (c1o1 + drho)-cu_sq); 
+//          (D.f[DIR_P0M])[kbe]=(c1o1-q)/(c1o1+q)*(f_TW-f_BE+(f_TW+f_BE-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_TW+f_BE)-c6o1*c1o54*(-VeloX+VeloZ))/(c1o1+q) - c1o54 * drho;
+//          //feq=c1over54* (drho+three*(-vx1    +vx3)+c9over2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq); 
+//          //(D.f[DIR_P0M])[kbe]=(one-q)/(one+q)*(f_TW-feq*om_turb)/(one-om_turb)+(q*(f_TW+f_BE)-six*c1over54*(-VeloX+VeloZ))/(one+q);
+//          //(D.f[DIR_P0M])[kbe]=zero;
+//       }
+
+//       q = q_dirTN[k];
+//       if (q>=c0o1 && q<=c1o1)
+//       {
+// 		 VeloX = fac*vx1;
+// 	     VeloY = fac*vx2;
+// 	     VeloZ = fac*vx3;
+// 		 if (y == true) VeloY = c0o1;
+// 		 if (z == true) VeloZ = c0o1;
+//          feq=c1o54* (drho/*+three*(     vx2+vx3)*/+c9o2*(     vx2+vx3)*(     vx2+vx3) * (c1o1 + drho)-cu_sq); 
+//          (D.f[DIR_0MM])[kbs]=(c1o1-q)/(c1o1+q)*(f_TN-f_BS+(f_TN+f_BS-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_TN+f_BS)-c6o1*c1o54*( VeloY+VeloZ))/(c1o1+q) - c1o54 * drho;
+//          //feq=c1over54* (drho+three*(     vx2+vx3)+c9over2*(     vx2+vx3)*(     vx2+vx3)-cu_sq); 
+//          //(D.f[DIR_0MM])[kbs]=(one-q)/(one+q)*(f_TN-feq*om_turb)/(one-om_turb)+(q*(f_TN+f_BS)-six*c1over54*( VeloY+VeloZ))/(one+q);
+//          //(D.f[DIR_0MM])[kbs]=zero;
+//       }
+
+//       q = q_dirBS[k];
+//       if (q>=c0o1 && q<=c1o1)
+//       {
+// 		 VeloX = fac*vx1;
+// 	     VeloY = fac*vx2;
+// 	     VeloZ = fac*vx3;
+// 		 if (y == true) VeloY = c0o1;
+// 		 if (z == true) VeloZ = c0o1;
+//          feq=c1o54* (drho/*+three*(    -vx2-vx3)*/+c9o2*(    -vx2-vx3)*(    -vx2-vx3) * (c1o1 + drho)-cu_sq); 
+//          (D.f[DIR_0PP])[ktn]=(c1o1-q)/(c1o1+q)*(f_BS-f_TN+(f_BS+f_TN-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_BS+f_TN)-c6o1*c1o54*( -VeloY-VeloZ))/(c1o1+q) - c1o54 * drho;
+//          //feq=c1over54* (drho+three*(    -vx2-vx3)+c9over2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq); 
+//          //(D.f[DIR_0PP])[ktn]=(one-q)/(one+q)*(f_BS-feq*om_turb)/(one-om_turb)+(q*(f_BS+f_TN)-six*c1over54*( -VeloY-VeloZ))/(one+q);
+//          //(D.f[DIR_0PP])[ktn]=zero;
+//       }
+
+//       q = q_dirBN[k];
+//       if (q>=c0o1 && q<=c1o1)
+//       {
+// 		 VeloX = fac*vx1;
+// 	     VeloY = fac*vx2;
+// 	     VeloZ = fac*vx3;
+// 		 if (y == true) VeloY = c0o1;
+// 		 if (z == true) VeloZ = c0o1;
+//          feq=c1o54* (drho/*+three*(     vx2-vx3)*/+c9o2*(     vx2-vx3)*(     vx2-vx3) * (c1o1 + drho)-cu_sq); 
+//          (D.f[DIR_0MP])[kts]=(c1o1-q)/(c1o1+q)*(f_BN-f_TS+(f_BN+f_TS-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_BN+f_TS)-c6o1*c1o54*( VeloY-VeloZ))/(c1o1+q) - c1o54 * drho;
+//          //feq=c1over54* (drho+three*(     vx2-vx3)+c9over2*(     vx2-vx3)*(     vx2-vx3)-cu_sq); 
+//          //(D.f[DIR_0MP])[kts]=(one-q)/(one+q)*(f_BN-feq*om_turb)/(one-om_turb)+(q*(f_BN+f_TS)-six*c1over54*( VeloY-VeloZ))/(one+q);
+//          //(D.f[DIR_0MP])[kts]=zero;
+//       }
+
+//       q = q_dirTS[k];
+//       if (q>=c0o1 && q<=c1o1)
+//       {
+// 		 VeloX = fac*vx1;
+// 	     VeloY = fac*vx2;
+// 	     VeloZ = fac*vx3;
+// 		 if (y == true) VeloY = c0o1;
+// 		 if (z == true) VeloZ = c0o1;
+//          feq=c1o54* (drho/*+three*(    -vx2+vx3)*/+c9o2*(    -vx2+vx3)*(    -vx2+vx3) * (c1o1 + drho)-cu_sq); 
+//          (D.f[DIR_0PM])[kbn]=(c1o1-q)/(c1o1+q)*(f_TS-f_BN+(f_TS+f_BN-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_TS+f_BN)-c6o1*c1o54*( -VeloY+VeloZ))/(c1o1+q) - c1o54 * drho;
+//          //feq=c1over54* (drho+three*(    -vx2+vx3)+c9over2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq); 
+//          //(D.f[DIR_0PM])[kbn]=(one-q)/(one+q)*(f_TS-feq*om_turb)/(one-om_turb)+(q*(f_TS+f_BN)-six*c1over54*( -VeloY+VeloZ))/(one+q);
+//          //(D.f[DIR_0PM])[kbn]=zero;
+//       }
+
+//       q = q_dirTNE[k];
+//       if (q>=c0o1 && q<=c1o1)
+//       {
+// 		 VeloX = fac*vx1;
+// 	     VeloY = fac*vx2;
+// 	     VeloZ = fac*vx3;
+// 		 if (x == true) VeloX = c0o1;
+// 		 if (y == true) VeloY = c0o1;
+// 		 if (z == true) VeloZ = c0o1;
+//          feq=c1o216*(drho/*+three*( vx1+vx2+vx3)*/+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3) * (c1o1 + drho)-cu_sq); 
+//          (D.f[DIR_MMM])[kbsw]=(c1o1-q)/(c1o1+q)*(f_TNE-f_BSW+(f_TNE+f_BSW-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_TNE+f_BSW)-c6o1*c1o216*( VeloX+VeloY+VeloZ))/(c1o1+q) - c1o216 * drho;
+//          //feq=c1over216*(drho+three*( vx1+vx2+vx3)+c9over2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq); 
+//          //(D.f[DIR_MMM])[kbsw]=(one-q)/(one+q)*(f_TNE-feq*om_turb)/(one-om_turb)+(q*(f_TNE+f_BSW)-six*c1over216*( VeloX+VeloY+VeloZ))/(one+q);
+//          //(D.f[DIR_MMM])[kbsw]=zero;
+//       }
+
+//       q = q_dirBSW[k];
+//       if (q>=c0o1 && q<=c1o1)
+//       {
+// 		 VeloX = fac*vx1;
+// 	     VeloY = fac*vx2;
+// 	     VeloZ = fac*vx3;
+// 		 if (x == true) VeloX = c0o1;
+// 		 if (y == true) VeloY = c0o1;
+// 		 if (z == true) VeloZ = c0o1;
+//          feq=c1o216*(drho/*+three*(-vx1-vx2-vx3)*/+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3) * (c1o1 + drho)-cu_sq); 
+//          (D.f[DIR_PPP])[ktne]=(c1o1-q)/(c1o1+q)*(f_BSW-f_TNE+(f_BSW+f_TNE-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_BSW+f_TNE)-c6o1*c1o216*(-VeloX-VeloY-VeloZ))/(c1o1+q) - c1o216 * drho;
+//          //feq=c1over216*(drho+three*(-vx1-vx2-vx3)+c9over2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq); 
+//          //(D.f[DIR_PPP])[ktne]=(one-q)/(one+q)*(f_BSW-feq*om_turb)/(one-om_turb)+(q*(f_BSW+f_TNE)-six*c1over216*(-VeloX-VeloY-VeloZ))/(one+q);
+//          //(D.f[DIR_PPP])[ktne]=zero;
+//       }
+
+//       q = q_dirBNE[k];
+//       if (q>=c0o1 && q<=c1o1)
+//       {
+// 		 VeloX = fac*vx1;
+// 	     VeloY = fac*vx2;
+// 	     VeloZ = fac*vx3;
+// 		 if (x == true) VeloX = c0o1;
+// 		 if (y == true) VeloY = c0o1;
+// 		 if (z == true) VeloZ = c0o1;
+//          feq=c1o216*(drho/*+three*( vx1+vx2-vx3)*/+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3) * (c1o1 + drho)-cu_sq); 
+//          (D.f[DIR_MMP])[ktsw]=(c1o1-q)/(c1o1+q)*(f_BNE-f_TSW+(f_BNE+f_TSW-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_BNE+f_TSW)-c6o1*c1o216*( VeloX+VeloY-VeloZ))/(c1o1+q) - c1o216 * drho;
+//          //feq=c1over216*(drho+three*( vx1+vx2-vx3)+c9over2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq); 
+//          //(D.f[DIR_MMP])[ktsw]=(one-q)/(one+q)*(f_BNE-feq*om_turb)/(one-om_turb)+(q*(f_BNE+f_TSW)-six*c1over216*( VeloX+VeloY-VeloZ))/(one+q);
+//          //(D.f[DIR_MMP])[ktsw]=zero;
+//       }
+
+//       q = q_dirTSW[k];
+//       if (q>=c0o1 && q<=c1o1)
+//       {
+// 		 VeloX = fac*vx1;
+// 	     VeloY = fac*vx2;
+// 	     VeloZ = fac*vx3;
+// 		 if (x == true) VeloX = c0o1;
+// 		 if (y == true) VeloY = c0o1;
+// 		 if (z == true) VeloZ = c0o1;
+//          feq=c1o216*(drho/*+three*(-vx1-vx2+vx3)*/+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3) * (c1o1 + drho)-cu_sq); 
+//          (D.f[DIR_PPM])[kbne]=(c1o1-q)/(c1o1+q)*(f_TSW-f_BNE+(f_TSW+f_BNE-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_TSW+f_BNE)-c6o1*c1o216*(-VeloX-VeloY+VeloZ))/(c1o1+q) - c1o216 * drho;
+//          //feq=c1over216*(drho+three*(-vx1-vx2+vx3)+c9over2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq); 
+//          //(D.f[DIR_PPM])[kbne]=(one-q)/(one+q)*(f_TSW-feq*om_turb)/(one-om_turb)+(q*(f_TSW+f_BNE)-six*c1over216*(-VeloX-VeloY+VeloZ))/(one+q);
+//          //(D.f[DIR_PPM])[kbne]=zero;
+//       }
+
+//       q = q_dirTSE[k];
+//       if (q>=c0o1 && q<=c1o1)
+//       {
+// 		 VeloX = fac*vx1;
+// 	     VeloY = fac*vx2;
+// 	     VeloZ = fac*vx3;
+// 		 if (x == true) VeloX = c0o1;
+// 		 if (y == true) VeloY = c0o1;
+// 		 if (z == true) VeloZ = c0o1;
+//          feq=c1o216*(drho/*+three*( vx1-vx2+vx3)*/+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3) * (c1o1 + drho)-cu_sq); 
+//          (D.f[DIR_MPM])[kbnw]=(c1o1-q)/(c1o1+q)*(f_TSE-f_BNW+(f_TSE+f_BNW-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_TSE+f_BNW)-c6o1*c1o216*( VeloX-VeloY+VeloZ))/(c1o1+q) - c1o216 * drho;
+//          //feq=c1over216*(drho+three*( vx1-vx2+vx3)+c9over2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq); 
+//          //(D.f[DIR_MPM])[kbnw]=(one-q)/(one+q)*(f_TSE-feq*om_turb)/(one-om_turb)+(q*(f_TSE+f_BNW)-six*c1over216*( VeloX-VeloY+VeloZ))/(one+q);
+//          //(D.f[DIR_MPM])[kbnw]=zero;
+//       }
+
+//       q = q_dirBNW[k];
+//       if (q>=c0o1 && q<=c1o1)
+//       {
+// 		 VeloX = fac*vx1;
+// 	     VeloY = fac*vx2;
+// 	     VeloZ = fac*vx3;
+// 		 if (x == true) VeloX = c0o1;
+// 		 if (y == true) VeloY = c0o1;
+// 		 if (z == true) VeloZ = c0o1;
+//          feq=c1o216*(drho/*+three*(-vx1+vx2-vx3)*/+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3) * (c1o1 + drho)-cu_sq); 
+//          (D.f[DIR_PMP])[ktse]=(c1o1-q)/(c1o1+q)*(f_BNW-f_TSE+(f_BNW+f_TSE-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_BNW+f_TSE)-c6o1*c1o216*(-VeloX+VeloY-VeloZ))/(c1o1+q) - c1o216 * drho;
+//          //feq=c1over216*(drho+three*(-vx1+vx2-vx3)+c9over2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq); 
+//          //(D.f[DIR_PMP])[ktse]=(one-q)/(one+q)*(f_BNW-feq*om_turb)/(one-om_turb)+(q*(f_BNW+f_TSE)-six*c1over216*(-VeloX+VeloY-VeloZ))/(one+q);
+//          //(D.f[DIR_PMP])[ktse]=zero;
+//       }
+
+//       q = q_dirBSE[k];
+//       if (q>=c0o1 && q<=c1o1)
+//       {
+// 		 VeloX = fac*vx1;
+// 	     VeloY = fac*vx2;
+// 	     VeloZ = fac*vx3;
+// 		 if (x == true) VeloX = c0o1;
+// 		 if (y == true) VeloY = c0o1;
+// 		 if (z == true) VeloZ = c0o1;
+//          feq=c1o216*(drho/*+three*( vx1-vx2-vx3)*/+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3) * (c1o1 + drho)-cu_sq); 
+//          (D.f[DIR_MPP])[ktnw]=(c1o1-q)/(c1o1+q)*(f_BSE-f_TNW+(f_BSE+f_TNW-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_BSE+f_TNW)-c6o1*c1o216*( VeloX-VeloY-VeloZ))/(c1o1+q) - c1o216 * drho;
+//          //feq=c1over216*(drho+three*( vx1-vx2-vx3)+c9over2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq); 
+//          //(D.f[DIR_MPP])[ktnw]=(one-q)/(one+q)*(f_BSE-feq*om_turb)/(one-om_turb)+(q*(f_BSE+f_TNW)-six*c1over216*( VeloX-VeloY-VeloZ))/(one+q);
+//          //(D.f[DIR_MPP])[ktnw]=zero;
+//       }
+
+//       q = q_dirTNW[k];
+//       if (q>=c0o1 && q<=c1o1)
+//       {
+// 		 VeloX = fac*vx1;
+// 	     VeloY = fac*vx2;
+// 	     VeloZ = fac*vx3;
+// 		 if (x == true) VeloX = c0o1;
+// 		 if (y == true) VeloY = c0o1;
+// 		 if (z == true) VeloZ = c0o1;
+//          feq=c1o216*(drho/*+three*(-vx1+vx2+vx3)*/+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3) * (c1o1 + drho)-cu_sq); 
+//          (D.f[DIR_PMM])[kbse]=(c1o1-q)/(c1o1+q)*(f_TNW-f_BSE+(f_TNW+f_BSE-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_TNW+f_BSE)-c6o1*c1o216*(-VeloX+VeloY+VeloZ))/(c1o1+q) - c1o216 * drho;
+//          //feq=c1over216*(drho+three*(-vx1+vx2+vx3)+c9over2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq); 
+//          //(D.f[DIR_PMM])[kbse]=(one-q)/(one+q)*(f_TNW-feq*om_turb)/(one-om_turb)+(q*(f_TNW+f_BSE)-six*c1over216*(-VeloX+VeloY+VeloZ))/(one+q);
+//          //(D.f[DIR_PMM])[kbse]=zero;
+//       }
+//    }
+// }
+
 
 
 
@@ -2027,7 +3378,7 @@ extern "C" __global__ void QSlipDeviceComp27TurbViscosity(real* DD,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QSlipGeomDeviceComp27(real* DD, 
+__global__ void QSlipGeomDeviceComp27(real* DD, 
 												 int* k_Q, 
 												 real* QQ,
 												 unsigned int  numberOfBCnodes,
@@ -2044,63 +3395,63 @@ extern "C" __global__ void QSlipGeomDeviceComp27(real* DD,
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[E   ] = &DD[E   *size_Mat];
-      D.f[W   ] = &DD[W   *size_Mat];
-      D.f[N   ] = &DD[N   *size_Mat];
-      D.f[S   ] = &DD[S   *size_Mat];
-      D.f[T   ] = &DD[T   *size_Mat];
-      D.f[B   ] = &DD[B   *size_Mat];
-      D.f[NE  ] = &DD[NE  *size_Mat];
-      D.f[SW  ] = &DD[SW  *size_Mat];
-      D.f[SE  ] = &DD[SE  *size_Mat];
-      D.f[NW  ] = &DD[NW  *size_Mat];
-      D.f[TE  ] = &DD[TE  *size_Mat];
-      D.f[BW  ] = &DD[BW  *size_Mat];
-      D.f[BE  ] = &DD[BE  *size_Mat];
-      D.f[TW  ] = &DD[TW  *size_Mat];
-      D.f[TN  ] = &DD[TN  *size_Mat];
-      D.f[BS  ] = &DD[BS  *size_Mat];
-      D.f[BN  ] = &DD[BN  *size_Mat];
-      D.f[TS  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[TNE *size_Mat];
-      D.f[TSW ] = &DD[TSW *size_Mat];
-      D.f[TSE ] = &DD[TSE *size_Mat];
-      D.f[TNW ] = &DD[TNW *size_Mat];
-      D.f[BNE ] = &DD[BNE *size_Mat];
-      D.f[BSW ] = &DD[BSW *size_Mat];
-      D.f[BSE ] = &DD[BSE *size_Mat];
-      D.f[BNW ] = &DD[BNW *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
    } 
    else
    {
-      D.f[W   ] = &DD[E   *size_Mat];
-      D.f[E   ] = &DD[W   *size_Mat];
-      D.f[S   ] = &DD[N   *size_Mat];
-      D.f[N   ] = &DD[S   *size_Mat];
-      D.f[B   ] = &DD[T   *size_Mat];
-      D.f[T   ] = &DD[B   *size_Mat];
-      D.f[SW  ] = &DD[NE  *size_Mat];
-      D.f[NE  ] = &DD[SW  *size_Mat];
-      D.f[NW  ] = &DD[SE  *size_Mat];
-      D.f[SE  ] = &DD[NW  *size_Mat];
-      D.f[BW  ] = &DD[TE  *size_Mat];
-      D.f[TE  ] = &DD[BW  *size_Mat];
-      D.f[TW  ] = &DD[BE  *size_Mat];
-      D.f[BE  ] = &DD[TW  *size_Mat];
-      D.f[BS  ] = &DD[TN  *size_Mat];
-      D.f[TN  ] = &DD[BS  *size_Mat];
-      D.f[TS  ] = &DD[BN  *size_Mat];
-      D.f[BN  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[BSW *size_Mat];
-      D.f[TSW ] = &DD[BNE *size_Mat];
-      D.f[TSE ] = &DD[BNW *size_Mat];
-      D.f[TNW ] = &DD[BSE *size_Mat];
-      D.f[BNE ] = &DD[TSW *size_Mat];
-      D.f[BSW ] = &DD[TNE *size_Mat];
-      D.f[BSE ] = &DD[TNW *size_Mat];
-      D.f[BNW ] = &DD[TSE *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -2121,128 +3472,128 @@ extern "C" __global__ void QSlipGeomDeviceComp27(real* DD,
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[E   * numberOfBCnodes];
-      q_dirW   = &QQ[W   * numberOfBCnodes];
-      q_dirN   = &QQ[N   * numberOfBCnodes];
-      q_dirS   = &QQ[S   * numberOfBCnodes];
-      q_dirT   = &QQ[T   * numberOfBCnodes];
-      q_dirB   = &QQ[B   * numberOfBCnodes];
-      q_dirNE  = &QQ[NE  * numberOfBCnodes];
-      q_dirSW  = &QQ[SW  * numberOfBCnodes];
-      q_dirSE  = &QQ[SE  * numberOfBCnodes];
-      q_dirNW  = &QQ[NW  * numberOfBCnodes];
-      q_dirTE  = &QQ[TE  * numberOfBCnodes];
-      q_dirBW  = &QQ[BW  * numberOfBCnodes];
-      q_dirBE  = &QQ[BE  * numberOfBCnodes];
-      q_dirTW  = &QQ[TW  * numberOfBCnodes];
-      q_dirTN  = &QQ[TN  * numberOfBCnodes];
-      q_dirBS  = &QQ[BS  * numberOfBCnodes];
-      q_dirBN  = &QQ[BN  * numberOfBCnodes];
-      q_dirTS  = &QQ[TS  * numberOfBCnodes];
-      q_dirTNE = &QQ[TNE * numberOfBCnodes];
-      q_dirTSW = &QQ[TSW * numberOfBCnodes];
-      q_dirTSE = &QQ[TSE * numberOfBCnodes];
-      q_dirTNW = &QQ[TNW * numberOfBCnodes];
-      q_dirBNE = &QQ[BNE * numberOfBCnodes];
-      q_dirBSW = &QQ[BSW * numberOfBCnodes];
-      q_dirBSE = &QQ[BSE * numberOfBCnodes];
-      q_dirBNW = &QQ[BNW * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
+      q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
+      q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
+      q_dirTNW = &QQ[DIR_MPP * numberOfBCnodes];
+      q_dirBNE = &QQ[DIR_PPM * numberOfBCnodes];
+      q_dirBSW = &QQ[DIR_MMM * numberOfBCnodes];
+      q_dirBSE = &QQ[DIR_PMM * numberOfBCnodes];
+      q_dirBNW = &QQ[DIR_MPM * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       real *nx_dirE,   *nx_dirW,   *nx_dirN,   *nx_dirS,   *nx_dirT,   *nx_dirB, 
               *nx_dirNE,  *nx_dirSW,  *nx_dirSE,  *nx_dirNW,  *nx_dirTE,  *nx_dirBW,
               *nx_dirBE,  *nx_dirTW,  *nx_dirTN,  *nx_dirBS,  *nx_dirBN,  *nx_dirTS,
               *nx_dirTNE, *nx_dirTSW, *nx_dirTSE, *nx_dirTNW, *nx_dirBNE, *nx_dirBSW,
               *nx_dirBSE, *nx_dirBNW; 
-      nx_dirE   = &NormalX[E   * numberOfBCnodes];
-      nx_dirW   = &NormalX[W   * numberOfBCnodes];
-      nx_dirN   = &NormalX[N   * numberOfBCnodes];
-      nx_dirS   = &NormalX[S   * numberOfBCnodes];
-      nx_dirT   = &NormalX[T   * numberOfBCnodes];
-      nx_dirB   = &NormalX[B   * numberOfBCnodes];
-      nx_dirNE  = &NormalX[NE  * numberOfBCnodes];
-      nx_dirSW  = &NormalX[SW  * numberOfBCnodes];
-      nx_dirSE  = &NormalX[SE  * numberOfBCnodes];
-      nx_dirNW  = &NormalX[NW  * numberOfBCnodes];
-      nx_dirTE  = &NormalX[TE  * numberOfBCnodes];
-      nx_dirBW  = &NormalX[BW  * numberOfBCnodes];
-      nx_dirBE  = &NormalX[BE  * numberOfBCnodes];
-      nx_dirTW  = &NormalX[TW  * numberOfBCnodes];
-      nx_dirTN  = &NormalX[TN  * numberOfBCnodes];
-      nx_dirBS  = &NormalX[BS  * numberOfBCnodes];
-      nx_dirBN  = &NormalX[BN  * numberOfBCnodes];
-      nx_dirTS  = &NormalX[TS  * numberOfBCnodes];
-      nx_dirTNE = &NormalX[TNE * numberOfBCnodes];
-      nx_dirTSW = &NormalX[TSW * numberOfBCnodes];
-      nx_dirTSE = &NormalX[TSE * numberOfBCnodes];
-      nx_dirTNW = &NormalX[TNW * numberOfBCnodes];
-      nx_dirBNE = &NormalX[BNE * numberOfBCnodes];
-      nx_dirBSW = &NormalX[BSW * numberOfBCnodes];
-      nx_dirBSE = &NormalX[BSE * numberOfBCnodes];
-      nx_dirBNW = &NormalX[BNW * numberOfBCnodes];
+      nx_dirE   = &NormalX[DIR_P00   * numberOfBCnodes];
+      nx_dirW   = &NormalX[DIR_M00   * numberOfBCnodes];
+      nx_dirN   = &NormalX[DIR_0P0   * numberOfBCnodes];
+      nx_dirS   = &NormalX[DIR_0M0   * numberOfBCnodes];
+      nx_dirT   = &NormalX[DIR_00P   * numberOfBCnodes];
+      nx_dirB   = &NormalX[DIR_00M   * numberOfBCnodes];
+      nx_dirNE  = &NormalX[DIR_PP0  * numberOfBCnodes];
+      nx_dirSW  = &NormalX[DIR_MM0  * numberOfBCnodes];
+      nx_dirSE  = &NormalX[DIR_PM0  * numberOfBCnodes];
+      nx_dirNW  = &NormalX[DIR_MP0  * numberOfBCnodes];
+      nx_dirTE  = &NormalX[DIR_P0P  * numberOfBCnodes];
+      nx_dirBW  = &NormalX[DIR_M0M  * numberOfBCnodes];
+      nx_dirBE  = &NormalX[DIR_P0M  * numberOfBCnodes];
+      nx_dirTW  = &NormalX[DIR_M0P  * numberOfBCnodes];
+      nx_dirTN  = &NormalX[DIR_0PP  * numberOfBCnodes];
+      nx_dirBS  = &NormalX[DIR_0MM  * numberOfBCnodes];
+      nx_dirBN  = &NormalX[DIR_0PM  * numberOfBCnodes];
+      nx_dirTS  = &NormalX[DIR_0MP  * numberOfBCnodes];
+      nx_dirTNE = &NormalX[DIR_PPP * numberOfBCnodes];
+      nx_dirTSW = &NormalX[DIR_MMP * numberOfBCnodes];
+      nx_dirTSE = &NormalX[DIR_PMP * numberOfBCnodes];
+      nx_dirTNW = &NormalX[DIR_MPP * numberOfBCnodes];
+      nx_dirBNE = &NormalX[DIR_PPM * numberOfBCnodes];
+      nx_dirBSW = &NormalX[DIR_MMM * numberOfBCnodes];
+      nx_dirBSE = &NormalX[DIR_PMM * numberOfBCnodes];
+      nx_dirBNW = &NormalX[DIR_MPM * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       real *ny_dirE,   *ny_dirW,   *ny_dirN,   *ny_dirS,   *ny_dirT,   *ny_dirB, 
               *ny_dirNE,  *ny_dirSW,  *ny_dirSE,  *ny_dirNW,  *ny_dirTE,  *ny_dirBW,
               *ny_dirBE,  *ny_dirTW,  *ny_dirTN,  *ny_dirBS,  *ny_dirBN,  *ny_dirTS,
               *ny_dirTNE, *ny_dirTSW, *ny_dirTSE, *ny_dirTNW, *ny_dirBNE, *ny_dirBSW,
               *ny_dirBSE, *ny_dirBNW; 
-      ny_dirE   = &NormalY[E   * numberOfBCnodes];
-      ny_dirW   = &NormalY[W   * numberOfBCnodes];
-      ny_dirN   = &NormalY[N   * numberOfBCnodes];
-      ny_dirS   = &NormalY[S   * numberOfBCnodes];
-      ny_dirT   = &NormalY[T   * numberOfBCnodes];
-      ny_dirB   = &NormalY[B   * numberOfBCnodes];
-      ny_dirNE  = &NormalY[NE  * numberOfBCnodes];
-      ny_dirSW  = &NormalY[SW  * numberOfBCnodes];
-      ny_dirSE  = &NormalY[SE  * numberOfBCnodes];
-      ny_dirNW  = &NormalY[NW  * numberOfBCnodes];
-      ny_dirTE  = &NormalY[TE  * numberOfBCnodes];
-      ny_dirBW  = &NormalY[BW  * numberOfBCnodes];
-      ny_dirBE  = &NormalY[BE  * numberOfBCnodes];
-      ny_dirTW  = &NormalY[TW  * numberOfBCnodes];
-      ny_dirTN  = &NormalY[TN  * numberOfBCnodes];
-      ny_dirBS  = &NormalY[BS  * numberOfBCnodes];
-      ny_dirBN  = &NormalY[BN  * numberOfBCnodes];
-      ny_dirTS  = &NormalY[TS  * numberOfBCnodes];
-      ny_dirTNE = &NormalY[TNE * numberOfBCnodes];
-      ny_dirTSW = &NormalY[TSW * numberOfBCnodes];
-      ny_dirTSE = &NormalY[TSE * numberOfBCnodes];
-      ny_dirTNW = &NormalY[TNW * numberOfBCnodes];
-      ny_dirBNE = &NormalY[BNE * numberOfBCnodes];
-      ny_dirBSW = &NormalY[BSW * numberOfBCnodes];
-      ny_dirBSE = &NormalY[BSE * numberOfBCnodes];
-      ny_dirBNW = &NormalY[BNW * numberOfBCnodes];
+      ny_dirE   = &NormalY[DIR_P00   * numberOfBCnodes];
+      ny_dirW   = &NormalY[DIR_M00   * numberOfBCnodes];
+      ny_dirN   = &NormalY[DIR_0P0   * numberOfBCnodes];
+      ny_dirS   = &NormalY[DIR_0M0   * numberOfBCnodes];
+      ny_dirT   = &NormalY[DIR_00P   * numberOfBCnodes];
+      ny_dirB   = &NormalY[DIR_00M   * numberOfBCnodes];
+      ny_dirNE  = &NormalY[DIR_PP0  * numberOfBCnodes];
+      ny_dirSW  = &NormalY[DIR_MM0  * numberOfBCnodes];
+      ny_dirSE  = &NormalY[DIR_PM0  * numberOfBCnodes];
+      ny_dirNW  = &NormalY[DIR_MP0  * numberOfBCnodes];
+      ny_dirTE  = &NormalY[DIR_P0P  * numberOfBCnodes];
+      ny_dirBW  = &NormalY[DIR_M0M  * numberOfBCnodes];
+      ny_dirBE  = &NormalY[DIR_P0M  * numberOfBCnodes];
+      ny_dirTW  = &NormalY[DIR_M0P  * numberOfBCnodes];
+      ny_dirTN  = &NormalY[DIR_0PP  * numberOfBCnodes];
+      ny_dirBS  = &NormalY[DIR_0MM  * numberOfBCnodes];
+      ny_dirBN  = &NormalY[DIR_0PM  * numberOfBCnodes];
+      ny_dirTS  = &NormalY[DIR_0MP  * numberOfBCnodes];
+      ny_dirTNE = &NormalY[DIR_PPP * numberOfBCnodes];
+      ny_dirTSW = &NormalY[DIR_MMP * numberOfBCnodes];
+      ny_dirTSE = &NormalY[DIR_PMP * numberOfBCnodes];
+      ny_dirTNW = &NormalY[DIR_MPP * numberOfBCnodes];
+      ny_dirBNE = &NormalY[DIR_PPM * numberOfBCnodes];
+      ny_dirBSW = &NormalY[DIR_MMM * numberOfBCnodes];
+      ny_dirBSE = &NormalY[DIR_PMM * numberOfBCnodes];
+      ny_dirBNW = &NormalY[DIR_MPM * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       real *nz_dirE,   *nz_dirW,   *nz_dirN,   *nz_dirS,   *nz_dirT,   *nz_dirB, 
               *nz_dirNE,  *nz_dirSW,  *nz_dirSE,  *nz_dirNW,  *nz_dirTE,  *nz_dirBW,
               *nz_dirBE,  *nz_dirTW,  *nz_dirTN,  *nz_dirBS,  *nz_dirBN,  *nz_dirTS,
               *nz_dirTNE, *nz_dirTSW, *nz_dirTSE, *nz_dirTNW, *nz_dirBNE, *nz_dirBSW,
               *nz_dirBSE, *nz_dirBNW; 
-      nz_dirE   = &NormalZ[E   * numberOfBCnodes];
-      nz_dirW   = &NormalZ[W   * numberOfBCnodes];
-      nz_dirN   = &NormalZ[N   * numberOfBCnodes];
-      nz_dirS   = &NormalZ[S   * numberOfBCnodes];
-      nz_dirT   = &NormalZ[T   * numberOfBCnodes];
-      nz_dirB   = &NormalZ[B   * numberOfBCnodes];
-      nz_dirNE  = &NormalZ[NE  * numberOfBCnodes];
-      nz_dirSW  = &NormalZ[SW  * numberOfBCnodes];
-      nz_dirSE  = &NormalZ[SE  * numberOfBCnodes];
-      nz_dirNW  = &NormalZ[NW  * numberOfBCnodes];
-      nz_dirTE  = &NormalZ[TE  * numberOfBCnodes];
-      nz_dirBW  = &NormalZ[BW  * numberOfBCnodes];
-      nz_dirBE  = &NormalZ[BE  * numberOfBCnodes];
-      nz_dirTW  = &NormalZ[TW  * numberOfBCnodes];
-      nz_dirTN  = &NormalZ[TN  * numberOfBCnodes];
-      nz_dirBS  = &NormalZ[BS  * numberOfBCnodes];
-      nz_dirBN  = &NormalZ[BN  * numberOfBCnodes];
-      nz_dirTS  = &NormalZ[TS  * numberOfBCnodes];
-      nz_dirTNE = &NormalZ[TNE * numberOfBCnodes];
-      nz_dirTSW = &NormalZ[TSW * numberOfBCnodes];
-      nz_dirTSE = &NormalZ[TSE * numberOfBCnodes];
-      nz_dirTNW = &NormalZ[TNW * numberOfBCnodes];
-      nz_dirBNE = &NormalZ[BNE * numberOfBCnodes];
-      nz_dirBSW = &NormalZ[BSW * numberOfBCnodes];
-      nz_dirBSE = &NormalZ[BSE * numberOfBCnodes];
-      nz_dirBNW = &NormalZ[BNW * numberOfBCnodes];
+      nz_dirE   = &NormalZ[DIR_P00   * numberOfBCnodes];
+      nz_dirW   = &NormalZ[DIR_M00   * numberOfBCnodes];
+      nz_dirN   = &NormalZ[DIR_0P0   * numberOfBCnodes];
+      nz_dirS   = &NormalZ[DIR_0M0   * numberOfBCnodes];
+      nz_dirT   = &NormalZ[DIR_00P   * numberOfBCnodes];
+      nz_dirB   = &NormalZ[DIR_00M   * numberOfBCnodes];
+      nz_dirNE  = &NormalZ[DIR_PP0  * numberOfBCnodes];
+      nz_dirSW  = &NormalZ[DIR_MM0  * numberOfBCnodes];
+      nz_dirSE  = &NormalZ[DIR_PM0  * numberOfBCnodes];
+      nz_dirNW  = &NormalZ[DIR_MP0  * numberOfBCnodes];
+      nz_dirTE  = &NormalZ[DIR_P0P  * numberOfBCnodes];
+      nz_dirBW  = &NormalZ[DIR_M0M  * numberOfBCnodes];
+      nz_dirBE  = &NormalZ[DIR_P0M  * numberOfBCnodes];
+      nz_dirTW  = &NormalZ[DIR_M0P  * numberOfBCnodes];
+      nz_dirTN  = &NormalZ[DIR_0PP  * numberOfBCnodes];
+      nz_dirBS  = &NormalZ[DIR_0MM  * numberOfBCnodes];
+      nz_dirBN  = &NormalZ[DIR_0PM  * numberOfBCnodes];
+      nz_dirTS  = &NormalZ[DIR_0MP  * numberOfBCnodes];
+      nz_dirTNE = &NormalZ[DIR_PPP * numberOfBCnodes];
+      nz_dirTSW = &NormalZ[DIR_MMP * numberOfBCnodes];
+      nz_dirTSE = &NormalZ[DIR_PMP * numberOfBCnodes];
+      nz_dirTNW = &NormalZ[DIR_MPP * numberOfBCnodes];
+      nz_dirBNE = &NormalZ[DIR_PPM * numberOfBCnodes];
+      nz_dirBSW = &NormalZ[DIR_MMM * numberOfBCnodes];
+      nz_dirBSE = &NormalZ[DIR_PMM * numberOfBCnodes];
+      nz_dirBNW = &NormalZ[DIR_MPM * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -2274,37 +3625,37 @@ extern "C" __global__ void QSlipGeomDeviceComp27(real* DD,
       unsigned int ktne = KQK;
       unsigned int kbsw = neighborZ[ksw];
       ////////////////////////////////////////////////////////////////////////////////
-      real f_W    = (D.f[E   ])[ke   ];
-      real f_E    = (D.f[W   ])[kw   ];
-      real f_S    = (D.f[N   ])[kn   ];
-      real f_N    = (D.f[S   ])[ks   ];
-      real f_B    = (D.f[T   ])[kt   ];
-      real f_T    = (D.f[B   ])[kb   ];
-      real f_SW   = (D.f[NE  ])[kne  ];
-      real f_NE   = (D.f[SW  ])[ksw  ];
-      real f_NW   = (D.f[SE  ])[kse  ];
-      real f_SE   = (D.f[NW  ])[knw  ];
-      real f_BW   = (D.f[TE  ])[kte  ];
-      real f_TE   = (D.f[BW  ])[kbw  ];
-      real f_TW   = (D.f[BE  ])[kbe  ];
-      real f_BE   = (D.f[TW  ])[ktw  ];
-      real f_BS   = (D.f[TN  ])[ktn  ];
-      real f_TN   = (D.f[BS  ])[kbs  ];
-      real f_TS   = (D.f[BN  ])[kbn  ];
-      real f_BN   = (D.f[TS  ])[kts  ];
-      real f_BSW  = (D.f[TNE ])[ktne ];
-      real f_BNE  = (D.f[TSW ])[ktsw ];
-      real f_BNW  = (D.f[TSE ])[ktse ];
-      real f_BSE  = (D.f[TNW ])[ktnw ];
-      real f_TSW  = (D.f[BNE ])[kbne ];
-      real f_TNE  = (D.f[BSW ])[kbsw ];
-      real f_TNW  = (D.f[BSE ])[kbse ];
-      real f_TSE  = (D.f[BNW ])[kbnw ];
+      real f_W    = (D.f[DIR_P00   ])[ke   ];
+      real f_E    = (D.f[DIR_M00   ])[kw   ];
+      real f_S    = (D.f[DIR_0P0   ])[kn   ];
+      real f_N    = (D.f[DIR_0M0   ])[ks   ];
+      real f_B    = (D.f[DIR_00P   ])[kt   ];
+      real f_T    = (D.f[DIR_00M   ])[kb   ];
+      real f_SW   = (D.f[DIR_PP0  ])[kne  ];
+      real f_NE   = (D.f[DIR_MM0  ])[ksw  ];
+      real f_NW   = (D.f[DIR_PM0  ])[kse  ];
+      real f_SE   = (D.f[DIR_MP0  ])[knw  ];
+      real f_BW   = (D.f[DIR_P0P  ])[kte  ];
+      real f_TE   = (D.f[DIR_M0M  ])[kbw  ];
+      real f_TW   = (D.f[DIR_P0M  ])[kbe  ];
+      real f_BE   = (D.f[DIR_M0P  ])[ktw  ];
+      real f_BS   = (D.f[DIR_0PP  ])[ktn  ];
+      real f_TN   = (D.f[DIR_0MM  ])[kbs  ];
+      real f_TS   = (D.f[DIR_0PM  ])[kbn  ];
+      real f_BN   = (D.f[DIR_0MP  ])[kts  ];
+      real f_BSW  = (D.f[DIR_PPP ])[ktne ];
+      real f_BNE  = (D.f[DIR_MMP ])[ktsw ];
+      real f_BNW  = (D.f[DIR_PMP ])[ktse ];
+      real f_BSE  = (D.f[DIR_MPP ])[ktnw ];
+      real f_TSW  = (D.f[DIR_PPM ])[kbne ];
+      real f_TNE  = (D.f[DIR_MMM ])[kbsw ];
+      real f_TNW  = (D.f[DIR_PMM ])[kbse ];
+      real f_TSE  = (D.f[DIR_MPM ])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, drho, feq, q;
       drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
                 f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + 
-                f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[REST])[kzero]); 
+                f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[DIR_000])[kzero]); 
 
       vx1    =  (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
                 ((f_BE - f_TW)   + (f_TE - f_BW))   + ((f_SE - f_NW)   + (f_NE - f_SW)) +
@@ -2324,63 +3675,63 @@ extern "C" __global__ void QSlipGeomDeviceComp27(real* DD,
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D.f[E   ] = &DD[E   *size_Mat];
-         D.f[W   ] = &DD[W   *size_Mat];
-         D.f[N   ] = &DD[N   *size_Mat];
-         D.f[S   ] = &DD[S   *size_Mat];
-         D.f[T   ] = &DD[T   *size_Mat];
-         D.f[B   ] = &DD[B   *size_Mat];
-         D.f[NE  ] = &DD[NE  *size_Mat];
-         D.f[SW  ] = &DD[SW  *size_Mat];
-         D.f[SE  ] = &DD[SE  *size_Mat];
-         D.f[NW  ] = &DD[NW  *size_Mat];
-         D.f[TE  ] = &DD[TE  *size_Mat];
-         D.f[BW  ] = &DD[BW  *size_Mat];
-         D.f[BE  ] = &DD[BE  *size_Mat];
-         D.f[TW  ] = &DD[TW  *size_Mat];
-         D.f[TN  ] = &DD[TN  *size_Mat];
-         D.f[BS  ] = &DD[BS  *size_Mat];
-         D.f[BN  ] = &DD[BN  *size_Mat];
-         D.f[TS  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[TNE *size_Mat];
-         D.f[TSW ] = &DD[TSW *size_Mat];
-         D.f[TSE ] = &DD[TSE *size_Mat];
-         D.f[TNW ] = &DD[TNW *size_Mat];
-         D.f[BNE ] = &DD[BNE *size_Mat];
-         D.f[BSW ] = &DD[BSW *size_Mat];
-         D.f[BSE ] = &DD[BSE *size_Mat];
-         D.f[BNW ] = &DD[BNW *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
       } 
       else
       {
-         D.f[W   ] = &DD[E   *size_Mat];
-         D.f[E   ] = &DD[W   *size_Mat];
-         D.f[S   ] = &DD[N   *size_Mat];
-         D.f[N   ] = &DD[S   *size_Mat];
-         D.f[B   ] = &DD[T   *size_Mat];
-         D.f[T   ] = &DD[B   *size_Mat];
-         D.f[SW  ] = &DD[NE  *size_Mat];
-         D.f[NE  ] = &DD[SW  *size_Mat];
-         D.f[NW  ] = &DD[SE  *size_Mat];
-         D.f[SE  ] = &DD[NW  *size_Mat];
-         D.f[BW  ] = &DD[TE  *size_Mat];
-         D.f[TE  ] = &DD[BW  *size_Mat];
-         D.f[TW  ] = &DD[BE  *size_Mat];
-         D.f[BE  ] = &DD[TW  *size_Mat];
-         D.f[BS  ] = &DD[TN  *size_Mat];
-         D.f[TN  ] = &DD[BS  *size_Mat];
-         D.f[TS  ] = &DD[BN  *size_Mat];
-         D.f[BN  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[BSW *size_Mat];
-         D.f[TSW ] = &DD[BNE *size_Mat];
-         D.f[TSE ] = &DD[BNW *size_Mat];
-         D.f[TNW ] = &DD[BSE *size_Mat];
-         D.f[BNE ] = &DD[TSW *size_Mat];
-         D.f[BSW ] = &DD[TNE *size_Mat];
-         D.f[BSE ] = &DD[TNW *size_Mat];
-         D.f[BNW ] = &DD[TSE *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  real VeloX = vx1;
@@ -2402,11 +3753,11 @@ extern "C" __global__ void QSlipGeomDeviceComp27(real* DD,
 
 	  //fac = fac * magS / (c1o3 * (one / om1 - c1o2));
    //   ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-	  //real *facAst = &QQ[REST * numberOfBCnodes];
+	  //real *facAst = &QQ[DIR_000 * numberOfBCnodes];
 
 	  //fac = fac * alpha + facAst[k] * (one - alpha);
 	  //facAst[k] = fac;
-	  //(&QQ[REST * numberOfBCnodes])[KQK] = fac;
+	  //(&QQ[DIR_000 * numberOfBCnodes])[KQK] = fac;
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  ////real uk = sqrtf(vx1*vx1 + vx2*vx2 + vx3*vx3);
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -2462,7 +3813,7 @@ extern "C" __global__ void QSlipGeomDeviceComp27(real* DD,
 		 phi = fac / (q * fabs( nx_dirE[k]) + fac);
 		 VeloX *= phi;
          feq=c2o27* (drho/*+three*( vx1        )*/+c9o2*( vx1        )*( vx1        ) * (c1o1 + drho)-cu_sq); 
-         (D.f[W])[kw]=(c1o1-q)/(c1o1+q)*(f_E-f_W+(f_E+f_W-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_E+f_W)-c6o1*c2o27*( VeloX     ))/(c1o1+q) - c2o27 * drho;
+         (D.f[DIR_M00])[kw]=(c1o1-q)/(c1o1+q)*(f_E-f_W+(f_E+f_W-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_E+f_W)-c6o1*c2o27*( VeloX     ))/(c1o1+q) - c2o27 * drho;
       }
 
       q = q_dirW[k];
@@ -2476,7 +3827,7 @@ extern "C" __global__ void QSlipGeomDeviceComp27(real* DD,
 		 phi = fac / (q * fabs(-nx_dirW[k]) + fac);
 		 VeloX *= phi;
          feq=c2o27* (drho/*+three*(-vx1        )*/+c9o2*(-vx1        )*(-vx1        ) * (c1o1 + drho)-cu_sq); 
-         (D.f[E])[ke]=(c1o1-q)/(c1o1+q)*(f_W-f_E+(f_W+f_E-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_W+f_E)-c6o1*c2o27*(-VeloX     ))/(c1o1+q) - c2o27 * drho;
+         (D.f[DIR_P00])[ke]=(c1o1-q)/(c1o1+q)*(f_W-f_E+(f_W+f_E-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_W+f_E)-c6o1*c2o27*(-VeloX     ))/(c1o1+q) - c2o27 * drho;
       }
 
       q = q_dirN[k];
@@ -2490,7 +3841,7 @@ extern "C" __global__ void QSlipGeomDeviceComp27(real* DD,
 		 phi = fac / (q * fabs( ny_dirN[k]) + fac);
 		 VeloY *= phi;
          feq=c2o27* (drho/*+three*(    vx2     )*/+c9o2*(     vx2    )*(     vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[S])[ks]=(c1o1-q)/(c1o1+q)*(f_N-f_S+(f_N+f_S-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_N+f_S)-c6o1*c2o27*( VeloY     ))/(c1o1+q) - c2o27 * drho;
+         (D.f[DIR_0M0])[ks]=(c1o1-q)/(c1o1+q)*(f_N-f_S+(f_N+f_S-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_N+f_S)-c6o1*c2o27*( VeloY     ))/(c1o1+q) - c2o27 * drho;
       }
 
       q = q_dirS[k];
@@ -2504,7 +3855,7 @@ extern "C" __global__ void QSlipGeomDeviceComp27(real* DD,
 		 phi = fac / (q * fabs(-ny_dirS[k]) + fac);
 		 VeloY *= phi;
          feq=c2o27* (drho/*+three*(   -vx2     )*/+c9o2*(    -vx2    )*(    -vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[N])[kn]=(c1o1-q)/(c1o1+q)*(f_S-f_N+(f_S+f_N-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_S+f_N)-c6o1*c2o27*(-VeloY     ))/(c1o1+q) - c2o27 * drho;
+         (D.f[DIR_0P0])[kn]=(c1o1-q)/(c1o1+q)*(f_S-f_N+(f_S+f_N-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_S+f_N)-c6o1*c2o27*(-VeloY     ))/(c1o1+q) - c2o27 * drho;
       }
 
       q = q_dirT[k];
@@ -2518,7 +3869,7 @@ extern "C" __global__ void QSlipGeomDeviceComp27(real* DD,
 		 phi = fac / (q * fabs( nz_dirT[k]) + fac);
 		 VeloZ *= phi;
          feq=c2o27* (drho/*+three*(         vx3)*/+c9o2*(         vx3)*(         vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[B])[kb]=(c1o1-q)/(c1o1+q)*(f_T-f_B+(f_T+f_B-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_T+f_B)-c6o1*c2o27*( VeloZ     ))/(c1o1+q) - c2o27 * drho;
+         (D.f[DIR_00M])[kb]=(c1o1-q)/(c1o1+q)*(f_T-f_B+(f_T+f_B-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_T+f_B)-c6o1*c2o27*( VeloZ     ))/(c1o1+q) - c2o27 * drho;
       }
 
       q = q_dirB[k];
@@ -2532,7 +3883,7 @@ extern "C" __global__ void QSlipGeomDeviceComp27(real* DD,
 		 phi = fac / (q * fabs(-nz_dirB[k]) + fac);
 		 VeloZ *= phi;
          feq=c2o27* (drho/*+three*(        -vx3)*/+c9o2*(        -vx3)*(        -vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[T])[kt]=(c1o1-q)/(c1o1+q)*(f_B-f_T+(f_B+f_T-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_B+f_T)-c6o1*c2o27*(-VeloZ     ))/(c1o1+q) - c2o27 * drho;
+         (D.f[DIR_00P])[kt]=(c1o1-q)/(c1o1+q)*(f_B-f_T+(f_B+f_T-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_B+f_T)-c6o1*c2o27*(-VeloZ     ))/(c1o1+q) - c2o27 * drho;
       }
 
       q = q_dirNE[k];
@@ -2548,7 +3899,7 @@ extern "C" __global__ void QSlipGeomDeviceComp27(real* DD,
 		 VeloX *= phi;
 		 VeloY *= phi;
          feq=c1o54* (drho/*+three*( vx1+vx2    )*/+c9o2*( vx1+vx2    )*( vx1+vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[SW])[ksw]=(c1o1-q)/(c1o1+q)*(f_NE-f_SW+(f_NE+f_SW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_NE+f_SW)-c6o1*c1o54*(VeloX+VeloY))/(c1o1+q) - c1o54 * drho;
+         (D.f[DIR_MM0])[ksw]=(c1o1-q)/(c1o1+q)*(f_NE-f_SW+(f_NE+f_SW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_NE+f_SW)-c6o1*c1o54*(VeloX+VeloY))/(c1o1+q) - c1o54 * drho;
       }
 
       q = q_dirSW[k];
@@ -2564,7 +3915,7 @@ extern "C" __global__ void QSlipGeomDeviceComp27(real* DD,
 		 VeloX *= phi;
 		 VeloY *= phi;
          feq=c1o54* (drho/*+three*(-vx1-vx2    )*/+c9o2*(-vx1-vx2    )*(-vx1-vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[NE])[kne]=(c1o1-q)/(c1o1+q)*(f_SW-f_NE+(f_SW+f_NE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_SW+f_NE)-c6o1*c1o54*(-VeloX-VeloY))/(c1o1+q) - c1o54 * drho;
+         (D.f[DIR_PP0])[kne]=(c1o1-q)/(c1o1+q)*(f_SW-f_NE+(f_SW+f_NE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_SW+f_NE)-c6o1*c1o54*(-VeloX-VeloY))/(c1o1+q) - c1o54 * drho;
       }
 
       q = q_dirSE[k];
@@ -2580,7 +3931,7 @@ extern "C" __global__ void QSlipGeomDeviceComp27(real* DD,
 		 VeloX *= phi;
 		 VeloY *= phi;
          feq=c1o54* (drho/*+three*( vx1-vx2    )*/+c9o2*( vx1-vx2    )*( vx1-vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[NW])[knw]=(c1o1-q)/(c1o1+q)*(f_SE-f_NW+(f_SE+f_NW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_SE+f_NW)-c6o1*c1o54*( VeloX-VeloY))/(c1o1+q) - c1o54 * drho;
+         (D.f[DIR_MP0])[knw]=(c1o1-q)/(c1o1+q)*(f_SE-f_NW+(f_SE+f_NW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_SE+f_NW)-c6o1*c1o54*( VeloX-VeloY))/(c1o1+q) - c1o54 * drho;
       }
 
       q = q_dirNW[k];
@@ -2596,7 +3947,7 @@ extern "C" __global__ void QSlipGeomDeviceComp27(real* DD,
 		 VeloX *= phi;
 		 VeloY *= phi;
          feq=c1o54* (drho/*+three*(-vx1+vx2    )*/+c9o2*(-vx1+vx2    )*(-vx1+vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[SE])[kse]=(c1o1-q)/(c1o1+q)*(f_NW-f_SE+(f_NW+f_SE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_NW+f_SE)-c6o1*c1o54*(-VeloX+VeloY))/(c1o1+q) - c1o54 * drho;
+         (D.f[DIR_PM0])[kse]=(c1o1-q)/(c1o1+q)*(f_NW-f_SE+(f_NW+f_SE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_NW+f_SE)-c6o1*c1o54*(-VeloX+VeloY))/(c1o1+q) - c1o54 * drho;
       }
 
       q = q_dirTE[k];
@@ -2612,7 +3963,7 @@ extern "C" __global__ void QSlipGeomDeviceComp27(real* DD,
 		 VeloX *= phi;
 		 VeloZ *= phi;
          feq=c1o54* (drho/*+three*( vx1    +vx3)*/+c9o2*( vx1    +vx3)*( vx1    +vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BW])[kbw]=(c1o1-q)/(c1o1+q)*(f_TE-f_BW+(f_TE+f_BW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TE+f_BW)-c6o1*c1o54*( VeloX+VeloZ))/(c1o1+q) - c1o54 * drho;
+         (D.f[DIR_M0M])[kbw]=(c1o1-q)/(c1o1+q)*(f_TE-f_BW+(f_TE+f_BW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TE+f_BW)-c6o1*c1o54*( VeloX+VeloZ))/(c1o1+q) - c1o54 * drho;
       }
 
       q = q_dirBW[k];
@@ -2628,7 +3979,7 @@ extern "C" __global__ void QSlipGeomDeviceComp27(real* DD,
 		 VeloX *= phi;
 		 VeloZ *= phi;
          feq=c1o54* (drho/*+three*(-vx1    -vx3)*/+c9o2*(-vx1    -vx3)*(-vx1    -vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TE])[kte]=(c1o1-q)/(c1o1+q)*(f_BW-f_TE+(f_BW+f_TE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BW+f_TE)-c6o1*c1o54*(-VeloX-VeloZ))/(c1o1+q) - c1o54 * drho;
+         (D.f[DIR_P0P])[kte]=(c1o1-q)/(c1o1+q)*(f_BW-f_TE+(f_BW+f_TE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BW+f_TE)-c6o1*c1o54*(-VeloX-VeloZ))/(c1o1+q) - c1o54 * drho;
       }
 
       q = q_dirBE[k];
@@ -2644,7 +3995,7 @@ extern "C" __global__ void QSlipGeomDeviceComp27(real* DD,
 		 VeloX *= phi;
 		 VeloZ *= phi;
          feq=c1o54* (drho/*+three*( vx1    -vx3)*/+c9o2*( vx1    -vx3)*( vx1    -vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TW])[ktw]=(c1o1-q)/(c1o1+q)*(f_BE-f_TW+(f_BE+f_TW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BE+f_TW)-c6o1*c1o54*( VeloX-VeloZ))/(c1o1+q) - c1o54 * drho;
+         (D.f[DIR_M0P])[ktw]=(c1o1-q)/(c1o1+q)*(f_BE-f_TW+(f_BE+f_TW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BE+f_TW)-c6o1*c1o54*( VeloX-VeloZ))/(c1o1+q) - c1o54 * drho;
       }
 
       q = q_dirTW[k];
@@ -2660,7 +4011,7 @@ extern "C" __global__ void QSlipGeomDeviceComp27(real* DD,
 		 VeloX *= phi;
 		 VeloZ *= phi;
          feq=c1o54* (drho/*+three*(-vx1    +vx3)*/+c9o2*(-vx1    +vx3)*(-vx1    +vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BE])[kbe]=(c1o1-q)/(c1o1+q)*(f_TW-f_BE+(f_TW+f_BE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TW+f_BE)-c6o1*c1o54*(-VeloX+VeloZ))/(c1o1+q) - c1o54 * drho;
+         (D.f[DIR_P0M])[kbe]=(c1o1-q)/(c1o1+q)*(f_TW-f_BE+(f_TW+f_BE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TW+f_BE)-c6o1*c1o54*(-VeloX+VeloZ))/(c1o1+q) - c1o54 * drho;
       }
 
       q = q_dirTN[k];
@@ -2676,7 +4027,7 @@ extern "C" __global__ void QSlipGeomDeviceComp27(real* DD,
 		 VeloY *= phi;
 		 VeloZ *= phi;
          feq=c1o54* (drho/*+three*(     vx2+vx3)*/+c9o2*(     vx2+vx3)*(     vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BS])[kbs]=(c1o1-q)/(c1o1+q)*(f_TN-f_BS+(f_TN+f_BS-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TN+f_BS)-c6o1*c1o54*( VeloY+VeloZ))/(c1o1+q) - c1o54 * drho;
+         (D.f[DIR_0MM])[kbs]=(c1o1-q)/(c1o1+q)*(f_TN-f_BS+(f_TN+f_BS-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TN+f_BS)-c6o1*c1o54*( VeloY+VeloZ))/(c1o1+q) - c1o54 * drho;
       }
 
       q = q_dirBS[k];
@@ -2692,7 +4043,7 @@ extern "C" __global__ void QSlipGeomDeviceComp27(real* DD,
 		 VeloY *= phi;
 		 VeloZ *= phi;
          feq=c1o54* (drho/*+three*(    -vx2-vx3)*/+c9o2*(    -vx2-vx3)*(    -vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TN])[ktn]=(c1o1-q)/(c1o1+q)*(f_BS-f_TN+(f_BS+f_TN-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BS+f_TN)-c6o1*c1o54*( -VeloY-VeloZ))/(c1o1+q) - c1o54 * drho;
+         (D.f[DIR_0PP])[ktn]=(c1o1-q)/(c1o1+q)*(f_BS-f_TN+(f_BS+f_TN-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BS+f_TN)-c6o1*c1o54*( -VeloY-VeloZ))/(c1o1+q) - c1o54 * drho;
       }
 
       q = q_dirBN[k];
@@ -2708,7 +4059,7 @@ extern "C" __global__ void QSlipGeomDeviceComp27(real* DD,
 		 VeloY *= phi;
 		 VeloZ *= phi;
          feq=c1o54* (drho/*+three*(     vx2-vx3)*/+c9o2*(     vx2-vx3)*(     vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TS])[kts]=(c1o1-q)/(c1o1+q)*(f_BN-f_TS+(f_BN+f_TS-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BN+f_TS)-c6o1*c1o54*( VeloY-VeloZ))/(c1o1+q) - c1o54 * drho;
+         (D.f[DIR_0MP])[kts]=(c1o1-q)/(c1o1+q)*(f_BN-f_TS+(f_BN+f_TS-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BN+f_TS)-c6o1*c1o54*( VeloY-VeloZ))/(c1o1+q) - c1o54 * drho;
       }
 
       q = q_dirTS[k];
@@ -2724,7 +4075,7 @@ extern "C" __global__ void QSlipGeomDeviceComp27(real* DD,
 		 VeloY *= phi;
 		 VeloZ *= phi;
          feq=c1o54* (drho/*+three*(    -vx2+vx3)*/+c9o2*(    -vx2+vx3)*(    -vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BN])[kbn]=(c1o1-q)/(c1o1+q)*(f_TS-f_BN+(f_TS+f_BN-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TS+f_BN)-c6o1*c1o54*( -VeloY+VeloZ))/(c1o1+q) - c1o54 * drho;
+         (D.f[DIR_0PM])[kbn]=(c1o1-q)/(c1o1+q)*(f_TS-f_BN+(f_TS+f_BN-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TS+f_BN)-c6o1*c1o54*( -VeloY+VeloZ))/(c1o1+q) - c1o54 * drho;
       }
 
       q = q_dirTNE[k];
@@ -2742,7 +4093,7 @@ extern "C" __global__ void QSlipGeomDeviceComp27(real* DD,
 		 VeloY *= phi;
 		 VeloZ *= phi;
          feq=c1o216*(drho/*+three*( vx1+vx2+vx3)*/+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BSW])[kbsw]=(c1o1-q)/(c1o1+q)*(f_TNE-f_BSW+(f_TNE+f_BSW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TNE+f_BSW)-c6o1*c1o216*( VeloX+VeloY+VeloZ))/(c1o1+q) - c1o216 * drho;
+         (D.f[DIR_MMM])[kbsw]=(c1o1-q)/(c1o1+q)*(f_TNE-f_BSW+(f_TNE+f_BSW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TNE+f_BSW)-c6o1*c1o216*( VeloX+VeloY+VeloZ))/(c1o1+q) - c1o216 * drho;
       }
 
       q = q_dirBSW[k];
@@ -2760,7 +4111,7 @@ extern "C" __global__ void QSlipGeomDeviceComp27(real* DD,
 		 VeloY *= phi;
 		 VeloZ *= phi;
          feq=c1o216*(drho/*+three*(-vx1-vx2-vx3)*/+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TNE])[ktne]=(c1o1-q)/(c1o1+q)*(f_BSW-f_TNE+(f_BSW+f_TNE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BSW+f_TNE)-c6o1*c1o216*(-VeloX-VeloY-VeloZ))/(c1o1+q) - c1o216 * drho;
+         (D.f[DIR_PPP])[ktne]=(c1o1-q)/(c1o1+q)*(f_BSW-f_TNE+(f_BSW+f_TNE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BSW+f_TNE)-c6o1*c1o216*(-VeloX-VeloY-VeloZ))/(c1o1+q) - c1o216 * drho;
       }
 
       q = q_dirBNE[k];
@@ -2778,7 +4129,7 @@ extern "C" __global__ void QSlipGeomDeviceComp27(real* DD,
 		 VeloY *= phi;
 		 VeloZ *= phi;
          feq=c1o216*(drho/*+three*( vx1+vx2-vx3)*/+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TSW])[ktsw]=(c1o1-q)/(c1o1+q)*(f_BNE-f_TSW+(f_BNE+f_TSW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BNE+f_TSW)-c6o1*c1o216*( VeloX+VeloY-VeloZ))/(c1o1+q) - c1o216 * drho;
+         (D.f[DIR_MMP])[ktsw]=(c1o1-q)/(c1o1+q)*(f_BNE-f_TSW+(f_BNE+f_TSW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BNE+f_TSW)-c6o1*c1o216*( VeloX+VeloY-VeloZ))/(c1o1+q) - c1o216 * drho;
       }
 
       q = q_dirTSW[k];
@@ -2796,7 +4147,7 @@ extern "C" __global__ void QSlipGeomDeviceComp27(real* DD,
 		 VeloY *= phi;
 		 VeloZ *= phi;
          feq=c1o216*(drho/*+three*(-vx1-vx2+vx3)*/+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BNE])[kbne]=(c1o1-q)/(c1o1+q)*(f_TSW-f_BNE+(f_TSW+f_BNE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TSW+f_BNE)-c6o1*c1o216*(-VeloX-VeloY+VeloZ))/(c1o1+q) - c1o216 * drho;
+         (D.f[DIR_PPM])[kbne]=(c1o1-q)/(c1o1+q)*(f_TSW-f_BNE+(f_TSW+f_BNE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TSW+f_BNE)-c6o1*c1o216*(-VeloX-VeloY+VeloZ))/(c1o1+q) - c1o216 * drho;
       }
 
       q = q_dirTSE[k];
@@ -2814,7 +4165,7 @@ extern "C" __global__ void QSlipGeomDeviceComp27(real* DD,
 		 VeloY *= phi;
 		 VeloZ *= phi;
          feq=c1o216*(drho/*+three*( vx1-vx2+vx3)*/+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BNW])[kbnw]=(c1o1-q)/(c1o1+q)*(f_TSE-f_BNW+(f_TSE+f_BNW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TSE+f_BNW)-c6o1*c1o216*( VeloX-VeloY+VeloZ))/(c1o1+q) - c1o216 * drho;
+         (D.f[DIR_MPM])[kbnw]=(c1o1-q)/(c1o1+q)*(f_TSE-f_BNW+(f_TSE+f_BNW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TSE+f_BNW)-c6o1*c1o216*( VeloX-VeloY+VeloZ))/(c1o1+q) - c1o216 * drho;
       }
 
       q = q_dirBNW[k];
@@ -2832,7 +4183,7 @@ extern "C" __global__ void QSlipGeomDeviceComp27(real* DD,
 		 VeloY *= phi;
 		 VeloZ *= phi;
          feq=c1o216*(drho/*+three*(-vx1+vx2-vx3)*/+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TSE])[ktse]=(c1o1-q)/(c1o1+q)*(f_BNW-f_TSE+(f_BNW+f_TSE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BNW+f_TSE)-c6o1*c1o216*(-VeloX+VeloY-VeloZ))/(c1o1+q) - c1o216 * drho;
+         (D.f[DIR_PMP])[ktse]=(c1o1-q)/(c1o1+q)*(f_BNW-f_TSE+(f_BNW+f_TSE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BNW+f_TSE)-c6o1*c1o216*(-VeloX+VeloY-VeloZ))/(c1o1+q) - c1o216 * drho;
       }
 
       q = q_dirBSE[k];
@@ -2850,7 +4201,7 @@ extern "C" __global__ void QSlipGeomDeviceComp27(real* DD,
 		 VeloY *= phi;
 		 VeloZ *= phi;
          feq=c1o216*(drho/*+three*( vx1-vx2-vx3)*/+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TNW])[ktnw]=(c1o1-q)/(c1o1+q)*(f_BSE-f_TNW+(f_BSE+f_TNW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BSE+f_TNW)-c6o1*c1o216*( VeloX-VeloY-VeloZ))/(c1o1+q) - c1o216 * drho;
+         (D.f[DIR_MPP])[ktnw]=(c1o1-q)/(c1o1+q)*(f_BSE-f_TNW+(f_BSE+f_TNW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BSE+f_TNW)-c6o1*c1o216*( VeloX-VeloY-VeloZ))/(c1o1+q) - c1o216 * drho;
       }
 
       q = q_dirTNW[k];
@@ -2868,7 +4219,7 @@ extern "C" __global__ void QSlipGeomDeviceComp27(real* DD,
 		 VeloY *= phi;
 		 VeloZ *= phi;
          feq=c1o216*(drho/*+three*(-vx1+vx2+vx3)*/+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BSE])[kbse]=(c1o1-q)/(c1o1+q)*(f_TNW-f_BSE+(f_TNW+f_BSE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TNW+f_BSE)-c6o1*c1o216*(-VeloX+VeloY+VeloZ))/(c1o1+q) - c1o216 * drho;
+         (D.f[DIR_PMM])[kbse]=(c1o1-q)/(c1o1+q)*(f_TNW-f_BSE+(f_TNW+f_BSE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TNW+f_BSE)-c6o1*c1o216*(-VeloX+VeloY+VeloZ))/(c1o1+q) - c1o216 * drho;
       }
    }
 }
@@ -2913,7 +4264,7 @@ extern "C" __global__ void QSlipGeomDeviceComp27(real* DD,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QSlipNormDeviceComp27(real* DD, 
+__global__ void QSlipNormDeviceComp27(real* DD, 
 												 int* k_Q, 
 												 real* QQ,
 												 unsigned int  numberOfBCnodes,
@@ -2930,63 +4281,63 @@ extern "C" __global__ void QSlipNormDeviceComp27(real* DD,
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[E   ] = &DD[E   *size_Mat];
-      D.f[W   ] = &DD[W   *size_Mat];
-      D.f[N   ] = &DD[N   *size_Mat];
-      D.f[S   ] = &DD[S   *size_Mat];
-      D.f[T   ] = &DD[T   *size_Mat];
-      D.f[B   ] = &DD[B   *size_Mat];
-      D.f[NE  ] = &DD[NE  *size_Mat];
-      D.f[SW  ] = &DD[SW  *size_Mat];
-      D.f[SE  ] = &DD[SE  *size_Mat];
-      D.f[NW  ] = &DD[NW  *size_Mat];
-      D.f[TE  ] = &DD[TE  *size_Mat];
-      D.f[BW  ] = &DD[BW  *size_Mat];
-      D.f[BE  ] = &DD[BE  *size_Mat];
-      D.f[TW  ] = &DD[TW  *size_Mat];
-      D.f[TN  ] = &DD[TN  *size_Mat];
-      D.f[BS  ] = &DD[BS  *size_Mat];
-      D.f[BN  ] = &DD[BN  *size_Mat];
-      D.f[TS  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[TNE *size_Mat];
-      D.f[TSW ] = &DD[TSW *size_Mat];
-      D.f[TSE ] = &DD[TSE *size_Mat];
-      D.f[TNW ] = &DD[TNW *size_Mat];
-      D.f[BNE ] = &DD[BNE *size_Mat];
-      D.f[BSW ] = &DD[BSW *size_Mat];
-      D.f[BSE ] = &DD[BSE *size_Mat];
-      D.f[BNW ] = &DD[BNW *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
    } 
    else
    {
-      D.f[W   ] = &DD[E   *size_Mat];
-      D.f[E   ] = &DD[W   *size_Mat];
-      D.f[S   ] = &DD[N   *size_Mat];
-      D.f[N   ] = &DD[S   *size_Mat];
-      D.f[B   ] = &DD[T   *size_Mat];
-      D.f[T   ] = &DD[B   *size_Mat];
-      D.f[SW  ] = &DD[NE  *size_Mat];
-      D.f[NE  ] = &DD[SW  *size_Mat];
-      D.f[NW  ] = &DD[SE  *size_Mat];
-      D.f[SE  ] = &DD[NW  *size_Mat];
-      D.f[BW  ] = &DD[TE  *size_Mat];
-      D.f[TE  ] = &DD[BW  *size_Mat];
-      D.f[TW  ] = &DD[BE  *size_Mat];
-      D.f[BE  ] = &DD[TW  *size_Mat];
-      D.f[BS  ] = &DD[TN  *size_Mat];
-      D.f[TN  ] = &DD[BS  *size_Mat];
-      D.f[TS  ] = &DD[BN  *size_Mat];
-      D.f[BN  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[BSW *size_Mat];
-      D.f[TSW ] = &DD[BNE *size_Mat];
-      D.f[TSE ] = &DD[BNW *size_Mat];
-      D.f[TNW ] = &DD[BSE *size_Mat];
-      D.f[BNE ] = &DD[TSW *size_Mat];
-      D.f[BSW ] = &DD[TNE *size_Mat];
-      D.f[BSE ] = &DD[TNW *size_Mat];
-      D.f[BNW ] = &DD[TSE *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -3007,128 +4358,128 @@ extern "C" __global__ void QSlipNormDeviceComp27(real* DD,
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[E   * numberOfBCnodes];
-      q_dirW   = &QQ[W   * numberOfBCnodes];
-      q_dirN   = &QQ[N   * numberOfBCnodes];
-      q_dirS   = &QQ[S   * numberOfBCnodes];
-      q_dirT   = &QQ[T   * numberOfBCnodes];
-      q_dirB   = &QQ[B   * numberOfBCnodes];
-      q_dirNE  = &QQ[NE  * numberOfBCnodes];
-      q_dirSW  = &QQ[SW  * numberOfBCnodes];
-      q_dirSE  = &QQ[SE  * numberOfBCnodes];
-      q_dirNW  = &QQ[NW  * numberOfBCnodes];
-      q_dirTE  = &QQ[TE  * numberOfBCnodes];
-      q_dirBW  = &QQ[BW  * numberOfBCnodes];
-      q_dirBE  = &QQ[BE  * numberOfBCnodes];
-      q_dirTW  = &QQ[TW  * numberOfBCnodes];
-      q_dirTN  = &QQ[TN  * numberOfBCnodes];
-      q_dirBS  = &QQ[BS  * numberOfBCnodes];
-      q_dirBN  = &QQ[BN  * numberOfBCnodes];
-      q_dirTS  = &QQ[TS  * numberOfBCnodes];
-      q_dirTNE = &QQ[TNE * numberOfBCnodes];
-      q_dirTSW = &QQ[TSW * numberOfBCnodes];
-      q_dirTSE = &QQ[TSE * numberOfBCnodes];
-      q_dirTNW = &QQ[TNW * numberOfBCnodes];
-      q_dirBNE = &QQ[BNE * numberOfBCnodes];
-      q_dirBSW = &QQ[BSW * numberOfBCnodes];
-      q_dirBSE = &QQ[BSE * numberOfBCnodes];
-      q_dirBNW = &QQ[BNW * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
+      q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
+      q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
+      q_dirTNW = &QQ[DIR_MPP * numberOfBCnodes];
+      q_dirBNE = &QQ[DIR_PPM * numberOfBCnodes];
+      q_dirBSW = &QQ[DIR_MMM * numberOfBCnodes];
+      q_dirBSE = &QQ[DIR_PMM * numberOfBCnodes];
+      q_dirBNW = &QQ[DIR_MPM * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       real *nx_dirE,   *nx_dirW,   *nx_dirN,   *nx_dirS,   *nx_dirT,   *nx_dirB, 
               *nx_dirNE,  *nx_dirSW,  *nx_dirSE,  *nx_dirNW,  *nx_dirTE,  *nx_dirBW,
               *nx_dirBE,  *nx_dirTW,  *nx_dirTN,  *nx_dirBS,  *nx_dirBN,  *nx_dirTS,
               *nx_dirTNE, *nx_dirTSW, *nx_dirTSE, *nx_dirTNW, *nx_dirBNE, *nx_dirBSW,
               *nx_dirBSE, *nx_dirBNW; 
-      nx_dirE   = &NormalX[E   * numberOfBCnodes];
-      nx_dirW   = &NormalX[W   * numberOfBCnodes];
-      nx_dirN   = &NormalX[N   * numberOfBCnodes];
-      nx_dirS   = &NormalX[S   * numberOfBCnodes];
-      nx_dirT   = &NormalX[T   * numberOfBCnodes];
-      nx_dirB   = &NormalX[B   * numberOfBCnodes];
-      nx_dirNE  = &NormalX[NE  * numberOfBCnodes];
-      nx_dirSW  = &NormalX[SW  * numberOfBCnodes];
-      nx_dirSE  = &NormalX[SE  * numberOfBCnodes];
-      nx_dirNW  = &NormalX[NW  * numberOfBCnodes];
-      nx_dirTE  = &NormalX[TE  * numberOfBCnodes];
-      nx_dirBW  = &NormalX[BW  * numberOfBCnodes];
-      nx_dirBE  = &NormalX[BE  * numberOfBCnodes];
-      nx_dirTW  = &NormalX[TW  * numberOfBCnodes];
-      nx_dirTN  = &NormalX[TN  * numberOfBCnodes];
-      nx_dirBS  = &NormalX[BS  * numberOfBCnodes];
-      nx_dirBN  = &NormalX[BN  * numberOfBCnodes];
-      nx_dirTS  = &NormalX[TS  * numberOfBCnodes];
-      nx_dirTNE = &NormalX[TNE * numberOfBCnodes];
-      nx_dirTSW = &NormalX[TSW * numberOfBCnodes];
-      nx_dirTSE = &NormalX[TSE * numberOfBCnodes];
-      nx_dirTNW = &NormalX[TNW * numberOfBCnodes];
-      nx_dirBNE = &NormalX[BNE * numberOfBCnodes];
-      nx_dirBSW = &NormalX[BSW * numberOfBCnodes];
-      nx_dirBSE = &NormalX[BSE * numberOfBCnodes];
-      nx_dirBNW = &NormalX[BNW * numberOfBCnodes];
+      nx_dirE   = &NormalX[DIR_P00   * numberOfBCnodes];
+      nx_dirW   = &NormalX[DIR_M00   * numberOfBCnodes];
+      nx_dirN   = &NormalX[DIR_0P0   * numberOfBCnodes];
+      nx_dirS   = &NormalX[DIR_0M0   * numberOfBCnodes];
+      nx_dirT   = &NormalX[DIR_00P   * numberOfBCnodes];
+      nx_dirB   = &NormalX[DIR_00M   * numberOfBCnodes];
+      nx_dirNE  = &NormalX[DIR_PP0  * numberOfBCnodes];
+      nx_dirSW  = &NormalX[DIR_MM0  * numberOfBCnodes];
+      nx_dirSE  = &NormalX[DIR_PM0  * numberOfBCnodes];
+      nx_dirNW  = &NormalX[DIR_MP0  * numberOfBCnodes];
+      nx_dirTE  = &NormalX[DIR_P0P  * numberOfBCnodes];
+      nx_dirBW  = &NormalX[DIR_M0M  * numberOfBCnodes];
+      nx_dirBE  = &NormalX[DIR_P0M  * numberOfBCnodes];
+      nx_dirTW  = &NormalX[DIR_M0P  * numberOfBCnodes];
+      nx_dirTN  = &NormalX[DIR_0PP  * numberOfBCnodes];
+      nx_dirBS  = &NormalX[DIR_0MM  * numberOfBCnodes];
+      nx_dirBN  = &NormalX[DIR_0PM  * numberOfBCnodes];
+      nx_dirTS  = &NormalX[DIR_0MP  * numberOfBCnodes];
+      nx_dirTNE = &NormalX[DIR_PPP * numberOfBCnodes];
+      nx_dirTSW = &NormalX[DIR_MMP * numberOfBCnodes];
+      nx_dirTSE = &NormalX[DIR_PMP * numberOfBCnodes];
+      nx_dirTNW = &NormalX[DIR_MPP * numberOfBCnodes];
+      nx_dirBNE = &NormalX[DIR_PPM * numberOfBCnodes];
+      nx_dirBSW = &NormalX[DIR_MMM * numberOfBCnodes];
+      nx_dirBSE = &NormalX[DIR_PMM * numberOfBCnodes];
+      nx_dirBNW = &NormalX[DIR_MPM * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       real *ny_dirE,   *ny_dirW,   *ny_dirN,   *ny_dirS,   *ny_dirT,   *ny_dirB, 
               *ny_dirNE,  *ny_dirSW,  *ny_dirSE,  *ny_dirNW,  *ny_dirTE,  *ny_dirBW,
               *ny_dirBE,  *ny_dirTW,  *ny_dirTN,  *ny_dirBS,  *ny_dirBN,  *ny_dirTS,
               *ny_dirTNE, *ny_dirTSW, *ny_dirTSE, *ny_dirTNW, *ny_dirBNE, *ny_dirBSW,
               *ny_dirBSE, *ny_dirBNW; 
-      ny_dirE   = &NormalY[E   * numberOfBCnodes];
-      ny_dirW   = &NormalY[W   * numberOfBCnodes];
-      ny_dirN   = &NormalY[N   * numberOfBCnodes];
-      ny_dirS   = &NormalY[S   * numberOfBCnodes];
-      ny_dirT   = &NormalY[T   * numberOfBCnodes];
-      ny_dirB   = &NormalY[B   * numberOfBCnodes];
-      ny_dirNE  = &NormalY[NE  * numberOfBCnodes];
-      ny_dirSW  = &NormalY[SW  * numberOfBCnodes];
-      ny_dirSE  = &NormalY[SE  * numberOfBCnodes];
-      ny_dirNW  = &NormalY[NW  * numberOfBCnodes];
-      ny_dirTE  = &NormalY[TE  * numberOfBCnodes];
-      ny_dirBW  = &NormalY[BW  * numberOfBCnodes];
-      ny_dirBE  = &NormalY[BE  * numberOfBCnodes];
-      ny_dirTW  = &NormalY[TW  * numberOfBCnodes];
-      ny_dirTN  = &NormalY[TN  * numberOfBCnodes];
-      ny_dirBS  = &NormalY[BS  * numberOfBCnodes];
-      ny_dirBN  = &NormalY[BN  * numberOfBCnodes];
-      ny_dirTS  = &NormalY[TS  * numberOfBCnodes];
-      ny_dirTNE = &NormalY[TNE * numberOfBCnodes];
-      ny_dirTSW = &NormalY[TSW * numberOfBCnodes];
-      ny_dirTSE = &NormalY[TSE * numberOfBCnodes];
-      ny_dirTNW = &NormalY[TNW * numberOfBCnodes];
-      ny_dirBNE = &NormalY[BNE * numberOfBCnodes];
-      ny_dirBSW = &NormalY[BSW * numberOfBCnodes];
-      ny_dirBSE = &NormalY[BSE * numberOfBCnodes];
-      ny_dirBNW = &NormalY[BNW * numberOfBCnodes];
+      ny_dirE   = &NormalY[DIR_P00   * numberOfBCnodes];
+      ny_dirW   = &NormalY[DIR_M00   * numberOfBCnodes];
+      ny_dirN   = &NormalY[DIR_0P0   * numberOfBCnodes];
+      ny_dirS   = &NormalY[DIR_0M0   * numberOfBCnodes];
+      ny_dirT   = &NormalY[DIR_00P   * numberOfBCnodes];
+      ny_dirB   = &NormalY[DIR_00M   * numberOfBCnodes];
+      ny_dirNE  = &NormalY[DIR_PP0  * numberOfBCnodes];
+      ny_dirSW  = &NormalY[DIR_MM0  * numberOfBCnodes];
+      ny_dirSE  = &NormalY[DIR_PM0  * numberOfBCnodes];
+      ny_dirNW  = &NormalY[DIR_MP0  * numberOfBCnodes];
+      ny_dirTE  = &NormalY[DIR_P0P  * numberOfBCnodes];
+      ny_dirBW  = &NormalY[DIR_M0M  * numberOfBCnodes];
+      ny_dirBE  = &NormalY[DIR_P0M  * numberOfBCnodes];
+      ny_dirTW  = &NormalY[DIR_M0P  * numberOfBCnodes];
+      ny_dirTN  = &NormalY[DIR_0PP  * numberOfBCnodes];
+      ny_dirBS  = &NormalY[DIR_0MM  * numberOfBCnodes];
+      ny_dirBN  = &NormalY[DIR_0PM  * numberOfBCnodes];
+      ny_dirTS  = &NormalY[DIR_0MP  * numberOfBCnodes];
+      ny_dirTNE = &NormalY[DIR_PPP * numberOfBCnodes];
+      ny_dirTSW = &NormalY[DIR_MMP * numberOfBCnodes];
+      ny_dirTSE = &NormalY[DIR_PMP * numberOfBCnodes];
+      ny_dirTNW = &NormalY[DIR_MPP * numberOfBCnodes];
+      ny_dirBNE = &NormalY[DIR_PPM * numberOfBCnodes];
+      ny_dirBSW = &NormalY[DIR_MMM * numberOfBCnodes];
+      ny_dirBSE = &NormalY[DIR_PMM * numberOfBCnodes];
+      ny_dirBNW = &NormalY[DIR_MPM * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       real *nz_dirE,   *nz_dirW,   *nz_dirN,   *nz_dirS,   *nz_dirT,   *nz_dirB, 
               *nz_dirNE,  *nz_dirSW,  *nz_dirSE,  *nz_dirNW,  *nz_dirTE,  *nz_dirBW,
               *nz_dirBE,  *nz_dirTW,  *nz_dirTN,  *nz_dirBS,  *nz_dirBN,  *nz_dirTS,
               *nz_dirTNE, *nz_dirTSW, *nz_dirTSE, *nz_dirTNW, *nz_dirBNE, *nz_dirBSW,
               *nz_dirBSE, *nz_dirBNW; 
-      nz_dirE   = &NormalZ[E   * numberOfBCnodes];
-      nz_dirW   = &NormalZ[W   * numberOfBCnodes];
-      nz_dirN   = &NormalZ[N   * numberOfBCnodes];
-      nz_dirS   = &NormalZ[S   * numberOfBCnodes];
-      nz_dirT   = &NormalZ[T   * numberOfBCnodes];
-      nz_dirB   = &NormalZ[B   * numberOfBCnodes];
-      nz_dirNE  = &NormalZ[NE  * numberOfBCnodes];
-      nz_dirSW  = &NormalZ[SW  * numberOfBCnodes];
-      nz_dirSE  = &NormalZ[SE  * numberOfBCnodes];
-      nz_dirNW  = &NormalZ[NW  * numberOfBCnodes];
-      nz_dirTE  = &NormalZ[TE  * numberOfBCnodes];
-      nz_dirBW  = &NormalZ[BW  * numberOfBCnodes];
-      nz_dirBE  = &NormalZ[BE  * numberOfBCnodes];
-      nz_dirTW  = &NormalZ[TW  * numberOfBCnodes];
-      nz_dirTN  = &NormalZ[TN  * numberOfBCnodes];
-      nz_dirBS  = &NormalZ[BS  * numberOfBCnodes];
-      nz_dirBN  = &NormalZ[BN  * numberOfBCnodes];
-      nz_dirTS  = &NormalZ[TS  * numberOfBCnodes];
-      nz_dirTNE = &NormalZ[TNE * numberOfBCnodes];
-      nz_dirTSW = &NormalZ[TSW * numberOfBCnodes];
-      nz_dirTSE = &NormalZ[TSE * numberOfBCnodes];
-      nz_dirTNW = &NormalZ[TNW * numberOfBCnodes];
-      nz_dirBNE = &NormalZ[BNE * numberOfBCnodes];
-      nz_dirBSW = &NormalZ[BSW * numberOfBCnodes];
-      nz_dirBSE = &NormalZ[BSE * numberOfBCnodes];
-      nz_dirBNW = &NormalZ[BNW * numberOfBCnodes];
+      nz_dirE   = &NormalZ[DIR_P00   * numberOfBCnodes];
+      nz_dirW   = &NormalZ[DIR_M00   * numberOfBCnodes];
+      nz_dirN   = &NormalZ[DIR_0P0   * numberOfBCnodes];
+      nz_dirS   = &NormalZ[DIR_0M0   * numberOfBCnodes];
+      nz_dirT   = &NormalZ[DIR_00P   * numberOfBCnodes];
+      nz_dirB   = &NormalZ[DIR_00M   * numberOfBCnodes];
+      nz_dirNE  = &NormalZ[DIR_PP0  * numberOfBCnodes];
+      nz_dirSW  = &NormalZ[DIR_MM0  * numberOfBCnodes];
+      nz_dirSE  = &NormalZ[DIR_PM0  * numberOfBCnodes];
+      nz_dirNW  = &NormalZ[DIR_MP0  * numberOfBCnodes];
+      nz_dirTE  = &NormalZ[DIR_P0P  * numberOfBCnodes];
+      nz_dirBW  = &NormalZ[DIR_M0M  * numberOfBCnodes];
+      nz_dirBE  = &NormalZ[DIR_P0M  * numberOfBCnodes];
+      nz_dirTW  = &NormalZ[DIR_M0P  * numberOfBCnodes];
+      nz_dirTN  = &NormalZ[DIR_0PP  * numberOfBCnodes];
+      nz_dirBS  = &NormalZ[DIR_0MM  * numberOfBCnodes];
+      nz_dirBN  = &NormalZ[DIR_0PM  * numberOfBCnodes];
+      nz_dirTS  = &NormalZ[DIR_0MP  * numberOfBCnodes];
+      nz_dirTNE = &NormalZ[DIR_PPP * numberOfBCnodes];
+      nz_dirTSW = &NormalZ[DIR_MMP * numberOfBCnodes];
+      nz_dirTSE = &NormalZ[DIR_PMP * numberOfBCnodes];
+      nz_dirTNW = &NormalZ[DIR_MPP * numberOfBCnodes];
+      nz_dirBNE = &NormalZ[DIR_PPM * numberOfBCnodes];
+      nz_dirBSW = &NormalZ[DIR_MMM * numberOfBCnodes];
+      nz_dirBSE = &NormalZ[DIR_PMM * numberOfBCnodes];
+      nz_dirBNW = &NormalZ[DIR_MPM * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -3160,37 +4511,37 @@ extern "C" __global__ void QSlipNormDeviceComp27(real* DD,
       unsigned int ktne = KQK;
       unsigned int kbsw = neighborZ[ksw];
       ////////////////////////////////////////////////////////////////////////////////
-      real f_W    = (D.f[E   ])[ke   ];
-      real f_E    = (D.f[W   ])[kw   ];
-      real f_S    = (D.f[N   ])[kn   ];
-      real f_N    = (D.f[S   ])[ks   ];
-      real f_B    = (D.f[T   ])[kt   ];
-      real f_T    = (D.f[B   ])[kb   ];
-      real f_SW   = (D.f[NE  ])[kne  ];
-      real f_NE   = (D.f[SW  ])[ksw  ];
-      real f_NW   = (D.f[SE  ])[kse  ];
-      real f_SE   = (D.f[NW  ])[knw  ];
-      real f_BW   = (D.f[TE  ])[kte  ];
-      real f_TE   = (D.f[BW  ])[kbw  ];
-      real f_TW   = (D.f[BE  ])[kbe  ];
-      real f_BE   = (D.f[TW  ])[ktw  ];
-      real f_BS   = (D.f[TN  ])[ktn  ];
-      real f_TN   = (D.f[BS  ])[kbs  ];
-      real f_TS   = (D.f[BN  ])[kbn  ];
-      real f_BN   = (D.f[TS  ])[kts  ];
-      real f_BSW  = (D.f[TNE ])[ktne ];
-      real f_BNE  = (D.f[TSW ])[ktsw ];
-      real f_BNW  = (D.f[TSE ])[ktse ];
-      real f_BSE  = (D.f[TNW ])[ktnw ];
-      real f_TSW  = (D.f[BNE ])[kbne ];
-      real f_TNE  = (D.f[BSW ])[kbsw ];
-      real f_TNW  = (D.f[BSE ])[kbse ];
-      real f_TSE  = (D.f[BNW ])[kbnw ];
+      real f_W    = (D.f[DIR_P00   ])[ke   ];
+      real f_E    = (D.f[DIR_M00   ])[kw   ];
+      real f_S    = (D.f[DIR_0P0   ])[kn   ];
+      real f_N    = (D.f[DIR_0M0   ])[ks   ];
+      real f_B    = (D.f[DIR_00P   ])[kt   ];
+      real f_T    = (D.f[DIR_00M   ])[kb   ];
+      real f_SW   = (D.f[DIR_PP0  ])[kne  ];
+      real f_NE   = (D.f[DIR_MM0  ])[ksw  ];
+      real f_NW   = (D.f[DIR_PM0  ])[kse  ];
+      real f_SE   = (D.f[DIR_MP0  ])[knw  ];
+      real f_BW   = (D.f[DIR_P0P  ])[kte  ];
+      real f_TE   = (D.f[DIR_M0M  ])[kbw  ];
+      real f_TW   = (D.f[DIR_P0M  ])[kbe  ];
+      real f_BE   = (D.f[DIR_M0P  ])[ktw  ];
+      real f_BS   = (D.f[DIR_0PP  ])[ktn  ];
+      real f_TN   = (D.f[DIR_0MM  ])[kbs  ];
+      real f_TS   = (D.f[DIR_0PM  ])[kbn  ];
+      real f_BN   = (D.f[DIR_0MP  ])[kts  ];
+      real f_BSW  = (D.f[DIR_PPP ])[ktne ];
+      real f_BNE  = (D.f[DIR_MMP ])[ktsw ];
+      real f_BNW  = (D.f[DIR_PMP ])[ktse ];
+      real f_BSE  = (D.f[DIR_MPP ])[ktnw ];
+      real f_TSW  = (D.f[DIR_PPM ])[kbne ];
+      real f_TNE  = (D.f[DIR_MMM ])[kbsw ];
+      real f_TNW  = (D.f[DIR_PMM ])[kbse ];
+      real f_TSE  = (D.f[DIR_MPM ])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, drho, feq, q;
       drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
                 f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + 
-                f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[REST])[kzero]); 
+                f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[DIR_000])[kzero]); 
 
       vx1    =  (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
                 ((f_BE - f_TW)   + (f_TE - f_BW))   + ((f_SE - f_NW)   + (f_NE - f_SW)) +
@@ -3210,63 +4561,63 @@ extern "C" __global__ void QSlipNormDeviceComp27(real* DD,
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D.f[E   ] = &DD[E   *size_Mat];
-         D.f[W   ] = &DD[W   *size_Mat];
-         D.f[N   ] = &DD[N   *size_Mat];
-         D.f[S   ] = &DD[S   *size_Mat];
-         D.f[T   ] = &DD[T   *size_Mat];
-         D.f[B   ] = &DD[B   *size_Mat];
-         D.f[NE  ] = &DD[NE  *size_Mat];
-         D.f[SW  ] = &DD[SW  *size_Mat];
-         D.f[SE  ] = &DD[SE  *size_Mat];
-         D.f[NW  ] = &DD[NW  *size_Mat];
-         D.f[TE  ] = &DD[TE  *size_Mat];
-         D.f[BW  ] = &DD[BW  *size_Mat];
-         D.f[BE  ] = &DD[BE  *size_Mat];
-         D.f[TW  ] = &DD[TW  *size_Mat];
-         D.f[TN  ] = &DD[TN  *size_Mat];
-         D.f[BS  ] = &DD[BS  *size_Mat];
-         D.f[BN  ] = &DD[BN  *size_Mat];
-         D.f[TS  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[TNE *size_Mat];
-         D.f[TSW ] = &DD[TSW *size_Mat];
-         D.f[TSE ] = &DD[TSE *size_Mat];
-         D.f[TNW ] = &DD[TNW *size_Mat];
-         D.f[BNE ] = &DD[BNE *size_Mat];
-         D.f[BSW ] = &DD[BSW *size_Mat];
-         D.f[BSE ] = &DD[BSE *size_Mat];
-         D.f[BNW ] = &DD[BNW *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
       } 
       else
       {
-         D.f[W   ] = &DD[E   *size_Mat];
-         D.f[E   ] = &DD[W   *size_Mat];
-         D.f[S   ] = &DD[N   *size_Mat];
-         D.f[N   ] = &DD[S   *size_Mat];
-         D.f[B   ] = &DD[T   *size_Mat];
-         D.f[T   ] = &DD[B   *size_Mat];
-         D.f[SW  ] = &DD[NE  *size_Mat];
-         D.f[NE  ] = &DD[SW  *size_Mat];
-         D.f[NW  ] = &DD[SE  *size_Mat];
-         D.f[SE  ] = &DD[NW  *size_Mat];
-         D.f[BW  ] = &DD[TE  *size_Mat];
-         D.f[TE  ] = &DD[BW  *size_Mat];
-         D.f[TW  ] = &DD[BE  *size_Mat];
-         D.f[BE  ] = &DD[TW  *size_Mat];
-         D.f[BS  ] = &DD[TN  *size_Mat];
-         D.f[TN  ] = &DD[BS  *size_Mat];
-         D.f[TS  ] = &DD[BN  *size_Mat];
-         D.f[BN  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[BSW *size_Mat];
-         D.f[TSW ] = &DD[BNE *size_Mat];
-         D.f[TSE ] = &DD[BNW *size_Mat];
-         D.f[TNW ] = &DD[BSE *size_Mat];
-         D.f[BNE ] = &DD[TSW *size_Mat];
-         D.f[BSW ] = &DD[TNE *size_Mat];
-         D.f[BSE ] = &DD[TNW *size_Mat];
-         D.f[BNW ] = &DD[TSE *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  real VeloX = vx1;
@@ -3288,11 +4639,11 @@ extern "C" __global__ void QSlipNormDeviceComp27(real* DD,
 
 	  fac = fac * magS / (c1o3 * (c1o1 / om1 - c1o2));
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-	  real *facAst = &QQ[REST * numberOfBCnodes];
+	  real *facAst = &QQ[DIR_000 * numberOfBCnodes];
 
 	  fac = fac * alpha + facAst[k] * (c1o1 - alpha);
 	  facAst[k] = fac;
-	  //(&QQ[REST * numberOfBCnodes])[KQK] = fac;
+	  //(&QQ[DIR_000 * numberOfBCnodes])[KQK] = fac;
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  ////real uk = sqrtf(vx1*vx1 + vx2*vx2 + vx3*vx3);
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -3354,7 +4705,7 @@ extern "C" __global__ void QSlipNormDeviceComp27(real* DD,
 		 //tangential = (tangential > one) ? one:tangential;
 		 q = (q + qSlip)/(c1o1 + qSlip * (c1o1 - tangential) / (smallSingle + q));
          feq=c2o27* (drho/*+three*( vx1        )*/+c9o2*( vx1        )*( vx1        ) * (c1o1 + drho)-cu_sq); 
-         (D.f[W])[kw]=(c1o1-q)/(c1o1+q)*(f_E-f_W+(f_E+f_W-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_E+f_W))/(c1o1+q) - c2o27 * drho;
+         (D.f[DIR_M00])[kw]=(c1o1-q)/(c1o1+q)*(f_E-f_W+(f_E+f_W-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_E+f_W))/(c1o1+q) - c2o27 * drho;
       }
 
       q = q_dirW[k];
@@ -3369,7 +4720,7 @@ extern "C" __global__ void QSlipNormDeviceComp27(real* DD,
 		 //tangential = (tangential > one) ? one:tangential;
 		 q = (q + qSlip)/(c1o1 + qSlip * (c1o1 - tangential) / (smallSingle + q));
          feq=c2o27* (drho/*+three*(-vx1        )*/+c9o2*(-vx1        )*(-vx1        ) * (c1o1 + drho)-cu_sq); 
-         (D.f[E])[ke]=(c1o1-q)/(c1o1+q)*(f_W-f_E+(f_W+f_E-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_W+f_E))/(c1o1+q) - c2o27 * drho;
+         (D.f[DIR_P00])[ke]=(c1o1-q)/(c1o1+q)*(f_W-f_E+(f_W+f_E-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_W+f_E))/(c1o1+q) - c2o27 * drho;
       }
 
       q = q_dirN[k];
@@ -3384,7 +4735,7 @@ extern "C" __global__ void QSlipNormDeviceComp27(real* DD,
 		 //tangential = (tangential > one) ? one:tangential;
 		 q = (q + qSlip)/(c1o1 + qSlip * (c1o1 - tangential) / (smallSingle + q));
          feq=c2o27* (drho/*+three*(    vx2     )*/+c9o2*(     vx2    )*(     vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[S])[ks]=(c1o1-q)/(c1o1+q)*(f_N-f_S+(f_N+f_S-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_N+f_S))/(c1o1+q) - c2o27 * drho;
+         (D.f[DIR_0M0])[ks]=(c1o1-q)/(c1o1+q)*(f_N-f_S+(f_N+f_S-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_N+f_S))/(c1o1+q) - c2o27 * drho;
       }
 
       q = q_dirS[k];
@@ -3399,7 +4750,7 @@ extern "C" __global__ void QSlipNormDeviceComp27(real* DD,
 		 //tangential = (tangential > one) ? one:tangential;
 		 q = (q + qSlip)/(c1o1 + qSlip * (c1o1 - tangential) / (smallSingle + q));
          feq=c2o27* (drho/*+three*(   -vx2     )*/+c9o2*(    -vx2    )*(    -vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[N])[kn]=(c1o1-q)/(c1o1+q)*(f_S-f_N+(f_S+f_N-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_S+f_N))/(c1o1+q) - c2o27 * drho;
+         (D.f[DIR_0P0])[kn]=(c1o1-q)/(c1o1+q)*(f_S-f_N+(f_S+f_N-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_S+f_N))/(c1o1+q) - c2o27 * drho;
       }
 
       q = q_dirT[k];
@@ -3414,7 +4765,7 @@ extern "C" __global__ void QSlipNormDeviceComp27(real* DD,
 		 //tangential = (tangential > one) ? one:tangential;
 		 q = (q + qSlip)/(c1o1 + qSlip * (c1o1 - tangential) / (smallSingle + q));
          feq=c2o27* (drho/*+three*(         vx3)*/+c9o2*(         vx3)*(         vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[B])[kb]=(c1o1-q)/(c1o1+q)*(f_T-f_B+(f_T+f_B-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_T+f_B))/(c1o1+q) - c2o27 * drho;
+         (D.f[DIR_00M])[kb]=(c1o1-q)/(c1o1+q)*(f_T-f_B+(f_T+f_B-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_T+f_B))/(c1o1+q) - c2o27 * drho;
       }
 
       q = q_dirB[k];
@@ -3429,7 +4780,7 @@ extern "C" __global__ void QSlipNormDeviceComp27(real* DD,
 		 //tangential = (tangential > one) ? one:tangential;
 		 q = (q + qSlip)/(c1o1 + qSlip * (c1o1 - tangential) / (smallSingle + q));
          feq=c2o27* (drho/*+three*(        -vx3)*/+c9o2*(        -vx3)*(        -vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[T])[kt]=(c1o1-q)/(c1o1+q)*(f_B-f_T+(f_B+f_T-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_B+f_T))/(c1o1+q) - c2o27 * drho;
+         (D.f[DIR_00P])[kt]=(c1o1-q)/(c1o1+q)*(f_B-f_T+(f_B+f_T-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_B+f_T))/(c1o1+q) - c2o27 * drho;
       }
 
       q = q_dirNE[k];
@@ -3445,7 +4796,7 @@ extern "C" __global__ void QSlipNormDeviceComp27(real* DD,
 		 //tangential = (tangential > one) ? one:tangential;
 		 q = (q + qSlip)/(c1o1 + qSlip * (c1o1 - tangential) / (smallSingle + q));
          feq=c1o54* (drho/*+three*( vx1+vx2    )*/+c9o2*( vx1+vx2    )*( vx1+vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[SW])[ksw]=(c1o1-q)/(c1o1+q)*(f_NE-f_SW+(f_NE+f_SW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_NE+f_SW))/(c1o1+q) - c1o54 * drho;
+         (D.f[DIR_MM0])[ksw]=(c1o1-q)/(c1o1+q)*(f_NE-f_SW+(f_NE+f_SW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_NE+f_SW))/(c1o1+q) - c1o54 * drho;
       }
 
       q = q_dirSW[k];
@@ -3461,7 +4812,7 @@ extern "C" __global__ void QSlipNormDeviceComp27(real* DD,
 		 //tangential = (tangential > one) ? one:tangential;
 		 q = (q + qSlip)/(c1o1 + qSlip * (c1o1 - tangential) / (smallSingle + q));
          feq=c1o54* (drho/*+three*(-vx1-vx2    )*/+c9o2*(-vx1-vx2    )*(-vx1-vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[NE])[kne]=(c1o1-q)/(c1o1+q)*(f_SW-f_NE+(f_SW+f_NE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_SW+f_NE))/(c1o1+q) - c1o54 * drho;
+         (D.f[DIR_PP0])[kne]=(c1o1-q)/(c1o1+q)*(f_SW-f_NE+(f_SW+f_NE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_SW+f_NE))/(c1o1+q) - c1o54 * drho;
       }
 
       q = q_dirSE[k];
@@ -3477,7 +4828,7 @@ extern "C" __global__ void QSlipNormDeviceComp27(real* DD,
 		 //tangential = (tangential > one) ? one:tangential;
 		 q = (q + qSlip)/(c1o1 + qSlip * (c1o1 - tangential) / (smallSingle + q));
          feq=c1o54* (drho/*+three*( vx1-vx2    )*/+c9o2*( vx1-vx2    )*( vx1-vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[NW])[knw]=(c1o1-q)/(c1o1+q)*(f_SE-f_NW+(f_SE+f_NW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_SE+f_NW))/(c1o1+q) - c1o54 * drho;
+         (D.f[DIR_MP0])[knw]=(c1o1-q)/(c1o1+q)*(f_SE-f_NW+(f_SE+f_NW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_SE+f_NW))/(c1o1+q) - c1o54 * drho;
       }
 
       q = q_dirNW[k];
@@ -3493,7 +4844,7 @@ extern "C" __global__ void QSlipNormDeviceComp27(real* DD,
 		 //tangential = (tangential > one) ? one:tangential;
 		 q = (q + qSlip)/(c1o1 + qSlip * (c1o1 - tangential) / (smallSingle + q));
          feq=c1o54* (drho/*+three*(-vx1+vx2    )*/+c9o2*(-vx1+vx2    )*(-vx1+vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[SE])[kse]=(c1o1-q)/(c1o1+q)*(f_NW-f_SE+(f_NW+f_SE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_NW+f_SE))/(c1o1+q) - c1o54 * drho;
+         (D.f[DIR_PM0])[kse]=(c1o1-q)/(c1o1+q)*(f_NW-f_SE+(f_NW+f_SE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_NW+f_SE))/(c1o1+q) - c1o54 * drho;
       }
 
       q = q_dirTE[k];
@@ -3509,7 +4860,7 @@ extern "C" __global__ void QSlipNormDeviceComp27(real* DD,
 		 //tangential = (tangential > one) ? one:tangential;
 		 q = (q + qSlip)/(c1o1 + qSlip * (c1o1 - tangential) / (smallSingle + q));
          feq=c1o54* (drho/*+three*( vx1    +vx3)*/+c9o2*( vx1    +vx3)*( vx1    +vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BW])[kbw]=(c1o1-q)/(c1o1+q)*(f_TE-f_BW+(f_TE+f_BW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TE+f_BW))/(c1o1+q) - c1o54 * drho;
+         (D.f[DIR_M0M])[kbw]=(c1o1-q)/(c1o1+q)*(f_TE-f_BW+(f_TE+f_BW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TE+f_BW))/(c1o1+q) - c1o54 * drho;
       }
 
       q = q_dirBW[k];
@@ -3525,7 +4876,7 @@ extern "C" __global__ void QSlipNormDeviceComp27(real* DD,
 		 //tangential = (tangential > one) ? one:tangential;
 		 q = (q + qSlip)/(c1o1 + qSlip * (c1o1 - tangential) / (smallSingle + q));
          feq=c1o54* (drho/*+three*(-vx1    -vx3)*/+c9o2*(-vx1    -vx3)*(-vx1    -vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TE])[kte]=(c1o1-q)/(c1o1+q)*(f_BW-f_TE+(f_BW+f_TE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BW+f_TE))/(c1o1+q) - c1o54 * drho;
+         (D.f[DIR_P0P])[kte]=(c1o1-q)/(c1o1+q)*(f_BW-f_TE+(f_BW+f_TE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BW+f_TE))/(c1o1+q) - c1o54 * drho;
       }
 
       q = q_dirBE[k];
@@ -3541,7 +4892,7 @@ extern "C" __global__ void QSlipNormDeviceComp27(real* DD,
 		 //tangential = (tangential > one) ? one:tangential;
 		 q = (q + qSlip)/(c1o1 + qSlip * (c1o1 - tangential) / (smallSingle + q));
          feq=c1o54* (drho/*+three*( vx1    -vx3)*/+c9o2*( vx1    -vx3)*( vx1    -vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TW])[ktw]=(c1o1-q)/(c1o1+q)*(f_BE-f_TW+(f_BE+f_TW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BE+f_TW))/(c1o1+q) - c1o54 * drho;
+         (D.f[DIR_M0P])[ktw]=(c1o1-q)/(c1o1+q)*(f_BE-f_TW+(f_BE+f_TW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BE+f_TW))/(c1o1+q) - c1o54 * drho;
       }
 
       q = q_dirTW[k];
@@ -3557,7 +4908,7 @@ extern "C" __global__ void QSlipNormDeviceComp27(real* DD,
 		 //tangential = (tangential > one) ? one:tangential;
 		 q = (q + qSlip)/(c1o1 + qSlip * (c1o1 - tangential) / (smallSingle + q));
          feq=c1o54* (drho/*+three*(-vx1    +vx3)*/+c9o2*(-vx1    +vx3)*(-vx1    +vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BE])[kbe]=(c1o1-q)/(c1o1+q)*(f_TW-f_BE+(f_TW+f_BE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TW+f_BE))/(c1o1+q) - c1o54 * drho;
+         (D.f[DIR_P0M])[kbe]=(c1o1-q)/(c1o1+q)*(f_TW-f_BE+(f_TW+f_BE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TW+f_BE))/(c1o1+q) - c1o54 * drho;
       }
 
       q = q_dirTN[k];
@@ -3573,7 +4924,7 @@ extern "C" __global__ void QSlipNormDeviceComp27(real* DD,
 		 //tangential = (tangential > one) ? one:tangential;
 		 q = (q + qSlip)/(c1o1 + qSlip * (c1o1 - tangential) / (smallSingle + q));
          feq=c1o54* (drho/*+three*(     vx2+vx3)*/+c9o2*(     vx2+vx3)*(     vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BS])[kbs]=(c1o1-q)/(c1o1+q)*(f_TN-f_BS+(f_TN+f_BS-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TN+f_BS))/(c1o1+q) - c1o54 * drho;
+         (D.f[DIR_0MM])[kbs]=(c1o1-q)/(c1o1+q)*(f_TN-f_BS+(f_TN+f_BS-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TN+f_BS))/(c1o1+q) - c1o54 * drho;
       }
 
       q = q_dirBS[k];
@@ -3589,7 +4940,7 @@ extern "C" __global__ void QSlipNormDeviceComp27(real* DD,
 		 //tangential = (tangential > one) ? one:tangential;
 		 q = (q + qSlip)/(c1o1 + qSlip * (c1o1 - tangential) / (smallSingle + q));
          feq=c1o54* (drho/*+three*(    -vx2-vx3)*/+c9o2*(    -vx2-vx3)*(    -vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TN])[ktn]=(c1o1-q)/(c1o1+q)*(f_BS-f_TN+(f_BS+f_TN-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BS+f_TN))/(c1o1+q) - c1o54 * drho;
+         (D.f[DIR_0PP])[ktn]=(c1o1-q)/(c1o1+q)*(f_BS-f_TN+(f_BS+f_TN-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BS+f_TN))/(c1o1+q) - c1o54 * drho;
       }
 
       q = q_dirBN[k];
@@ -3605,7 +4956,7 @@ extern "C" __global__ void QSlipNormDeviceComp27(real* DD,
 		 //tangential = (tangential > one) ? one:tangential;
 		 q = (q + qSlip)/(c1o1 + qSlip * (c1o1 - tangential) / (smallSingle + q));
          feq=c1o54* (drho/*+three*(     vx2-vx3)*/+c9o2*(     vx2-vx3)*(     vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TS])[kts]=(c1o1-q)/(c1o1+q)*(f_BN-f_TS+(f_BN+f_TS-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BN+f_TS))/(c1o1+q) - c1o54 * drho;
+         (D.f[DIR_0MP])[kts]=(c1o1-q)/(c1o1+q)*(f_BN-f_TS+(f_BN+f_TS-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BN+f_TS))/(c1o1+q) - c1o54 * drho;
       }
 
       q = q_dirTS[k];
@@ -3621,7 +4972,7 @@ extern "C" __global__ void QSlipNormDeviceComp27(real* DD,
 		 //tangential = (tangential > one) ? one:tangential;
 		 q = (q + qSlip)/(c1o1 + qSlip * (c1o1 - tangential) / (smallSingle + q));
          feq=c1o54* (drho/*+three*(    -vx2+vx3)*/+c9o2*(    -vx2+vx3)*(    -vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BN])[kbn]=(c1o1-q)/(c1o1+q)*(f_TS-f_BN+(f_TS+f_BN-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TS+f_BN))/(c1o1+q) - c1o54 * drho;
+         (D.f[DIR_0PM])[kbn]=(c1o1-q)/(c1o1+q)*(f_TS-f_BN+(f_TS+f_BN-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TS+f_BN))/(c1o1+q) - c1o54 * drho;
       }
 
       q = q_dirTNE[k];
@@ -3640,7 +4991,7 @@ extern "C" __global__ void QSlipNormDeviceComp27(real* DD,
 		 //tangential = (tangential > one) ? one:tangential;
 		 q = (q + qSlip)/(c1o1 + qSlip * (c1o1 - tangential) / (smallSingle + q));
          feq=c1o216*(drho/*+three*( vx1+vx2+vx3)*/+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BSW])[kbsw]=(c1o1-q)/(c1o1+q)*(f_TNE-f_BSW+(f_TNE+f_BSW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TNE+f_BSW))/(c1o1+q) - c1o216 * drho;
+         (D.f[DIR_MMM])[kbsw]=(c1o1-q)/(c1o1+q)*(f_TNE-f_BSW+(f_TNE+f_BSW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TNE+f_BSW))/(c1o1+q) - c1o216 * drho;
       }
 
       q = q_dirBSW[k];
@@ -3659,7 +5010,7 @@ extern "C" __global__ void QSlipNormDeviceComp27(real* DD,
 		 //tangential = (tangential > one) ? one:tangential;
 		 q = (q + qSlip)/(c1o1 + qSlip * (c1o1 - tangential) / (smallSingle + q));
          feq=c1o216*(drho/*+three*(-vx1-vx2-vx3)*/+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TNE])[ktne]=(c1o1-q)/(c1o1+q)*(f_BSW-f_TNE+(f_BSW+f_TNE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BSW+f_TNE))/(c1o1+q) - c1o216 * drho;
+         (D.f[DIR_PPP])[ktne]=(c1o1-q)/(c1o1+q)*(f_BSW-f_TNE+(f_BSW+f_TNE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BSW+f_TNE))/(c1o1+q) - c1o216 * drho;
       }
 
       q = q_dirBNE[k];
@@ -3678,7 +5029,7 @@ extern "C" __global__ void QSlipNormDeviceComp27(real* DD,
 		 //tangential = (tangential > one) ? one:tangential;
 		 q = (q + qSlip)/(c1o1 + qSlip * (c1o1 - tangential) / (smallSingle + q));
          feq=c1o216*(drho/*+three*( vx1+vx2-vx3)*/+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TSW])[ktsw]=(c1o1-q)/(c1o1+q)*(f_BNE-f_TSW+(f_BNE+f_TSW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BNE+f_TSW))/(c1o1+q) - c1o216 * drho;
+         (D.f[DIR_MMP])[ktsw]=(c1o1-q)/(c1o1+q)*(f_BNE-f_TSW+(f_BNE+f_TSW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BNE+f_TSW))/(c1o1+q) - c1o216 * drho;
       }
 
       q = q_dirTSW[k];
@@ -3697,7 +5048,7 @@ extern "C" __global__ void QSlipNormDeviceComp27(real* DD,
 		 //tangential = (tangential > one) ? one:tangential;
 		 q = (q + qSlip)/(c1o1 + qSlip * (c1o1 - tangential) / (smallSingle + q));
          feq=c1o216*(drho/*+three*(-vx1-vx2+vx3)*/+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BNE])[kbne]=(c1o1-q)/(c1o1+q)*(f_TSW-f_BNE+(f_TSW+f_BNE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TSW+f_BNE))/(c1o1+q) - c1o216 * drho;
+         (D.f[DIR_PPM])[kbne]=(c1o1-q)/(c1o1+q)*(f_TSW-f_BNE+(f_TSW+f_BNE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TSW+f_BNE))/(c1o1+q) - c1o216 * drho;
       }
 
       q = q_dirTSE[k];
@@ -3716,7 +5067,7 @@ extern "C" __global__ void QSlipNormDeviceComp27(real* DD,
 		 //tangential = (tangential > one) ? one:tangential;
 		 q = (q + qSlip)/(c1o1 + qSlip * (c1o1 - tangential) / (smallSingle + q));
          feq=c1o216*(drho/*+three*( vx1-vx2+vx3)*/+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BNW])[kbnw]=(c1o1-q)/(c1o1+q)*(f_TSE-f_BNW+(f_TSE+f_BNW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TSE+f_BNW))/(c1o1+q) - c1o216 * drho;
+         (D.f[DIR_MPM])[kbnw]=(c1o1-q)/(c1o1+q)*(f_TSE-f_BNW+(f_TSE+f_BNW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TSE+f_BNW))/(c1o1+q) - c1o216 * drho;
       }
 
       q = q_dirBNW[k];
@@ -3735,7 +5086,7 @@ extern "C" __global__ void QSlipNormDeviceComp27(real* DD,
 		 //tangential = (tangential > one) ? one:tangential;
 		 q = (q + qSlip)/(c1o1 + qSlip * (c1o1 - tangential) / (smallSingle + q));
          feq=c1o216*(drho/*+three*(-vx1+vx2-vx3)*/+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TSE])[ktse]=(c1o1-q)/(c1o1+q)*(f_BNW-f_TSE+(f_BNW+f_TSE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BNW+f_TSE))/(c1o1+q) - c1o216 * drho;
+         (D.f[DIR_PMP])[ktse]=(c1o1-q)/(c1o1+q)*(f_BNW-f_TSE+(f_BNW+f_TSE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BNW+f_TSE))/(c1o1+q) - c1o216 * drho;
       }
 
       q = q_dirBSE[k];
@@ -3754,7 +5105,7 @@ extern "C" __global__ void QSlipNormDeviceComp27(real* DD,
 		 //tangential = (tangential > one) ? one:tangential;
 		 q = (q + qSlip)/(c1o1 + qSlip * (c1o1 - tangential) / (smallSingle + q));
          feq=c1o216*(drho/*+three*( vx1-vx2-vx3)*/+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TNW])[ktnw]=(c1o1-q)/(c1o1+q)*(f_BSE-f_TNW+(f_BSE+f_TNW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BSE+f_TNW))/(c1o1+q) - c1o216 * drho;
+         (D.f[DIR_MPP])[ktnw]=(c1o1-q)/(c1o1+q)*(f_BSE-f_TNW+(f_BSE+f_TNW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BSE+f_TNW))/(c1o1+q) - c1o216 * drho;
       }
 
       q = q_dirTNW[k];
@@ -3773,7 +5124,7 @@ extern "C" __global__ void QSlipNormDeviceComp27(real* DD,
 		 //tangential = (tangential > one) ? one:tangential;
 		 q = (q + qSlip)/(c1o1 + qSlip * (c1o1 - tangential) / (smallSingle + q));
          feq=c1o216*(drho/*+three*(-vx1+vx2+vx3)*/+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BSE])[kbse]=(c1o1-q)/(c1o1+q)*(f_TNW-f_BSE+(f_TNW+f_BSE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TNW+f_BSE))/(c1o1+q) - c1o216 * drho;
+         (D.f[DIR_PMM])[kbse]=(c1o1-q)/(c1o1+q)*(f_TNW-f_BSE+(f_TNW+f_BSE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TNW+f_BSE))/(c1o1+q) - c1o216 * drho;
       }
    }
 }
diff --git a/src/gpu/VirtualFluids_GPU/GPU/StressBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/StressBCs27.cu
index 6a966e46f7a2e21bd825801bbe6be1df207303d2..74e2faa38638228aa5d499aa74226405ab109f7d 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/StressBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/StressBCs27.cu
@@ -43,12 +43,13 @@
 #include "LBM/LB.h"
 #include "lbm/constants/D3Q27.h"
 #include <lbm/constants/NumericConstants.h>
+#include "KernelUtilities.h"
 
 using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __host__ __device__ __forceinline__ void iMEM(uint k, uint kN,
+__host__ __device__ __forceinline__ void iMEM(uint k, uint kN,
                                                          real* _wallNormalX, real* _wallNormalY, real* _wallNormalZ,
                                                          real* vx, real* vy, real* vz,
                                                          real* vx_el,      real* vy_el,      real* vz_el,      //!>mean (temporally filtered) velocities at exchange location
@@ -135,7 +136,7 @@ extern "C" __host__ __device__ __forceinline__ void iMEM(uint k, uint kN,
 }
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QStressDeviceComp27(real* DD,
+__global__ void QStressDeviceComp27(real* DD,
 											   int* k_Q,
                                     int* k_N,
 											   real* QQ,
@@ -171,63 +172,63 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
    Distributions27 D;
    if (isEvenTimestep==true)//get right array of post coll f's
    {
-      D.f[E   ] = &DD[E   *size_Mat];
-      D.f[W   ] = &DD[W   *size_Mat];
-      D.f[N   ] = &DD[N   *size_Mat];
-      D.f[S   ] = &DD[S   *size_Mat];
-      D.f[T   ] = &DD[T   *size_Mat];
-      D.f[B   ] = &DD[B   *size_Mat];
-      D.f[NE  ] = &DD[NE  *size_Mat];
-      D.f[SW  ] = &DD[SW  *size_Mat];
-      D.f[SE  ] = &DD[SE  *size_Mat];
-      D.f[NW  ] = &DD[NW  *size_Mat];
-      D.f[TE  ] = &DD[TE  *size_Mat];
-      D.f[BW  ] = &DD[BW  *size_Mat];
-      D.f[BE  ] = &DD[BE  *size_Mat];
-      D.f[TW  ] = &DD[TW  *size_Mat];
-      D.f[TN  ] = &DD[TN  *size_Mat];
-      D.f[BS  ] = &DD[BS  *size_Mat];
-      D.f[BN  ] = &DD[BN  *size_Mat];
-      D.f[TS  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[TNE *size_Mat];
-      D.f[TSW ] = &DD[TSW *size_Mat];
-      D.f[TSE ] = &DD[TSE *size_Mat];
-      D.f[TNW ] = &DD[TNW *size_Mat];
-      D.f[BNE ] = &DD[BNE *size_Mat];
-      D.f[BSW ] = &DD[BSW *size_Mat];
-      D.f[BSE ] = &DD[BSE *size_Mat];
-      D.f[BNW ] = &DD[BNW *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
    }
    else
    {
-      D.f[W   ] = &DD[E   *size_Mat];
-      D.f[E   ] = &DD[W   *size_Mat];
-      D.f[S   ] = &DD[N   *size_Mat];
-      D.f[N   ] = &DD[S   *size_Mat];
-      D.f[B   ] = &DD[T   *size_Mat];
-      D.f[T   ] = &DD[B   *size_Mat];
-      D.f[SW  ] = &DD[NE  *size_Mat];
-      D.f[NE  ] = &DD[SW  *size_Mat];
-      D.f[NW  ] = &DD[SE  *size_Mat];
-      D.f[SE  ] = &DD[NW  *size_Mat];
-      D.f[BW  ] = &DD[TE  *size_Mat];
-      D.f[TE  ] = &DD[BW  *size_Mat];
-      D.f[TW  ] = &DD[BE  *size_Mat];
-      D.f[BE  ] = &DD[TW  *size_Mat];
-      D.f[BS  ] = &DD[TN  *size_Mat];
-      D.f[TN  ] = &DD[BS  *size_Mat];
-      D.f[TS  ] = &DD[BN  *size_Mat];
-      D.f[BN  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[BSW *size_Mat];
-      D.f[TSW ] = &DD[BNE *size_Mat];
-      D.f[TSE ] = &DD[BNW *size_Mat];
-      D.f[TNW ] = &DD[BSE *size_Mat];
-      D.f[BNE ] = &DD[TSW *size_Mat];
-      D.f[BSW ] = &DD[TNE *size_Mat];
-      D.f[BSE ] = &DD[TNW *size_Mat];
-      D.f[BNW ] = &DD[TSE *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index
@@ -248,32 +249,32 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW;
-      q_dirE   = &QQ[E   * numberOfBCnodes];
-      q_dirW   = &QQ[W   * numberOfBCnodes];
-      q_dirN   = &QQ[N   * numberOfBCnodes];
-      q_dirS   = &QQ[S   * numberOfBCnodes];
-      q_dirT   = &QQ[T   * numberOfBCnodes];
-      q_dirB   = &QQ[B   * numberOfBCnodes];
-      q_dirNE  = &QQ[NE  * numberOfBCnodes];
-      q_dirSW  = &QQ[SW  * numberOfBCnodes];
-      q_dirSE  = &QQ[SE  * numberOfBCnodes];
-      q_dirNW  = &QQ[NW  * numberOfBCnodes];
-      q_dirTE  = &QQ[TE  * numberOfBCnodes];
-      q_dirBW  = &QQ[BW  * numberOfBCnodes];
-      q_dirBE  = &QQ[BE  * numberOfBCnodes];
-      q_dirTW  = &QQ[TW  * numberOfBCnodes];
-      q_dirTN  = &QQ[TN  * numberOfBCnodes];
-      q_dirBS  = &QQ[BS  * numberOfBCnodes];
-      q_dirBN  = &QQ[BN  * numberOfBCnodes];
-      q_dirTS  = &QQ[TS  * numberOfBCnodes];
-      q_dirTNE = &QQ[TNE * numberOfBCnodes];
-      q_dirTSW = &QQ[TSW * numberOfBCnodes];
-      q_dirTSE = &QQ[TSE * numberOfBCnodes];
-      q_dirTNW = &QQ[TNW * numberOfBCnodes];
-      q_dirBNE = &QQ[BNE * numberOfBCnodes];
-      q_dirBSW = &QQ[BSW * numberOfBCnodes];
-      q_dirBSE = &QQ[BSE * numberOfBCnodes];
-      q_dirBNW = &QQ[BNW * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
+      q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
+      q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
+      q_dirTNW = &QQ[DIR_MPP * numberOfBCnodes];
+      q_dirBNE = &QQ[DIR_PPM * numberOfBCnodes];
+      q_dirBSW = &QQ[DIR_MMM * numberOfBCnodes];
+      q_dirBSE = &QQ[DIR_PMM * numberOfBCnodes];
+      q_dirBNW = &QQ[DIR_MPM * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -308,38 +309,38 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
          f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_W    = (D.f[E   ])[ke   ];     //post-coll f's
-      f_E    = (D.f[W   ])[kw   ];
-      f_S    = (D.f[N   ])[kn   ];
-      f_N    = (D.f[S   ])[ks   ];
-      f_B    = (D.f[T   ])[kt   ];
-      f_T    = (D.f[B   ])[kb   ];
-      f_SW   = (D.f[NE  ])[kne  ];
-      f_NE   = (D.f[SW  ])[ksw  ];
-      f_NW   = (D.f[SE  ])[kse  ];
-      f_SE   = (D.f[NW  ])[knw  ];
-      f_BW   = (D.f[TE  ])[kte  ];
-      f_TE   = (D.f[BW  ])[kbw  ];
-      f_TW   = (D.f[BE  ])[kbe  ];
-      f_BE   = (D.f[TW  ])[ktw  ];
-      f_BS   = (D.f[TN  ])[ktn  ];
-      f_TN   = (D.f[BS  ])[kbs  ];
-      f_TS   = (D.f[BN  ])[kbn  ];
-      f_BN   = (D.f[TS  ])[kts  ];
-      f_BSW  = (D.f[TNE ])[ktne ];
-      f_BNE  = (D.f[TSW ])[ktsw ];
-      f_BNW  = (D.f[TSE ])[ktse ];
-      f_BSE  = (D.f[TNW ])[ktnw ];
-      f_TSW  = (D.f[BNE ])[kbne ];
-      f_TNE  = (D.f[BSW ])[kbsw ];
-      f_TNW  = (D.f[BSE ])[kbse ];
-      f_TSE  = (D.f[BNW ])[kbnw ];
+      f_W    = (D.f[DIR_P00   ])[ke   ];     //post-coll f's
+      f_E    = (D.f[DIR_M00   ])[kw   ];
+      f_S    = (D.f[DIR_0P0   ])[kn   ];
+      f_N    = (D.f[DIR_0M0   ])[ks   ];
+      f_B    = (D.f[DIR_00P   ])[kt   ];
+      f_T    = (D.f[DIR_00M   ])[kb   ];
+      f_SW   = (D.f[DIR_PP0  ])[kne  ];
+      f_NE   = (D.f[DIR_MM0  ])[ksw  ];
+      f_NW   = (D.f[DIR_PM0  ])[kse  ];
+      f_SE   = (D.f[DIR_MP0  ])[knw  ];
+      f_BW   = (D.f[DIR_P0P  ])[kte  ];
+      f_TE   = (D.f[DIR_M0M  ])[kbw  ];
+      f_TW   = (D.f[DIR_P0M  ])[kbe  ];
+      f_BE   = (D.f[DIR_M0P  ])[ktw  ];
+      f_BS   = (D.f[DIR_0PP  ])[ktn  ];
+      f_TN   = (D.f[DIR_0MM  ])[kbs  ];
+      f_TS   = (D.f[DIR_0PM  ])[kbn  ];
+      f_BN   = (D.f[DIR_0MP  ])[kts  ];
+      f_BSW  = (D.f[DIR_PPP ])[ktne ];
+      f_BNE  = (D.f[DIR_MMP ])[ktsw ];
+      f_BNW  = (D.f[DIR_PMP ])[ktse ];
+      f_BSE  = (D.f[DIR_MPP ])[ktnw ];
+      f_TSW  = (D.f[DIR_PPM ])[kbne ];
+      f_TNE  = (D.f[DIR_MMM ])[kbsw ];
+      f_TNW  = (D.f[DIR_PMM ])[kbse ];
+      f_TSE  = (D.f[DIR_MPM ])[kbnw ];
 
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, drho, feq, q;
       drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
                 f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW +
-                f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[REST])[kzero]);
+                f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[DIR_000])[kzero]);
 
       vx1    =  (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
                 ((f_BE - f_TW)   + (f_TE - f_BW))   + ((f_SE - f_NW)   + (f_NE - f_SW)) +
@@ -360,63 +361,63 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)      //get adress where incoming f's should be written to
       {
-         D.f[E   ] = &DD[E   *size_Mat];
-         D.f[W   ] = &DD[W   *size_Mat];
-         D.f[N   ] = &DD[N   *size_Mat];
-         D.f[S   ] = &DD[S   *size_Mat];
-         D.f[T   ] = &DD[T   *size_Mat];
-         D.f[B   ] = &DD[B   *size_Mat];
-         D.f[NE  ] = &DD[NE  *size_Mat];
-         D.f[SW  ] = &DD[SW  *size_Mat];
-         D.f[SE  ] = &DD[SE  *size_Mat];
-         D.f[NW  ] = &DD[NW  *size_Mat];
-         D.f[TE  ] = &DD[TE  *size_Mat];
-         D.f[BW  ] = &DD[BW  *size_Mat];
-         D.f[BE  ] = &DD[BE  *size_Mat];
-         D.f[TW  ] = &DD[TW  *size_Mat];
-         D.f[TN  ] = &DD[TN  *size_Mat];
-         D.f[BS  ] = &DD[BS  *size_Mat];
-         D.f[BN  ] = &DD[BN  *size_Mat];
-         D.f[TS  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[TNE *size_Mat];
-         D.f[TSW ] = &DD[TSW *size_Mat];
-         D.f[TSE ] = &DD[TSE *size_Mat];
-         D.f[TNW ] = &DD[TNW *size_Mat];
-         D.f[BNE ] = &DD[BNE *size_Mat];
-         D.f[BSW ] = &DD[BSW *size_Mat];
-         D.f[BSE ] = &DD[BSE *size_Mat];
-         D.f[BNW ] = &DD[BNW *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
       }
       else
       {
-         D.f[W   ] = &DD[E   *size_Mat];
-         D.f[E   ] = &DD[W   *size_Mat];
-         D.f[S   ] = &DD[N   *size_Mat];
-         D.f[N   ] = &DD[S   *size_Mat];
-         D.f[B   ] = &DD[T   *size_Mat];
-         D.f[T   ] = &DD[B   *size_Mat];
-         D.f[SW  ] = &DD[NE  *size_Mat];
-         D.f[NE  ] = &DD[SW  *size_Mat];
-         D.f[NW  ] = &DD[SE  *size_Mat];
-         D.f[SE  ] = &DD[NW  *size_Mat];
-         D.f[BW  ] = &DD[TE  *size_Mat];
-         D.f[TE  ] = &DD[BW  *size_Mat];
-         D.f[TW  ] = &DD[BE  *size_Mat];
-         D.f[BE  ] = &DD[TW  *size_Mat];
-         D.f[BS  ] = &DD[TN  *size_Mat];
-         D.f[TN  ] = &DD[BS  *size_Mat];
-         D.f[TS  ] = &DD[BN  *size_Mat];
-         D.f[BN  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[BSW *size_Mat];
-         D.f[TSW ] = &DD[BNE *size_Mat];
-         D.f[TSE ] = &DD[BNW *size_Mat];
-         D.f[TNW ] = &DD[BSE *size_Mat];
-         D.f[BNE ] = &DD[TSW *size_Mat];
-         D.f[BSW ] = &DD[TNE *size_Mat];
-         D.f[BSE ] = &DD[TNW *size_Mat];
-         D.f[BNW ] = &DD[TSE *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Compute incoming f's with zero wall velocity
@@ -426,60 +427,68 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
       real f_E_in = 0.0,  f_W_in = 0.0,  f_N_in = 0.0,  f_S_in = 0.0,  f_T_in = 0.0,  f_B_in = 0.0,   f_NE_in = 0.0,  f_SW_in = 0.0,  f_SE_in = 0.0,  f_NW_in = 0.0,  f_TE_in = 0.0,  f_BW_in = 0.0,  f_BE_in = 0.0, f_TW_in = 0.0, f_TN_in = 0.0, f_BS_in = 0.0, f_BN_in = 0.0, f_TS_in = 0.0, f_TNE_in = 0.0, f_TSW_in = 0.0, f_TSE_in = 0.0, f_TNW_in = 0.0, f_BNE_in = 0.0, f_BSW_in = 0.0, f_BSE_in = 0.0, f_BNW_in = 0.0;
       // momentum exchanged with wall at rest
       real wallMomentumX = 0.0, wallMomentumY = 0.0, wallMomentumZ = 0.0;
-
+      real velocityLB = 0.0;
+      
       q = q_dirE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c2o27* (drho/*+three*( vx1        )*/+c9o2*( vx1        )*( vx1        ) * (c1o1 + drho)-cu_sq);
-         f_W_in=(c1o1-q)/(c1o1+q)*(f_E-f_W+(f_E+f_W-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_E+f_W))/(c1o1+q) - c2o27 * drho;
+         velocityLB = vx1;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
+         f_W_in = getInterpolatedDistributionForNoSlipBC(q, f_E, f_W, feq, om_turb);
          wallMomentumX += f_E+f_W_in;
       }
 
       q = q_dirW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c2o27* (drho/*+three*(-vx1        )*/+c9o2*(-vx1        )*(-vx1        ) * (c1o1 + drho)-cu_sq);
-         f_E_in=(c1o1-q)/(c1o1+q)*(f_W-f_E+(f_W+f_E-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_W+f_E))/(c1o1+q) - c2o27 * drho;
+         velocityLB = -vx1;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
+         f_E_in = getInterpolatedDistributionForNoSlipBC(q, f_W, f_E, feq, om_turb);
          wallMomentumX -= f_W+f_E_in;
       }
 
       q = q_dirN[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c2o27* (drho/*+three*(    vx2     )*/+c9o2*(     vx2    )*(     vx2    ) * (c1o1 + drho)-cu_sq);
-         f_S_in=(c1o1-q)/(c1o1+q)*(f_N-f_S+(f_N+f_S-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_N+f_S))/(c1o1+q) - c2o27 * drho;
+         velocityLB = vx2;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
+         f_S_in = getInterpolatedDistributionForNoSlipBC(q, f_N, f_S, feq, om_turb);
          wallMomentumY += f_N+f_S_in;
       }
 
       q = q_dirS[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c2o27* (drho/*+three*(   -vx2     )*/+c9o2*(    -vx2    )*(    -vx2    ) * (c1o1 + drho)-cu_sq);
-         f_N_in=(c1o1-q)/(c1o1+q)*(f_S-f_N+(f_S+f_N-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_S+f_N))/(c1o1+q) - c2o27 * drho;
+         velocityLB = -vx2;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
+         f_N_in = getInterpolatedDistributionForNoSlipBC(q, f_S, f_N, feq, om_turb);
          wallMomentumY -= f_S+f_N_in;
       }
 
       q = q_dirT[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c2o27* (drho/*+three*(         vx3)*/+c9o2*(         vx3)*(         vx3) * (c1o1 + drho)-cu_sq);
-         f_B_in=(c1o1-q)/(c1o1+q)*(f_T-f_B+(f_T+f_B-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_T+f_B))/(c1o1+q) - c2o27 * drho;
+         velocityLB = vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
+         f_B_in = getInterpolatedDistributionForNoSlipBC(q, f_T, f_B, feq, om_turb);
          wallMomentumZ += f_T+f_B_in;
       }
 
       q = q_dirB[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c2o27* (drho/*+three*(        -vx3)*/+c9o2*(        -vx3)*(        -vx3) * (c1o1 + drho)-cu_sq);
-         f_T_in=(c1o1-q)/(c1o1+q)*(f_B-f_T+(f_B+f_T-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_B+f_T))/(c1o1+q) - c2o27 * drho;
+         velocityLB = -vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
+         f_T_in = getInterpolatedDistributionForNoSlipBC(q, f_B, f_T, feq, om_turb);
          wallMomentumZ -= f_B+f_T_in;
       }
 
       q = q_dirNE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o54* (drho/*+three*( vx1+vx2    )*/+c9o2*( vx1+vx2    )*( vx1+vx2    ) * (c1o1 + drho)-cu_sq);
-         f_SW_in=(c1o1-q)/(c1o1+q)*(f_NE-f_SW+(f_NE+f_SW-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_NE+f_SW))/(c1o1+q) - c1o54 * drho;
+         velocityLB = vx1 + vx2;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         f_SW_in = getInterpolatedDistributionForNoSlipBC(q, f_NE, f_SW, feq, om_turb);
          wallMomentumX += f_NE+f_SW_in;
          wallMomentumY += f_NE+f_SW_in;
       }
@@ -487,8 +496,9 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
       q = q_dirSW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o54* (drho/*+three*(-vx1-vx2    )*/+c9o2*(-vx1-vx2    )*(-vx1-vx2    ) * (c1o1 + drho)-cu_sq);
-         f_NE_in=(c1o1-q)/(c1o1+q)*(f_SW-f_NE+(f_SW+f_NE-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_SW+f_NE))/(c1o1+q) - c1o54 * drho;
+         velocityLB = -vx1 - vx2;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         f_NE_in = getInterpolatedDistributionForNoSlipBC(q, f_SW, f_NE, feq, om_turb);
          wallMomentumX -= f_SW+f_NE_in;
          wallMomentumY -= f_SW+f_NE_in;
       }
@@ -496,8 +506,9 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
       q = q_dirSE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o54* (drho/*+three*( vx1-vx2    )*/+c9o2*( vx1-vx2    )*( vx1-vx2    ) * (c1o1 + drho)-cu_sq);
-         f_NW_in=(c1o1-q)/(c1o1+q)*(f_SE-f_NW+(f_SE+f_NW-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_SE+f_NW))/(c1o1+q) - c1o54 * drho;
+         velocityLB = vx1 - vx2;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         f_NW_in = getInterpolatedDistributionForNoSlipBC(q, f_SE, f_NW, feq, om_turb);
          wallMomentumX += f_SE+f_NW_in;
          wallMomentumY -= f_SE+f_NW_in;
       }
@@ -505,8 +516,9 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
       q = q_dirNW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o54* (drho/*+three*(-vx1+vx2    )*/+c9o2*(-vx1+vx2    )*(-vx1+vx2    ) * (c1o1 + drho)-cu_sq);
-         f_SE_in=(c1o1-q)/(c1o1+q)*(f_NW-f_SE+(f_NW+f_SE-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_NW+f_SE))/(c1o1+q) - c1o54 * drho;
+         velocityLB = -vx1 + vx2;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         f_SE_in = getInterpolatedDistributionForNoSlipBC(q, f_NW, f_SE, feq, om_turb);
          wallMomentumX -= f_NW+f_SE_in;
          wallMomentumY += f_NW+f_SE_in;
       }
@@ -514,8 +526,9 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
       q = q_dirTE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o54* (drho/*+three*( vx1    +vx3)*/+c9o2*( vx1    +vx3)*( vx1    +vx3) * (c1o1 + drho)-cu_sq);
-         f_BW_in=(c1o1-q)/(c1o1+q)*(f_TE-f_BW+(f_TE+f_BW-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_TE+f_BW))/(c1o1+q) - c1o54 * drho;
+         velocityLB = vx1 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         f_BW_in = getInterpolatedDistributionForNoSlipBC(q, f_TE, f_BW, feq, om_turb);
          wallMomentumX += f_TE+f_BW_in;
          wallMomentumZ += f_TE+f_BW_in;
       }
@@ -523,8 +536,9 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
       q = q_dirBW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o54* (drho/*+three*(-vx1    -vx3)*/+c9o2*(-vx1    -vx3)*(-vx1    -vx3) * (c1o1 + drho)-cu_sq);
-         f_TE_in=(c1o1-q)/(c1o1+q)*(f_BW-f_TE+(f_BW+f_TE-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_BW+f_TE))/(c1o1+q) - c1o54 * drho;
+         velocityLB = -vx1 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         f_TE_in = getInterpolatedDistributionForNoSlipBC(q, f_BW, f_TE, feq, om_turb);
          wallMomentumX -= f_BW+f_TE_in;
          wallMomentumZ -= f_BW+f_TE_in;
       }
@@ -532,8 +546,9 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
       q = q_dirBE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o54* (drho/*+three*( vx1    -vx3)*/+c9o2*( vx1    -vx3)*( vx1    -vx3) * (c1o1 + drho)-cu_sq);
-         f_TW_in=(c1o1-q)/(c1o1+q)*(f_BE-f_TW+(f_BE+f_TW-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_BE+f_TW))/(c1o1+q) - c1o54 * drho;
+         velocityLB = vx1 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         f_TW_in = getInterpolatedDistributionForNoSlipBC(q, f_BE, f_TW, feq, om_turb);
          wallMomentumX += f_BE+f_TW_in;
          wallMomentumZ -= f_BE+f_TW_in;
       }
@@ -541,8 +556,9 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
       q = q_dirTW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o54* (drho/*+three*(-vx1    +vx3)*/+c9o2*(-vx1    +vx3)*(-vx1    +vx3) * (c1o1 + drho)-cu_sq);
-         f_BE_in=(c1o1-q)/(c1o1+q)*(f_TW-f_BE+(f_TW+f_BE-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_TW+f_BE))/(c1o1+q) - c1o54 * drho;
+         velocityLB = -vx1 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         f_BE_in = getInterpolatedDistributionForNoSlipBC(q, f_TW, f_BE, feq, om_turb);
          wallMomentumX -= f_TW+f_BE_in;
          wallMomentumZ += f_TW+f_BE_in;
       }
@@ -550,8 +566,9 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
       q = q_dirTN[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o54* (drho/*+three*(     vx2+vx3)*/+c9o2*(     vx2+vx3)*(     vx2+vx3) * (c1o1 + drho)-cu_sq);
-         f_BS_in=(c1o1-q)/(c1o1+q)*(f_TN-f_BS+(f_TN+f_BS-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_TN+f_BS))/(c1o1+q) - c1o54 * drho;
+         velocityLB = vx2 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         f_BS_in = getInterpolatedDistributionForNoSlipBC(q, f_TN, f_BS, feq, om_turb);
          wallMomentumY += f_TN+f_BS_in;
          wallMomentumZ += f_TN+f_BS_in;
       }
@@ -559,8 +576,9 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
       q = q_dirBS[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o54* (drho/*+three*(    -vx2-vx3)*/+c9o2*(    -vx2-vx3)*(    -vx2-vx3) * (c1o1 + drho)-cu_sq);
-         f_TN_in=(c1o1-q)/(c1o1+q)*(f_BS-f_TN+(f_BS+f_TN-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_BS+f_TN))/(c1o1+q) - c1o54 * drho;
+         velocityLB = -vx2 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         f_TN_in = getInterpolatedDistributionForNoSlipBC(q, f_BS, f_TN, feq, om_turb);
          wallMomentumY -= f_BS+f_TN_in;
          wallMomentumZ -= f_BS+f_TN_in;
       }
@@ -568,8 +586,9 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
       q = q_dirBN[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o54* (drho/*+three*(     vx2-vx3)*/+c9o2*(     vx2-vx3)*(     vx2-vx3) * (c1o1 + drho)-cu_sq);
-         f_TS_in=(c1o1-q)/(c1o1+q)*(f_BN-f_TS+(f_BN+f_TS-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_BN+f_TS))/(c1o1+q) - c1o54 * drho;
+         velocityLB = vx2 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         f_TS_in = getInterpolatedDistributionForNoSlipBC(q, f_BN, f_TS, feq, om_turb);
          wallMomentumY += f_BN+f_TS_in;
          wallMomentumZ -= f_BN+f_TS_in;
       }
@@ -577,8 +596,9 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
       q = q_dirTS[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o54* (drho/*+three*(    -vx2+vx3)*/+c9o2*(    -vx2+vx3)*(    -vx2+vx3) * (c1o1 + drho)-cu_sq);
-         f_BN_in=(c1o1-q)/(c1o1+q)*(f_TS-f_BN+(f_TS+f_BN-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_TS+f_BN))/(c1o1+q) - c1o54 * drho;
+         velocityLB = -vx2 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
+         f_BN_in = getInterpolatedDistributionForNoSlipBC(q, f_TS, f_BN, feq, om_turb);
          wallMomentumY -= f_TS+f_BN_in;
          wallMomentumZ += f_TS+f_BN_in;
       }
@@ -586,8 +606,9 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
       q = q_dirTNE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o216*(drho/*+three*( vx1+vx2+vx3)*/+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3) * (c1o1 + drho)-cu_sq);
-         f_BSW_in=(c1o1-q)/(c1o1+q)*(f_TNE-f_BSW+(f_TNE+f_BSW-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_TNE+f_BSW))/(c1o1+q) - c1o216 * drho;
+         velocityLB = vx1 + vx2 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         f_BSW_in = getInterpolatedDistributionForNoSlipBC(q, f_TNE, f_BSW, feq, om_turb);
          wallMomentumX += f_TNE+f_BSW_in;
          wallMomentumY += f_TNE+f_BSW_in;
          wallMomentumZ += f_TNE+f_BSW_in;
@@ -596,8 +617,9 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
       q = q_dirBSW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o216*(drho/*+three*(-vx1-vx2-vx3)*/+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3) * (c1o1 + drho)-cu_sq);
-         f_TNE_in=(c1o1-q)/(c1o1+q)*(f_BSW-f_TNE+(f_BSW+f_TNE-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_BSW+f_TNE))/(c1o1+q) - c1o216 * drho;
+         velocityLB = -vx1 - vx2 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         f_TNE_in = getInterpolatedDistributionForNoSlipBC(q, f_BSW, f_TNE, feq, om_turb);
          wallMomentumX -= f_BSW+f_TNE_in;
          wallMomentumY -= f_BSW+f_TNE_in;
          wallMomentumZ -= f_BSW+f_TNE_in;
@@ -606,8 +628,9 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
       q = q_dirBNE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o216*(drho/*+three*( vx1+vx2-vx3)*/+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3) * (c1o1 + drho)-cu_sq);
-         f_TSW_in=(c1o1-q)/(c1o1+q)*(f_BNE-f_TSW+(f_BNE+f_TSW-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_BNE+f_TSW))/(c1o1+q) - c1o216 * drho;
+         velocityLB = vx1 + vx2 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         f_TSW_in = getInterpolatedDistributionForNoSlipBC(q, f_BNE, f_TSW, feq, om_turb);
          wallMomentumX += f_BNE+f_TSW_in;
          wallMomentumY += f_BNE+f_TSW_in;
          wallMomentumZ -= f_BNE+f_TSW_in;
@@ -616,8 +639,9 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
       q = q_dirTSW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o216*(drho/*+three*(-vx1-vx2+vx3)*/+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3) * (c1o1 + drho)-cu_sq);
-         f_BNE_in=(c1o1-q)/(c1o1+q)*(f_TSW-f_BNE+(f_TSW+f_BNE-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_TSW+f_BNE))/(c1o1+q) - c1o216 * drho;
+         velocityLB = -vx1 - vx2 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         f_BNE_in = getInterpolatedDistributionForNoSlipBC(q, f_TSW, f_BNE, feq, om_turb);
          wallMomentumX -= f_TSW+f_BNE_in;
          wallMomentumY -= f_TSW+f_BNE_in;
          wallMomentumZ += f_TSW+f_BNE_in;
@@ -626,8 +650,9 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
       q = q_dirTSE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o216*(drho/*+three*( vx1-vx2+vx3)*/+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3) * (c1o1 + drho)-cu_sq);
-         f_BNW_in=(c1o1-q)/(c1o1+q)*(f_TSE-f_BNW+(f_TSE+f_BNW-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_TSE+f_BNW))/(c1o1+q) - c1o216 * drho;
+         velocityLB = vx1 - vx2 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         f_BNW_in = getInterpolatedDistributionForNoSlipBC(q, f_TSE, f_BNW, feq, om_turb);
          wallMomentumX += f_TSE+f_BNW_in;
          wallMomentumY -= f_TSE+f_BNW_in;
          wallMomentumZ += f_TSE+f_BNW_in;
@@ -636,8 +661,9 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
       q = q_dirBNW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o216*(drho/*+three*(-vx1+vx2-vx3)*/+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3) * (c1o1 + drho)-cu_sq);
-         f_TSE_in=(c1o1-q)/(c1o1+q)*(f_BNW-f_TSE+(f_BNW+f_TSE-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_BNW+f_TSE))/(c1o1+q) - c1o216 * drho;
+         velocityLB = -vx1 + vx2 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         f_TSE_in = getInterpolatedDistributionForNoSlipBC(q, f_BNW, f_TSE, feq, om_turb);
          wallMomentumX -= f_BNW+f_TSE_in;
          wallMomentumY += f_BNW+f_TSE_in;
          wallMomentumZ -= f_BNW+f_TSE_in;
@@ -646,8 +672,9 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
       q = q_dirBSE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o216*(drho/*+three*( vx1-vx2-vx3)*/+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3) * (c1o1 + drho)-cu_sq);
-         f_TNW_in=(c1o1-q)/(c1o1+q)*(f_BSE-f_TNW+(f_BSE+f_TNW-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_BSE+f_TNW))/(c1o1+q) - c1o216 * drho;
+         velocityLB = vx1 - vx2 - vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         f_TNW_in = getInterpolatedDistributionForNoSlipBC(q, f_BSE, f_TNW, feq, om_turb);
          wallMomentumX += f_BSE+f_TNW_in;
          wallMomentumY -= f_BSE+f_TNW_in;
          wallMomentumZ -= f_BSE+f_TNW_in;
@@ -656,8 +683,9 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
       q = q_dirTNW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         feq=c1o216*(drho/*+three*(-vx1+vx2+vx3)*/+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3) * (c1o1 + drho)-cu_sq);
-         f_BSE_in=(c1o1-q)/(c1o1+q)*(f_TNW-f_BSE+(f_TNW+f_BSE-c2o1*feq*om_turb)/(c1o1-om_turb))*c1o2+(q*(f_TNW+f_BSE))/(c1o1+q) - c1o216 * drho;
+         velocityLB = -vx1 + vx2 + vx3;
+         feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
+         f_BSE_in = getInterpolatedDistributionForNoSlipBC(q, f_TNW, f_BSE, feq, om_turb);
          wallMomentumX -= f_TNW+f_BSE_in;
          wallMomentumY += f_TNW+f_BSE_in;
          wallMomentumZ += f_TNW+f_BSE_in;
@@ -695,49 +723,49 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
       q = q_dirE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[W])[kw] = f_W_in - (c6o1*c2o27*( VeloX     ))/(c1o1+q);
+         (D.f[DIR_M00])[kw] = f_W_in - (c6o1*c2o27*( VeloX     ))/(c1o1+q);
          wallMomentumX += -(c6o1*c2o27*( VeloX     ))/(c1o1+q);
       }
 
       q = q_dirW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[E])[ke] = f_E_in - (c6o1*c2o27*(-VeloX     ))/(c1o1+q);
+         (D.f[DIR_P00])[ke] = f_E_in - (c6o1*c2o27*(-VeloX     ))/(c1o1+q);
          wallMomentumX -= - (c6o1*c2o27*(-VeloX     ))/(c1o1+q);
       }
 
       q = q_dirN[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[S])[ks] = f_S_in - (c6o1*c2o27*( VeloY     ))/(c1o1+q);
+         (D.f[DIR_0M0])[ks] = f_S_in - (c6o1*c2o27*( VeloY     ))/(c1o1+q);
          wallMomentumY += - (c6o1*c2o27*( VeloY     ))/(c1o1+q);
       }
 
       q = q_dirS[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[N])[kn] = f_N_in - (c6o1*c2o27*(-VeloY     ))/(c1o1+q);
+         (D.f[DIR_0P0])[kn] = f_N_in - (c6o1*c2o27*(-VeloY     ))/(c1o1+q);
          wallMomentumY -=  -(c6o1*c2o27*(-VeloY     ))/(c1o1+q);
       }
 
       q = q_dirT[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[B])[kb] = f_B_in - (c6o1*c2o27*( VeloZ     ))/(c1o1+q);
+         (D.f[DIR_00M])[kb] = f_B_in - (c6o1*c2o27*( VeloZ     ))/(c1o1+q);
          wallMomentumZ += - (c6o1*c2o27*( VeloZ     ))/(c1o1+q);
       }
 
       q = q_dirB[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[T])[kt] = f_T_in - (c6o1*c2o27*(-VeloZ     ))/(c1o1+q);
+         (D.f[DIR_00P])[kt] = f_T_in - (c6o1*c2o27*(-VeloZ     ))/(c1o1+q);
          wallMomentumZ -= -(c6o1*c2o27*(-VeloZ     ))/(c1o1+q);
       }
 
       q = q_dirNE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[SW])[ksw] = f_SW_in - (c6o1*c1o54*(VeloX+VeloY))/(c1o1+q);
+         (D.f[DIR_MM0])[ksw] = f_SW_in - (c6o1*c1o54*(VeloX+VeloY))/(c1o1+q);
          wallMomentumX +=  -(c6o1*c1o54*(VeloX+VeloY))/(c1o1+q);
          wallMomentumY +=  -(c6o1*c1o54*(VeloX+VeloY))/(c1o1+q);
       }
@@ -745,7 +773,7 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
       q = q_dirSW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[NE])[kne] = f_NE_in - (c6o1*c1o54*(-VeloX-VeloY))/(c1o1+q);
+         (D.f[DIR_PP0])[kne] = f_NE_in - (c6o1*c1o54*(-VeloX-VeloY))/(c1o1+q);
          wallMomentumX -= - (c6o1*c1o54*(-VeloX-VeloY))/(c1o1+q);
          wallMomentumY -= - (c6o1*c1o54*(-VeloX-VeloY))/(c1o1+q);
       }
@@ -753,7 +781,7 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
       q = q_dirSE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[NW])[knw] = f_NW_in - (c6o1*c1o54*( VeloX-VeloY))/(c1o1+q);
+         (D.f[DIR_MP0])[knw] = f_NW_in - (c6o1*c1o54*( VeloX-VeloY))/(c1o1+q);
          wallMomentumX += -(c6o1*c1o54*( VeloX-VeloY))/(c1o1+q);
          wallMomentumY -= -(c6o1*c1o54*( VeloX-VeloY))/(c1o1+q);
       }
@@ -761,7 +789,7 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
       q = q_dirNW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[SE])[kse] = f_SE_in - (c6o1*c1o54*(-VeloX+VeloY))/(c1o1+q);
+         (D.f[DIR_PM0])[kse] = f_SE_in - (c6o1*c1o54*(-VeloX+VeloY))/(c1o1+q);
          wallMomentumX -= - (c6o1*c1o54*(-VeloX+VeloY))/(c1o1+q);
          wallMomentumY += - (c6o1*c1o54*(-VeloX+VeloY))/(c1o1+q);
       }
@@ -769,7 +797,7 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
       q = q_dirTE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[BW])[kbw] = f_BW_in - (c6o1*c1o54*( VeloX+VeloZ))/(c1o1+q);
+         (D.f[DIR_M0M])[kbw] = f_BW_in - (c6o1*c1o54*( VeloX+VeloZ))/(c1o1+q);
          wallMomentumX += - (c6o1*c1o54*( VeloX+VeloZ))/(c1o1+q);
          wallMomentumZ += - (c6o1*c1o54*( VeloX+VeloZ))/(c1o1+q);
       }
@@ -777,7 +805,7 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
       q = q_dirBW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[TE])[kte] = f_TE_in - (c6o1*c1o54*(-VeloX-VeloZ))/(c1o1+q);
+         (D.f[DIR_P0P])[kte] = f_TE_in - (c6o1*c1o54*(-VeloX-VeloZ))/(c1o1+q);
          wallMomentumX -= - (c6o1*c1o54*(-VeloX-VeloZ))/(c1o1+q);
          wallMomentumZ -= - (c6o1*c1o54*(-VeloX-VeloZ))/(c1o1+q);
       }
@@ -785,7 +813,7 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
       q = q_dirBE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[TW])[ktw] = f_TW_in - (c6o1*c1o54*( VeloX-VeloZ))/(c1o1+q);
+         (D.f[DIR_M0P])[ktw] = f_TW_in - (c6o1*c1o54*( VeloX-VeloZ))/(c1o1+q);
          wallMomentumX += - (c6o1*c1o54*( VeloX-VeloZ))/(c1o1+q);
          wallMomentumZ -= - (c6o1*c1o54*( VeloX-VeloZ))/(c1o1+q);
       }
@@ -793,7 +821,7 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
       q = q_dirTW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[BE])[kbe] = f_BE_in - (c6o1*c1o54*(-VeloX+VeloZ))/(c1o1+q);
+         (D.f[DIR_P0M])[kbe] = f_BE_in - (c6o1*c1o54*(-VeloX+VeloZ))/(c1o1+q);
          wallMomentumX -= - (c6o1*c1o54*(-VeloX+VeloZ))/(c1o1+q);
          wallMomentumZ += - (c6o1*c1o54*(-VeloX+VeloZ))/(c1o1+q);
       }
@@ -801,7 +829,7 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
       q = q_dirTN[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[BS])[kbs] = f_BS_in - (c6o1*c1o54*( VeloY+VeloZ))/(c1o1+q);
+         (D.f[DIR_0MM])[kbs] = f_BS_in - (c6o1*c1o54*( VeloY+VeloZ))/(c1o1+q);
          wallMomentumY += - (c6o1*c1o54*( VeloY+VeloZ))/(c1o1+q);
          wallMomentumZ += - (c6o1*c1o54*( VeloY+VeloZ))/(c1o1+q);
       }
@@ -809,7 +837,7 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
       q = q_dirBS[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[TN])[ktn] = f_TN_in - (c6o1*c1o54*( -VeloY-VeloZ))/(c1o1+q);
+         (D.f[DIR_0PP])[ktn] = f_TN_in - (c6o1*c1o54*( -VeloY-VeloZ))/(c1o1+q);
          wallMomentumY -= - (c6o1*c1o54*( -VeloY-VeloZ))/(c1o1+q);
          wallMomentumZ -= - (c6o1*c1o54*( -VeloY-VeloZ))/(c1o1+q);
       }
@@ -817,7 +845,7 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
       q = q_dirBN[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[TS])[kts] = f_TS_in - (c6o1*c1o54*( VeloY-VeloZ))/(c1o1+q);
+         (D.f[DIR_0MP])[kts] = f_TS_in - (c6o1*c1o54*( VeloY-VeloZ))/(c1o1+q);
          wallMomentumY += - (c6o1*c1o54*( VeloY-VeloZ))/(c1o1+q);
          wallMomentumZ -= - (c6o1*c1o54*( VeloY-VeloZ))/(c1o1+q);
       }
@@ -825,7 +853,7 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
       q = q_dirTS[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[BN])[kbn] = f_BN_in - (c6o1*c1o54*( -VeloY+VeloZ))/(c1o1+q);
+         (D.f[DIR_0PM])[kbn] = f_BN_in - (c6o1*c1o54*( -VeloY+VeloZ))/(c1o1+q);
          wallMomentumY -= - (c6o1*c1o54*( -VeloY+VeloZ))/(c1o1+q);
          wallMomentumZ += - (c6o1*c1o54*( -VeloY+VeloZ))/(c1o1+q);
       }
@@ -833,7 +861,7 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
       q = q_dirTNE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[BSW])[kbsw] = f_BSW_in - (c6o1*c1o216*( VeloX+VeloY+VeloZ))/(c1o1+q);
+         (D.f[DIR_MMM])[kbsw] = f_BSW_in - (c6o1*c1o216*( VeloX+VeloY+VeloZ))/(c1o1+q);
          wallMomentumX += - (c6o1*c1o216*( VeloX+VeloY+VeloZ))/(c1o1+q);
          wallMomentumY += - (c6o1*c1o216*( VeloX+VeloY+VeloZ))/(c1o1+q);
          wallMomentumZ += - (c6o1*c1o216*( VeloX+VeloY+VeloZ))/(c1o1+q);
@@ -842,7 +870,7 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
       q = q_dirBSW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[TNE])[ktne] = f_TNE_in - (c6o1*c1o216*(-VeloX-VeloY-VeloZ))/(c1o1+q);
+         (D.f[DIR_PPP])[ktne] = f_TNE_in - (c6o1*c1o216*(-VeloX-VeloY-VeloZ))/(c1o1+q);
          wallMomentumX -= - (c6o1*c1o216*(-VeloX-VeloY-VeloZ))/(c1o1+q);
          wallMomentumY -= - (c6o1*c1o216*(-VeloX-VeloY-VeloZ))/(c1o1+q);
          wallMomentumZ -= - (c6o1*c1o216*(-VeloX-VeloY-VeloZ))/(c1o1+q);
@@ -851,7 +879,7 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
       q = q_dirBNE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[TSW])[ktsw] = f_TSW_in - (c6o1*c1o216*( VeloX+VeloY-VeloZ))/(c1o1+q);
+         (D.f[DIR_MMP])[ktsw] = f_TSW_in - (c6o1*c1o216*( VeloX+VeloY-VeloZ))/(c1o1+q);
          wallMomentumX += - (c6o1*c1o216*( VeloX+VeloY-VeloZ))/(c1o1+q);
          wallMomentumY += - (c6o1*c1o216*( VeloX+VeloY-VeloZ))/(c1o1+q);
          wallMomentumZ -= - (c6o1*c1o216*( VeloX+VeloY-VeloZ))/(c1o1+q);
@@ -860,7 +888,7 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
       q = q_dirTSW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[BNE])[kbne] = f_BNE_in - (c6o1*c1o216*(-VeloX-VeloY+VeloZ))/(c1o1+q);
+         (D.f[DIR_PPM])[kbne] = f_BNE_in - (c6o1*c1o216*(-VeloX-VeloY+VeloZ))/(c1o1+q);
          wallMomentumX -= - (c6o1*c1o216*(-VeloX-VeloY+VeloZ))/(c1o1+q);
          wallMomentumY -= - (c6o1*c1o216*(-VeloX-VeloY+VeloZ))/(c1o1+q);
          wallMomentumZ += - (c6o1*c1o216*(-VeloX-VeloY+VeloZ))/(c1o1+q);
@@ -869,7 +897,7 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
       q = q_dirTSE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[BNW])[kbnw] = f_BNW_in - (c6o1*c1o216*( VeloX-VeloY+VeloZ))/(c1o1+q);
+         (D.f[DIR_MPM])[kbnw] = f_BNW_in - (c6o1*c1o216*( VeloX-VeloY+VeloZ))/(c1o1+q);
          wallMomentumX += - (c6o1*c1o216*( VeloX-VeloY+VeloZ))/(c1o1+q);
          wallMomentumY -= - (c6o1*c1o216*( VeloX-VeloY+VeloZ))/(c1o1+q);
          wallMomentumZ += - (c6o1*c1o216*( VeloX-VeloY+VeloZ))/(c1o1+q);
@@ -878,7 +906,7 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
       q = q_dirBNW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[TSE])[ktse] = f_TSE_in - (c6o1*c1o216*(-VeloX+VeloY-VeloZ))/(c1o1+q);
+         (D.f[DIR_PMP])[ktse] = f_TSE_in - (c6o1*c1o216*(-VeloX+VeloY-VeloZ))/(c1o1+q);
          wallMomentumX -= - (c6o1*c1o216*(-VeloX+VeloY-VeloZ))/(c1o1+q);
          wallMomentumY += - (c6o1*c1o216*(-VeloX+VeloY-VeloZ))/(c1o1+q);
          wallMomentumZ -= - (c6o1*c1o216*(-VeloX+VeloY-VeloZ))/(c1o1+q);
@@ -887,7 +915,7 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
       q = q_dirBSE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[TNW])[ktnw] = f_TNW_in - (c6o1*c1o216*( VeloX-VeloY-VeloZ))/(c1o1+q);
+         (D.f[DIR_MPP])[ktnw] = f_TNW_in - (c6o1*c1o216*( VeloX-VeloY-VeloZ))/(c1o1+q);
          wallMomentumX += - (c6o1*c1o216*( VeloX-VeloY-VeloZ))/(c1o1+q);
          wallMomentumY -= - (c6o1*c1o216*( VeloX-VeloY-VeloZ))/(c1o1+q);
          wallMomentumZ -= - (c6o1*c1o216*( VeloX-VeloY-VeloZ))/(c1o1+q);
@@ -896,7 +924,7 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
       q = q_dirTNW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[BSE])[kbse] = f_BSE_in - (c6o1*c1o216*(-VeloX+VeloY+VeloZ))/(c1o1+q);
+         (D.f[DIR_PMM])[kbse] = f_BSE_in - (c6o1*c1o216*(-VeloX+VeloY+VeloZ))/(c1o1+q);
          wallMomentumX -= - (c6o1*c1o216*(-VeloX+VeloY+VeloZ))/(c1o1+q);
          wallMomentumY += - (c6o1*c1o216*(-VeloX+VeloY+VeloZ))/(c1o1+q);
          wallMomentumZ += - (c6o1*c1o216*(-VeloX+VeloY+VeloZ))/(c1o1+q);
@@ -913,7 +941,7 @@ extern "C" __global__ void QStressDeviceComp27(real* DD,
 }
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void BBStressDevice27( real* DD,
+__global__ void BBStressDevice27( real* DD,
 											            int* k_Q,
                                              int* k_N,
                                              real* QQ,
@@ -946,63 +974,63 @@ extern "C" __global__ void BBStressDevice27( real* DD,
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[E   ] = &DD[E   *size_Mat];
-      D.f[W   ] = &DD[W   *size_Mat];
-      D.f[N   ] = &DD[N   *size_Mat];
-      D.f[S   ] = &DD[S   *size_Mat];
-      D.f[T   ] = &DD[T   *size_Mat];
-      D.f[B   ] = &DD[B   *size_Mat];
-      D.f[NE  ] = &DD[NE  *size_Mat];
-      D.f[SW  ] = &DD[SW  *size_Mat];
-      D.f[SE  ] = &DD[SE  *size_Mat];
-      D.f[NW  ] = &DD[NW  *size_Mat];
-      D.f[TE  ] = &DD[TE  *size_Mat];
-      D.f[BW  ] = &DD[BW  *size_Mat];
-      D.f[BE  ] = &DD[BE  *size_Mat];
-      D.f[TW  ] = &DD[TW  *size_Mat];
-      D.f[TN  ] = &DD[TN  *size_Mat];
-      D.f[BS  ] = &DD[BS  *size_Mat];
-      D.f[BN  ] = &DD[BN  *size_Mat];
-      D.f[TS  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[TNE *size_Mat];
-      D.f[TSW ] = &DD[TSW *size_Mat];
-      D.f[TSE ] = &DD[TSE *size_Mat];
-      D.f[TNW ] = &DD[TNW *size_Mat];
-      D.f[BNE ] = &DD[BNE *size_Mat];
-      D.f[BSW ] = &DD[BSW *size_Mat];
-      D.f[BSE ] = &DD[BSE *size_Mat];
-      D.f[BNW ] = &DD[BNW *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
    }
    else
    {
-      D.f[W   ] = &DD[E   *size_Mat];
-      D.f[E   ] = &DD[W   *size_Mat];
-      D.f[S   ] = &DD[N   *size_Mat];
-      D.f[N   ] = &DD[S   *size_Mat];
-      D.f[B   ] = &DD[T   *size_Mat];
-      D.f[T   ] = &DD[B   *size_Mat];
-      D.f[SW  ] = &DD[NE  *size_Mat];
-      D.f[NE  ] = &DD[SW  *size_Mat];
-      D.f[NW  ] = &DD[SE  *size_Mat];
-      D.f[SE  ] = &DD[NW  *size_Mat];
-      D.f[BW  ] = &DD[TE  *size_Mat];
-      D.f[TE  ] = &DD[BW  *size_Mat];
-      D.f[TW  ] = &DD[BE  *size_Mat];
-      D.f[BE  ] = &DD[TW  *size_Mat];
-      D.f[BS  ] = &DD[TN  *size_Mat];
-      D.f[TN  ] = &DD[BS  *size_Mat];
-      D.f[TS  ] = &DD[BN  *size_Mat];
-      D.f[BN  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[BSW *size_Mat];
-      D.f[TSW ] = &DD[BNE *size_Mat];
-      D.f[TSE ] = &DD[BNW *size_Mat];
-      D.f[TNW ] = &DD[BSE *size_Mat];
-      D.f[BNE ] = &DD[TSW *size_Mat];
-      D.f[BSW ] = &DD[TNE *size_Mat];
-      D.f[BSE ] = &DD[TNW *size_Mat];
-      D.f[BNW ] = &DD[TSE *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index
@@ -1023,32 +1051,32 @@ extern "C" __global__ void BBStressDevice27( real* DD,
          *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
          *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
          *q_dirBSE, *q_dirBNW;
-      q_dirE   = &QQ[E   * numberOfBCnodes];
-      q_dirW   = &QQ[W   * numberOfBCnodes];
-      q_dirN   = &QQ[N   * numberOfBCnodes];
-      q_dirS   = &QQ[S   * numberOfBCnodes];
-      q_dirT   = &QQ[T   * numberOfBCnodes];
-      q_dirB   = &QQ[B   * numberOfBCnodes];
-      q_dirNE  = &QQ[NE  * numberOfBCnodes];
-      q_dirSW  = &QQ[SW  * numberOfBCnodes];
-      q_dirSE  = &QQ[SE  * numberOfBCnodes];
-      q_dirNW  = &QQ[NW  * numberOfBCnodes];
-      q_dirTE  = &QQ[TE  * numberOfBCnodes];
-      q_dirBW  = &QQ[BW  * numberOfBCnodes];
-      q_dirBE  = &QQ[BE  * numberOfBCnodes];
-      q_dirTW  = &QQ[TW  * numberOfBCnodes];
-      q_dirTN  = &QQ[TN  * numberOfBCnodes];
-      q_dirBS  = &QQ[BS  * numberOfBCnodes];
-      q_dirBN  = &QQ[BN  * numberOfBCnodes];
-      q_dirTS  = &QQ[TS  * numberOfBCnodes];
-      q_dirTNE = &QQ[TNE * numberOfBCnodes];
-      q_dirTSW = &QQ[TSW * numberOfBCnodes];
-      q_dirTSE = &QQ[TSE * numberOfBCnodes];
-      q_dirTNW = &QQ[TNW * numberOfBCnodes];
-      q_dirBNE = &QQ[BNE * numberOfBCnodes];
-      q_dirBSW = &QQ[BSW * numberOfBCnodes];
-      q_dirBSE = &QQ[BSE * numberOfBCnodes];
-      q_dirBNW = &QQ[BNW * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
+      q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
+      q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
+      q_dirTNW = &QQ[DIR_MPP * numberOfBCnodes];
+      q_dirBNE = &QQ[DIR_PPM * numberOfBCnodes];
+      q_dirBSW = &QQ[DIR_MMM * numberOfBCnodes];
+      q_dirBSE = &QQ[DIR_PMM * numberOfBCnodes];
+      q_dirBNW = &QQ[DIR_MPM * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -1084,38 +1112,38 @@ extern "C" __global__ void BBStressDevice27( real* DD,
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
          f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_W    = (D.f[E   ])[ke   ];
-      f_E    = (D.f[W   ])[kw   ];
-      f_S    = (D.f[N   ])[kn   ];
-      f_N    = (D.f[S   ])[ks   ];
-      f_B    = (D.f[T   ])[kt   ];
-      f_T    = (D.f[B   ])[kb   ];
-      f_SW   = (D.f[NE  ])[kne  ];
-      f_NE   = (D.f[SW  ])[ksw  ];
-      f_NW   = (D.f[SE  ])[kse  ];
-      f_SE   = (D.f[NW  ])[knw  ];
-      f_BW   = (D.f[TE  ])[kte  ];
-      f_TE   = (D.f[BW  ])[kbw  ];
-      f_TW   = (D.f[BE  ])[kbe  ];
-      f_BE   = (D.f[TW  ])[ktw  ];
-      f_BS   = (D.f[TN  ])[ktn  ];
-      f_TN   = (D.f[BS  ])[kbs  ];
-      f_TS   = (D.f[BN  ])[kbn  ];
-      f_BN   = (D.f[TS  ])[kts  ];
-      f_BSW  = (D.f[TNE ])[ktne ];
-      f_BNE  = (D.f[TSW ])[ktsw ];
-      f_BNW  = (D.f[TSE ])[ktse ];
-      f_BSE  = (D.f[TNW ])[ktnw ];
-      f_TSW  = (D.f[BNE ])[kbne ];
-      f_TNE  = (D.f[BSW ])[kbsw ];
-      f_TNW  = (D.f[BSE ])[kbse ];
-      f_TSE  = (D.f[BNW ])[kbnw ];
+      f_W    = (D.f[DIR_P00   ])[ke   ];
+      f_E    = (D.f[DIR_M00   ])[kw   ];
+      f_S    = (D.f[DIR_0P0   ])[kn   ];
+      f_N    = (D.f[DIR_0M0   ])[ks   ];
+      f_B    = (D.f[DIR_00P   ])[kt   ];
+      f_T    = (D.f[DIR_00M   ])[kb   ];
+      f_SW   = (D.f[DIR_PP0  ])[kne  ];
+      f_NE   = (D.f[DIR_MM0  ])[ksw  ];
+      f_NW   = (D.f[DIR_PM0  ])[kse  ];
+      f_SE   = (D.f[DIR_MP0  ])[knw  ];
+      f_BW   = (D.f[DIR_P0P  ])[kte  ];
+      f_TE   = (D.f[DIR_M0M  ])[kbw  ];
+      f_TW   = (D.f[DIR_P0M  ])[kbe  ];
+      f_BE   = (D.f[DIR_M0P  ])[ktw  ];
+      f_BS   = (D.f[DIR_0PP  ])[ktn  ];
+      f_TN   = (D.f[DIR_0MM  ])[kbs  ];
+      f_TS   = (D.f[DIR_0PM  ])[kbn  ];
+      f_BN   = (D.f[DIR_0MP  ])[kts  ];
+      f_BSW  = (D.f[DIR_PPP ])[ktne ];
+      f_BNE  = (D.f[DIR_MMP ])[ktsw ];
+      f_BNW  = (D.f[DIR_PMP ])[ktse ];
+      f_BSE  = (D.f[DIR_MPP ])[ktnw ];
+      f_TSW  = (D.f[DIR_PPM ])[kbne ];
+      f_TNE  = (D.f[DIR_MMM ])[kbsw ];
+      f_TNW  = (D.f[DIR_PMM ])[kbse ];
+      f_TSE  = (D.f[DIR_MPM ])[kbnw ];
 
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, drho;
       drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
                 f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW +
-                f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[REST])[kzero]);
+                f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[DIR_000])[kzero]);
 
       vx1    =  (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
                 ((f_BE - f_TW)   + (f_TE - f_BW))   + ((f_SE - f_NW)   + (f_NE - f_SW)) +
@@ -1133,63 +1161,63 @@ extern "C" __global__ void BBStressDevice27( real* DD,
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D.f[E   ] = &DD[E   *size_Mat];
-         D.f[W   ] = &DD[W   *size_Mat];
-         D.f[N   ] = &DD[N   *size_Mat];
-         D.f[S   ] = &DD[S   *size_Mat];
-         D.f[T   ] = &DD[T   *size_Mat];
-         D.f[B   ] = &DD[B   *size_Mat];
-         D.f[NE  ] = &DD[NE  *size_Mat];
-         D.f[SW  ] = &DD[SW  *size_Mat];
-         D.f[SE  ] = &DD[SE  *size_Mat];
-         D.f[NW  ] = &DD[NW  *size_Mat];
-         D.f[TE  ] = &DD[TE  *size_Mat];
-         D.f[BW  ] = &DD[BW  *size_Mat];
-         D.f[BE  ] = &DD[BE  *size_Mat];
-         D.f[TW  ] = &DD[TW  *size_Mat];
-         D.f[TN  ] = &DD[TN  *size_Mat];
-         D.f[BS  ] = &DD[BS  *size_Mat];
-         D.f[BN  ] = &DD[BN  *size_Mat];
-         D.f[TS  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[TNE *size_Mat];
-         D.f[TSW ] = &DD[TSW *size_Mat];
-         D.f[TSE ] = &DD[TSE *size_Mat];
-         D.f[TNW ] = &DD[TNW *size_Mat];
-         D.f[BNE ] = &DD[BNE *size_Mat];
-         D.f[BSW ] = &DD[BSW *size_Mat];
-         D.f[BSE ] = &DD[BSE *size_Mat];
-         D.f[BNW ] = &DD[BNW *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
       }
       else
       {
-         D.f[W   ] = &DD[E   *size_Mat];
-         D.f[E   ] = &DD[W   *size_Mat];
-         D.f[S   ] = &DD[N   *size_Mat];
-         D.f[N   ] = &DD[S   *size_Mat];
-         D.f[B   ] = &DD[T   *size_Mat];
-         D.f[T   ] = &DD[B   *size_Mat];
-         D.f[SW  ] = &DD[NE  *size_Mat];
-         D.f[NE  ] = &DD[SW  *size_Mat];
-         D.f[NW  ] = &DD[SE  *size_Mat];
-         D.f[SE  ] = &DD[NW  *size_Mat];
-         D.f[BW  ] = &DD[TE  *size_Mat];
-         D.f[TE  ] = &DD[BW  *size_Mat];
-         D.f[TW  ] = &DD[BE  *size_Mat];
-         D.f[BE  ] = &DD[TW  *size_Mat];
-         D.f[BS  ] = &DD[TN  *size_Mat];
-         D.f[TN  ] = &DD[BS  *size_Mat];
-         D.f[TS  ] = &DD[BN  *size_Mat];
-         D.f[BN  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[BSW *size_Mat];
-         D.f[TSW ] = &DD[BNE *size_Mat];
-         D.f[TSE ] = &DD[BNW *size_Mat];
-         D.f[TNW ] = &DD[BSE *size_Mat];
-         D.f[BNE ] = &DD[TSW *size_Mat];
-         D.f[BSW ] = &DD[TNE *size_Mat];
-         D.f[BSE ] = &DD[TNW *size_Mat];
-         D.f[BNW ] = &DD[TSE *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       real f_E_in,  f_W_in,  f_N_in,  f_S_in,  f_T_in,  f_B_in,   f_NE_in,  f_SW_in,  f_SE_in,  f_NW_in,  f_TE_in,  f_BW_in,  f_BE_in,
@@ -1441,49 +1469,796 @@ extern "C" __global__ void BBStressDevice27( real* DD,
       q = q_dirE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[W])[kw] = f_W_in - (c6o1*c2o27*( VeloX     ));
+         (D.f[DIR_M00])[kw] = f_W_in - (c6o1*c2o27*( VeloX     ));
+         wallMomentumX += -(c6o1*c2o27*( VeloX     ));
+      }
+
+      q = q_dirW[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         (D.f[DIR_P00])[ke] = f_E_in - (c6o1*c2o27*(-VeloX     ));
+         wallMomentumX -= - (c6o1*c2o27*(-VeloX     ));
+      }
+
+      q = q_dirN[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         (D.f[DIR_0M0])[ks] = f_S_in - (c6o1*c2o27*( VeloY     ));
+         wallMomentumY += - (c6o1*c2o27*( VeloY     ));
+      }
+
+      q = q_dirS[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         (D.f[DIR_0P0])[kn] = f_N_in - (c6o1*c2o27*(-VeloY     ));
+         wallMomentumY -=  -(c6o1*c2o27*(-VeloY     ));
+      }
+
+      q = q_dirT[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         (D.f[DIR_00M])[kb] = f_B_in - (c6o1*c2o27*( VeloZ     ));
+         wallMomentumZ += - (c6o1*c2o27*( VeloZ     ));
+      }
+
+      q = q_dirB[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         (D.f[DIR_00P])[kt] = f_T_in - (c6o1*c2o27*(-VeloZ     ));
+         wallMomentumZ -= -(c6o1*c2o27*(-VeloZ     ));
+      }
+
+      q = q_dirNE[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         (D.f[DIR_MM0])[ksw] = f_SW_in - (c6o1*c1o54*(VeloX+VeloY));
+         wallMomentumX +=  -(c6o1*c1o54*(VeloX+VeloY));
+         wallMomentumY +=  -(c6o1*c1o54*(VeloX+VeloY));
+      }
+
+      q = q_dirSW[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         (D.f[DIR_PP0])[kne] = f_NE_in - (c6o1*c1o54*(-VeloX-VeloY));
+         wallMomentumX -= - (c6o1*c1o54*(-VeloX-VeloY));
+         wallMomentumY -= - (c6o1*c1o54*(-VeloX-VeloY));
+      }
+
+      q = q_dirSE[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         (D.f[DIR_MP0])[knw] = f_NW_in - (c6o1*c1o54*( VeloX-VeloY));
+         wallMomentumX += -(c6o1*c1o54*( VeloX-VeloY));
+         wallMomentumY -= -(c6o1*c1o54*( VeloX-VeloY));
+      }
+
+      q = q_dirNW[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         (D.f[DIR_PM0])[kse] = f_SE_in - (c6o1*c1o54*(-VeloX+VeloY));
+         wallMomentumX -= - (c6o1*c1o54*(-VeloX+VeloY));
+         wallMomentumY += - (c6o1*c1o54*(-VeloX+VeloY));
+      }
+
+      q = q_dirTE[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         (D.f[DIR_M0M])[kbw] = f_BW_in - (c6o1*c1o54*( VeloX+VeloZ));
+         wallMomentumX += - (c6o1*c1o54*( VeloX+VeloZ));
+         wallMomentumZ += - (c6o1*c1o54*( VeloX+VeloZ));
+      }
+
+      q = q_dirBW[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         (D.f[DIR_P0P])[kte] = f_TE_in - (c6o1*c1o54*(-VeloX-VeloZ));
+         wallMomentumX -= - (c6o1*c1o54*(-VeloX-VeloZ));
+         wallMomentumZ -= - (c6o1*c1o54*(-VeloX-VeloZ));
+      }
+
+      q = q_dirBE[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         (D.f[DIR_M0P])[ktw] = f_TW_in - (c6o1*c1o54*( VeloX-VeloZ));
+         wallMomentumX += - (c6o1*c1o54*( VeloX-VeloZ));
+         wallMomentumZ -= - (c6o1*c1o54*( VeloX-VeloZ));
+      }
+
+      q = q_dirTW[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         (D.f[DIR_P0M])[kbe] = f_BE_in - (c6o1*c1o54*(-VeloX+VeloZ));
+         wallMomentumX -= - (c6o1*c1o54*(-VeloX+VeloZ));
+         wallMomentumZ += - (c6o1*c1o54*(-VeloX+VeloZ));
+      }
+
+      q = q_dirTN[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         (D.f[DIR_0MM])[kbs] = f_BS_in - (c6o1*c1o54*( VeloY+VeloZ));
+         wallMomentumY += - (c6o1*c1o54*( VeloY+VeloZ));
+         wallMomentumZ += - (c6o1*c1o54*( VeloY+VeloZ));
+      }
+
+      q = q_dirBS[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         (D.f[DIR_0PP])[ktn] = f_TN_in - (c6o1*c1o54*( -VeloY-VeloZ));
+         wallMomentumY -= - (c6o1*c1o54*( -VeloY-VeloZ));
+         wallMomentumZ -= - (c6o1*c1o54*( -VeloY-VeloZ));
+      }
+
+      q = q_dirBN[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         (D.f[DIR_0MP])[kts] = f_TS_in - (c6o1*c1o54*( VeloY-VeloZ));
+         wallMomentumY += - (c6o1*c1o54*( VeloY-VeloZ));
+         wallMomentumZ -= - (c6o1*c1o54*( VeloY-VeloZ));
+      }
+
+      q = q_dirTS[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         (D.f[DIR_0PM])[kbn] = f_BN_in - (c6o1*c1o54*( -VeloY+VeloZ));
+         wallMomentumY -= - (c6o1*c1o54*( -VeloY+VeloZ));
+         wallMomentumZ += - (c6o1*c1o54*( -VeloY+VeloZ));
+      }
+
+      q = q_dirTNE[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         (D.f[DIR_MMM])[kbsw] = f_BSW_in - (c6o1*c1o216*( VeloX+VeloY+VeloZ));
+         wallMomentumX += - (c6o1*c1o216*( VeloX+VeloY+VeloZ));
+         wallMomentumY += - (c6o1*c1o216*( VeloX+VeloY+VeloZ));
+         wallMomentumZ += - (c6o1*c1o216*( VeloX+VeloY+VeloZ));
+      }
+
+      q = q_dirBSW[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         (D.f[DIR_PPP])[ktne] = f_TNE_in - (c6o1*c1o216*(-VeloX-VeloY-VeloZ));
+         wallMomentumX -= - (c6o1*c1o216*(-VeloX-VeloY-VeloZ));
+         wallMomentumY -= - (c6o1*c1o216*(-VeloX-VeloY-VeloZ));
+         wallMomentumZ -= - (c6o1*c1o216*(-VeloX-VeloY-VeloZ));
+      }
+
+      q = q_dirBNE[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         (D.f[DIR_MMP])[ktsw] = f_TSW_in - (c6o1*c1o216*( VeloX+VeloY-VeloZ));
+         wallMomentumX += - (c6o1*c1o216*( VeloX+VeloY-VeloZ));
+         wallMomentumY += - (c6o1*c1o216*( VeloX+VeloY-VeloZ));
+         wallMomentumZ -= - (c6o1*c1o216*( VeloX+VeloY-VeloZ));
+      }
+
+      q = q_dirTSW[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         (D.f[DIR_PPM])[kbne] = f_BNE_in - (c6o1*c1o216*(-VeloX-VeloY+VeloZ));
+         wallMomentumX -= - (c6o1*c1o216*(-VeloX-VeloY+VeloZ));
+         wallMomentumY -= - (c6o1*c1o216*(-VeloX-VeloY+VeloZ));
+         wallMomentumZ += - (c6o1*c1o216*(-VeloX-VeloY+VeloZ));
+      }
+
+      q = q_dirTSE[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         (D.f[DIR_MPM])[kbnw] = f_BNW_in - (c6o1*c1o216*( VeloX-VeloY+VeloZ));
+         wallMomentumX += - (c6o1*c1o216*( VeloX-VeloY+VeloZ));
+         wallMomentumY -= - (c6o1*c1o216*( VeloX-VeloY+VeloZ));
+         wallMomentumZ += - (c6o1*c1o216*( VeloX-VeloY+VeloZ));
+      }
+
+      q = q_dirBNW[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         (D.f[DIR_PMP])[ktse] = f_TSE_in - (c6o1*c1o216*(-VeloX+VeloY-VeloZ));
+         wallMomentumX -= - (c6o1*c1o216*(-VeloX+VeloY-VeloZ));
+         wallMomentumY += - (c6o1*c1o216*(-VeloX+VeloY-VeloZ));
+         wallMomentumZ -= - (c6o1*c1o216*(-VeloX+VeloY-VeloZ));
+      }
+
+      q = q_dirBSE[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         (D.f[DIR_MPP])[ktnw] = f_TNW_in - (c6o1*c1o216*( VeloX-VeloY-VeloZ));
+         wallMomentumX += - (c6o1*c1o216*( VeloX-VeloY-VeloZ));
+         wallMomentumY -= - (c6o1*c1o216*( VeloX-VeloY-VeloZ));
+         wallMomentumZ -= - (c6o1*c1o216*( VeloX-VeloY-VeloZ));
+      }
+
+      q = q_dirTNW[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         (D.f[DIR_PMM])[kbse] = f_BSE_in - (c6o1*c1o216*(-VeloX+VeloY+VeloZ));
+         wallMomentumX -= - (c6o1*c1o216*(-VeloX+VeloY+VeloZ));
+         wallMomentumY += - (c6o1*c1o216*(-VeloX+VeloY+VeloZ));
+         wallMomentumZ += - (c6o1*c1o216*(-VeloX+VeloY+VeloZ));
+      }
+
+      if(hasWallModelMonitor)
+      {
+         Fx_monitor[k] = wallMomentumX;
+         Fy_monitor[k] = wallMomentumY;
+         Fz_monitor[k] = wallMomentumZ;
+      }
+
+   }
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+//////////////////////////////////////////////////////////////////////////////
+__global__ void BBStressPressureDevice27( real* DD,
+											            int* k_Q,
+                                             int* k_N,
+                                             real* QQ,
+                                             unsigned int  numberOfBCnodes,
+                                             real* vx,
+                                             real* vy,
+                                             real* vz,
+                                             real* normalX,
+                                             real* normalY,
+                                             real* normalZ,
+                                             real* vx_el,
+                                             real* vy_el,
+                                             real* vz_el,
+                                             real* vx_w_mean,
+                                             real* vy_w_mean,
+                                             real* vz_w_mean,
+                                             int* samplingOffset,
+                                             real* z0,
+                                             bool  hasWallModelMonitor,
+                                             real* u_star_monitor,
+                                             real* Fx_monitor,
+                                             real* Fy_monitor,
+                                             real* Fz_monitor,
+                                             unsigned int* neighborX,
+                                             unsigned int* neighborY,
+                                             unsigned int* neighborZ,
+                                             unsigned int size_Mat,
+                                             bool isEvenTimestep)
+{
+   Distributions27 D;
+   if (isEvenTimestep==true)
+   {
+      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+   }
+   else
+   {
+      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+   }
+   ////////////////////////////////////////////////////////////////////////////////
+   const unsigned  x = threadIdx.x;  // global x-index
+   const unsigned  y = blockIdx.x;   // global y-index
+   const unsigned  z = blockIdx.y;   // global z-index
+
+   const unsigned nx = blockDim.x;
+   const unsigned ny = gridDim.x;
+
+   const unsigned k = nx*(ny*z + y) + x;
+   //////////////////////////////////////////////////////////////////////////
+
+   if(k< numberOfBCnodes)
+   {
+      ////////////////////////////////////////////////////////////////////////////////
+      real *q_dirE,   *q_dirW,   *q_dirN,   *q_dirS,   *q_dirT,   *q_dirB,
+         *q_dirNE,  *q_dirSW,  *q_dirSE,  *q_dirNW,  *q_dirTE,  *q_dirBW,
+         *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
+         *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
+         *q_dirBSE, *q_dirBNW;
+      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
+      q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
+      q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
+      q_dirTNW = &QQ[DIR_MPP * numberOfBCnodes];
+      q_dirBNE = &QQ[DIR_PPM * numberOfBCnodes];
+      q_dirBSW = &QQ[DIR_MMM * numberOfBCnodes];
+      q_dirBSE = &QQ[DIR_PMM * numberOfBCnodes];
+      q_dirBNW = &QQ[DIR_MPM * numberOfBCnodes];
+      ////////////////////////////////////////////////////////////////////////////////
+      //index
+      unsigned int KQK  = k_Q[k];
+      unsigned int kzero= KQK;
+      unsigned int ke   = KQK;
+      unsigned int kw   = neighborX[KQK];
+      unsigned int kn   = KQK;
+      unsigned int ks   = neighborY[KQK];
+      unsigned int kt   = KQK;
+      unsigned int kb   = neighborZ[KQK];
+      unsigned int ksw  = neighborY[kw];
+      unsigned int kne  = KQK;
+      unsigned int kse  = ks;
+      unsigned int knw  = kw;
+      unsigned int kbw  = neighborZ[kw];
+      unsigned int kte  = KQK;
+      unsigned int kbe  = kb;
+      unsigned int ktw  = kw;
+      unsigned int kbs  = neighborZ[ks];
+      unsigned int ktn  = KQK;
+      unsigned int kbn  = kb;
+      unsigned int kts  = ks;
+      unsigned int ktse = ks;
+      unsigned int kbnw = kbw;
+      unsigned int ktnw = kw;
+      unsigned int kbse = kbs;
+      unsigned int ktsw = ksw;
+      unsigned int kbne = kb;
+      unsigned int ktne = KQK;
+      unsigned int kbsw = neighborZ[ksw];
+
+      ////////////////////////////////////////////////////////////////////////////////
+      real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
+         f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
+
+      f_W    = (D.f[DIR_P00   ])[ke   ];
+      f_E    = (D.f[DIR_M00   ])[kw   ];
+      f_S    = (D.f[DIR_0P0   ])[kn   ];
+      f_N    = (D.f[DIR_0M0   ])[ks   ];
+      f_B    = (D.f[DIR_00P   ])[kt   ];
+      f_T    = (D.f[DIR_00M   ])[kb   ];
+      f_SW   = (D.f[DIR_PP0  ])[kne  ];
+      f_NE   = (D.f[DIR_MM0  ])[ksw  ];
+      f_NW   = (D.f[DIR_PM0  ])[kse  ];
+      f_SE   = (D.f[DIR_MP0  ])[knw  ];
+      f_BW   = (D.f[DIR_P0P  ])[kte  ];
+      f_TE   = (D.f[DIR_M0M  ])[kbw  ];
+      f_TW   = (D.f[DIR_P0M  ])[kbe  ];
+      f_BE   = (D.f[DIR_M0P  ])[ktw  ];
+      f_BS   = (D.f[DIR_0PP  ])[ktn  ];
+      f_TN   = (D.f[DIR_0MM  ])[kbs  ];
+      f_TS   = (D.f[DIR_0PM  ])[kbn  ];
+      f_BN   = (D.f[DIR_0MP  ])[kts  ];
+      f_BSW  = (D.f[DIR_PPP ])[ktne ];
+      f_BNE  = (D.f[DIR_MMP ])[ktsw ];
+      f_BNW  = (D.f[DIR_PMP ])[ktse ];
+      f_BSE  = (D.f[DIR_MPP ])[ktnw ];
+      f_TSW  = (D.f[DIR_PPM ])[kbne ];
+      f_TNE  = (D.f[DIR_MMM ])[kbsw ];
+      f_TNW  = (D.f[DIR_PMM ])[kbse ];
+      f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+
+      ////////////////////////////////////////////////////////////////////////////////
+      real vx1, vx2, vx3, drho;
+      drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
+                f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW +
+                f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[DIR_000])[kzero]);
+
+      vx1    =  (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
+                ((f_BE - f_TW)   + (f_TE - f_BW))   + ((f_SE - f_NW)   + (f_NE - f_SW)) +
+                (f_E - f_W)) / (c1o1 + drho);
+
+
+      vx2    =   ((-(f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
+                 ((f_BN - f_TS)   + (f_TN - f_BS))    + (-(f_SE - f_NW)  + (f_NE - f_SW)) +
+                 (f_N - f_S)) / (c1o1 + drho);
+
+      vx3    =   (((f_TSE - f_BNW) + (f_TNW - f_BSE)) + ((f_TNE - f_BSW) + (f_TSW - f_BNE)) +
+                 (-(f_BN - f_TS)  + (f_TN - f_BS))   + ((f_TE - f_BW)   - (f_BE - f_TW)) +
+                 (f_T - f_B)) / (c1o1 + drho);
+
+      //////////////////////////////////////////////////////////////////////////
+      if (isEvenTimestep==false)
+      {
+         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
+      }
+      else
+      {
+         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+      }
+      ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+      real f_E_in,  f_W_in,  f_N_in,  f_S_in,  f_T_in,  f_B_in,   f_NE_in,  f_SW_in,  f_SE_in,  f_NW_in,  f_TE_in,  f_BW_in,  f_BE_in,
+         f_TW_in, f_TN_in, f_BS_in, f_BN_in, f_TS_in, f_TNE_in, f_TSW_in, f_TSE_in, f_TNW_in, f_BNE_in, f_BSW_in, f_BSE_in, f_BNW_in;
+
+      // momentum exchanged with wall at rest
+      real wallMomentumX = 0.0, wallMomentumY = 0.0, wallMomentumZ = 0.0;
+
+      real q;
+      q = q_dirE[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         f_W_in=f_E - c2o27 * drho;
+         wallMomentumX += f_E+f_W_in;
+      }
+
+      q = q_dirW[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         f_E_in=f_W - c2o27 * drho;
+          wallMomentumX -= f_W+f_E_in;
+      }
+
+      q = q_dirN[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         f_S_in=f_N - c2o27 * drho;
+         wallMomentumY += f_N+f_S_in;
+      }
+
+      q = q_dirS[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         f_N_in=f_S - c2o27 * drho;
+         wallMomentumY -= f_S+f_N_in;
+      }
+
+      q = q_dirT[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         f_B_in=f_T - c2o27 * drho;
+         wallMomentumZ += f_T+f_B_in;
+      }
+
+      q = q_dirB[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         f_T_in=f_B - c2o27 * drho;
+         wallMomentumZ -= f_B+f_T_in;
+      }
+
+      q = q_dirNE[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         f_SW_in=f_NE - c1o54 * drho;
+         wallMomentumX += f_NE+f_SW_in;
+         wallMomentumY += f_NE+f_SW_in;
+      }
+
+      q = q_dirSW[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         f_NE_in=f_SW - c1o54 * drho;
+         wallMomentumX -= f_SW+f_NE_in;
+         wallMomentumY -= f_SW+f_NE_in;
+      }
+
+      q = q_dirSE[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         f_NW_in=f_SE - c1o54 * drho;
+         wallMomentumX += f_SE+f_NW_in;
+         wallMomentumY -= f_SE+f_NW_in;
+      }
+
+      q = q_dirNW[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         f_SE_in=f_NW - c1o54 * drho;
+         wallMomentumX -= f_NW+f_SE_in;
+         wallMomentumY += f_NW+f_SE_in;
+      }
+
+      q = q_dirTE[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         f_BW_in=f_TE - c1o54 * drho;
+         wallMomentumX += f_TE+f_BW_in;
+         wallMomentumZ += f_TE+f_BW_in;
+      }
+
+      q = q_dirBW[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         f_TE_in=f_BW - c1o54 * drho;
+         wallMomentumX -= f_BW+f_TE_in;
+         wallMomentumZ -= f_BW+f_TE_in;
+      }
+
+      q = q_dirBE[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         f_TW_in=f_BE - c1o54 * drho;
+         wallMomentumX += f_BE+f_TW_in;
+         wallMomentumZ -= f_BE+f_TW_in;
+      }
+
+      q = q_dirTW[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         f_BE_in=f_TW - c1o54 * drho;
+         wallMomentumX -= f_TW+f_BE_in;
+         wallMomentumZ += f_TW+f_BE_in;
+      }
+
+      q = q_dirTN[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         f_BS_in=f_TN - c1o54 * drho;
+         wallMomentumY += f_TN+f_BS_in;
+         wallMomentumZ += f_TN+f_BS_in;
+      }
+
+      q = q_dirBS[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         f_TN_in=f_BS - c1o54 * drho;
+         wallMomentumY -= f_BS+f_TN_in;
+         wallMomentumZ -= f_BS+f_TN_in;
+      }
+
+      q = q_dirBN[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         f_TS_in=f_BN - c1o54 * drho;
+         wallMomentumY += f_BN+f_TS_in;
+         wallMomentumZ -= f_BN+f_TS_in;
+      }
+
+      q = q_dirTS[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         f_BN_in=f_TS - c1o54 * drho;
+         wallMomentumY -= f_TS+f_BN_in;
+         wallMomentumZ += f_TS+f_BN_in;
+      }
+
+      q = q_dirTNE[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         f_BSW_in=f_TNE - c1o216 * drho;
+         wallMomentumX += f_TNE+f_BSW_in;
+         wallMomentumY += f_TNE+f_BSW_in;
+         wallMomentumZ += f_TNE+f_BSW_in;
+      }
+
+      q = q_dirBSW[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         f_TNE_in=f_BSW - c1o216 * drho;
+         wallMomentumX -= f_BSW+f_TNE_in;
+         wallMomentumY -= f_BSW+f_TNE_in;
+         wallMomentumZ -= f_BSW+f_TNE_in;
+      }
+
+      q = q_dirBNE[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         f_TSW_in=f_BNE - c1o216 * drho;
+         wallMomentumX += f_BNE+f_TSW_in;
+         wallMomentumY += f_BNE+f_TSW_in;
+         wallMomentumZ -= f_BNE+f_TSW_in;
+      }
+
+      q = q_dirTSW[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         f_BNE_in=f_TSW - c1o216 * drho;
+         wallMomentumX -= f_TSW+f_BNE_in;
+         wallMomentumY -= f_TSW+f_BNE_in;
+         wallMomentumZ += f_TSW+f_BNE_in;
+      }
+
+      q = q_dirTSE[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         f_BNW_in=f_TSE - c1o216 * drho;
+         wallMomentumX += f_TSE+f_BNW_in;
+         wallMomentumY -= f_TSE+f_BNW_in;
+         wallMomentumZ += f_TSE+f_BNW_in;
+      }
+
+      q = q_dirBNW[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         f_TSE_in=f_BNW - c1o216 * drho;
+         wallMomentumX -= f_BNW+f_TSE_in;
+         wallMomentumY += f_BNW+f_TSE_in;
+         wallMomentumZ -= f_BNW+f_TSE_in;
+      }
+
+      q = q_dirBSE[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         f_TNW_in=f_BSE - c1o216 * drho;
+         wallMomentumX += f_BSE+f_TNW_in;
+         wallMomentumY -= f_BSE+f_TNW_in;
+         wallMomentumZ -= f_BSE+f_TNW_in;
+      }
+
+      q = q_dirTNW[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         f_BSE_in=f_TNW - c1o216 * drho;
+         wallMomentumX -= f_TNW+f_BSE_in;
+         wallMomentumY += f_TNW+f_BSE_in;
+         wallMomentumZ += f_TNW+f_BSE_in;
+      }
+
+      ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+      // Compute wall velocity
+      // ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+      real VeloX=0.0, VeloY=0.0, VeloZ=0.0;
+
+      q = 0.5f;
+      real eps = 0.001f;
+
+      iMEM( k, k_N[k],
+         normalX, normalY, normalZ,
+         vx, vy, vz,
+         vx_el,      vy_el,      vz_el,
+         vx_w_mean,  vy_w_mean,  vz_w_mean,
+         vx1,        vx2,        vx3,
+         c1o1+drho,
+         samplingOffset,
+         q,
+         1.0,
+         eps,
+         z0,
+         hasWallModelMonitor,
+         u_star_monitor,
+         wallMomentumX, wallMomentumY, wallMomentumZ,
+         VeloX, VeloY, VeloZ);
+
+      // ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+      // Add wall velocity and write f's
+      // ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+      q = q_dirE[k];
+      if (q>=c0o1 && q<=c1o1)
+      {
+         (D.f[DIR_M00])[kw] = f_W_in - (c6o1*c2o27*( VeloX     ));
          wallMomentumX += -(c6o1*c2o27*( VeloX     ));
       }
 
       q = q_dirW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[E])[ke] = f_E_in - (c6o1*c2o27*(-VeloX     ));
+         (D.f[DIR_P00])[ke] = f_E_in - (c6o1*c2o27*(-VeloX     ));
          wallMomentumX -= - (c6o1*c2o27*(-VeloX     ));
       }
 
       q = q_dirN[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[S])[ks] = f_S_in - (c6o1*c2o27*( VeloY     ));
+         (D.f[DIR_0M0])[ks] = f_S_in - (c6o1*c2o27*( VeloY     ));
          wallMomentumY += - (c6o1*c2o27*( VeloY     ));
       }
 
       q = q_dirS[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[N])[kn] = f_N_in - (c6o1*c2o27*(-VeloY     ));
+         (D.f[DIR_0P0])[kn] = f_N_in - (c6o1*c2o27*(-VeloY     ));
          wallMomentumY -=  -(c6o1*c2o27*(-VeloY     ));
       }
 
       q = q_dirT[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[B])[kb] = f_B_in - (c6o1*c2o27*( VeloZ     ));
+         (D.f[DIR_00M])[kb] = f_B_in - (c6o1*c2o27*( VeloZ     ));
          wallMomentumZ += - (c6o1*c2o27*( VeloZ     ));
       }
 
       q = q_dirB[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[T])[kt] = f_T_in - (c6o1*c2o27*(-VeloZ     ));
+         (D.f[DIR_00P])[kt] = f_T_in - (c6o1*c2o27*(-VeloZ     ));
          wallMomentumZ -= -(c6o1*c2o27*(-VeloZ     ));
       }
 
       q = q_dirNE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[SW])[ksw] = f_SW_in - (c6o1*c1o54*(VeloX+VeloY));
+         (D.f[DIR_MM0])[ksw] = f_SW_in - (c6o1*c1o54*(VeloX+VeloY));
          wallMomentumX +=  -(c6o1*c1o54*(VeloX+VeloY));
          wallMomentumY +=  -(c6o1*c1o54*(VeloX+VeloY));
       }
@@ -1491,7 +2266,7 @@ extern "C" __global__ void BBStressDevice27( real* DD,
       q = q_dirSW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[NE])[kne] = f_NE_in - (c6o1*c1o54*(-VeloX-VeloY));
+         (D.f[DIR_PP0])[kne] = f_NE_in - (c6o1*c1o54*(-VeloX-VeloY));
          wallMomentumX -= - (c6o1*c1o54*(-VeloX-VeloY));
          wallMomentumY -= - (c6o1*c1o54*(-VeloX-VeloY));
       }
@@ -1499,7 +2274,7 @@ extern "C" __global__ void BBStressDevice27( real* DD,
       q = q_dirSE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[NW])[knw] = f_NW_in - (c6o1*c1o54*( VeloX-VeloY));
+         (D.f[DIR_MP0])[knw] = f_NW_in - (c6o1*c1o54*( VeloX-VeloY));
          wallMomentumX += -(c6o1*c1o54*( VeloX-VeloY));
          wallMomentumY -= -(c6o1*c1o54*( VeloX-VeloY));
       }
@@ -1507,7 +2282,7 @@ extern "C" __global__ void BBStressDevice27( real* DD,
       q = q_dirNW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[SE])[kse] = f_SE_in - (c6o1*c1o54*(-VeloX+VeloY));
+         (D.f[DIR_PM0])[kse] = f_SE_in - (c6o1*c1o54*(-VeloX+VeloY));
          wallMomentumX -= - (c6o1*c1o54*(-VeloX+VeloY));
          wallMomentumY += - (c6o1*c1o54*(-VeloX+VeloY));
       }
@@ -1515,7 +2290,7 @@ extern "C" __global__ void BBStressDevice27( real* DD,
       q = q_dirTE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[BW])[kbw] = f_BW_in - (c6o1*c1o54*( VeloX+VeloZ));
+         (D.f[DIR_M0M])[kbw] = f_BW_in - (c6o1*c1o54*( VeloX+VeloZ));
          wallMomentumX += - (c6o1*c1o54*( VeloX+VeloZ));
          wallMomentumZ += - (c6o1*c1o54*( VeloX+VeloZ));
       }
@@ -1523,7 +2298,7 @@ extern "C" __global__ void BBStressDevice27( real* DD,
       q = q_dirBW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[TE])[kte] = f_TE_in - (c6o1*c1o54*(-VeloX-VeloZ));
+         (D.f[DIR_P0P])[kte] = f_TE_in - (c6o1*c1o54*(-VeloX-VeloZ));
          wallMomentumX -= - (c6o1*c1o54*(-VeloX-VeloZ));
          wallMomentumZ -= - (c6o1*c1o54*(-VeloX-VeloZ));
       }
@@ -1531,7 +2306,7 @@ extern "C" __global__ void BBStressDevice27( real* DD,
       q = q_dirBE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[TW])[ktw] = f_TW_in - (c6o1*c1o54*( VeloX-VeloZ));
+         (D.f[DIR_M0P])[ktw] = f_TW_in - (c6o1*c1o54*( VeloX-VeloZ));
          wallMomentumX += - (c6o1*c1o54*( VeloX-VeloZ));
          wallMomentumZ -= - (c6o1*c1o54*( VeloX-VeloZ));
       }
@@ -1539,7 +2314,7 @@ extern "C" __global__ void BBStressDevice27( real* DD,
       q = q_dirTW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[BE])[kbe] = f_BE_in - (c6o1*c1o54*(-VeloX+VeloZ));
+         (D.f[DIR_P0M])[kbe] = f_BE_in - (c6o1*c1o54*(-VeloX+VeloZ));
          wallMomentumX -= - (c6o1*c1o54*(-VeloX+VeloZ));
          wallMomentumZ += - (c6o1*c1o54*(-VeloX+VeloZ));
       }
@@ -1547,7 +2322,7 @@ extern "C" __global__ void BBStressDevice27( real* DD,
       q = q_dirTN[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[BS])[kbs] = f_BS_in - (c6o1*c1o54*( VeloY+VeloZ));
+         (D.f[DIR_0MM])[kbs] = f_BS_in - (c6o1*c1o54*( VeloY+VeloZ));
          wallMomentumY += - (c6o1*c1o54*( VeloY+VeloZ));
          wallMomentumZ += - (c6o1*c1o54*( VeloY+VeloZ));
       }
@@ -1555,7 +2330,7 @@ extern "C" __global__ void BBStressDevice27( real* DD,
       q = q_dirBS[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[TN])[ktn] = f_TN_in - (c6o1*c1o54*( -VeloY-VeloZ));
+         (D.f[DIR_0PP])[ktn] = f_TN_in - (c6o1*c1o54*( -VeloY-VeloZ));
          wallMomentumY -= - (c6o1*c1o54*( -VeloY-VeloZ));
          wallMomentumZ -= - (c6o1*c1o54*( -VeloY-VeloZ));
       }
@@ -1563,7 +2338,7 @@ extern "C" __global__ void BBStressDevice27( real* DD,
       q = q_dirBN[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[TS])[kts] = f_TS_in - (c6o1*c1o54*( VeloY-VeloZ));
+         (D.f[DIR_0MP])[kts] = f_TS_in - (c6o1*c1o54*( VeloY-VeloZ));
          wallMomentumY += - (c6o1*c1o54*( VeloY-VeloZ));
          wallMomentumZ -= - (c6o1*c1o54*( VeloY-VeloZ));
       }
@@ -1571,7 +2346,7 @@ extern "C" __global__ void BBStressDevice27( real* DD,
       q = q_dirTS[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[BN])[kbn] = f_BN_in - (c6o1*c1o54*( -VeloY+VeloZ));
+         (D.f[DIR_0PM])[kbn] = f_BN_in - (c6o1*c1o54*( -VeloY+VeloZ));
          wallMomentumY -= - (c6o1*c1o54*( -VeloY+VeloZ));
          wallMomentumZ += - (c6o1*c1o54*( -VeloY+VeloZ));
       }
@@ -1579,7 +2354,7 @@ extern "C" __global__ void BBStressDevice27( real* DD,
       q = q_dirTNE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[BSW])[kbsw] = f_BSW_in - (c6o1*c1o216*( VeloX+VeloY+VeloZ));
+         (D.f[DIR_MMM])[kbsw] = f_BSW_in - (c6o1*c1o216*( VeloX+VeloY+VeloZ));
          wallMomentumX += - (c6o1*c1o216*( VeloX+VeloY+VeloZ));
          wallMomentumY += - (c6o1*c1o216*( VeloX+VeloY+VeloZ));
          wallMomentumZ += - (c6o1*c1o216*( VeloX+VeloY+VeloZ));
@@ -1588,7 +2363,7 @@ extern "C" __global__ void BBStressDevice27( real* DD,
       q = q_dirBSW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[TNE])[ktne] = f_TNE_in - (c6o1*c1o216*(-VeloX-VeloY-VeloZ));
+         (D.f[DIR_PPP])[ktne] = f_TNE_in - (c6o1*c1o216*(-VeloX-VeloY-VeloZ));
          wallMomentumX -= - (c6o1*c1o216*(-VeloX-VeloY-VeloZ));
          wallMomentumY -= - (c6o1*c1o216*(-VeloX-VeloY-VeloZ));
          wallMomentumZ -= - (c6o1*c1o216*(-VeloX-VeloY-VeloZ));
@@ -1597,7 +2372,7 @@ extern "C" __global__ void BBStressDevice27( real* DD,
       q = q_dirBNE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[TSW])[ktsw] = f_TSW_in - (c6o1*c1o216*( VeloX+VeloY-VeloZ));
+         (D.f[DIR_MMP])[ktsw] = f_TSW_in - (c6o1*c1o216*( VeloX+VeloY-VeloZ));
          wallMomentumX += - (c6o1*c1o216*( VeloX+VeloY-VeloZ));
          wallMomentumY += - (c6o1*c1o216*( VeloX+VeloY-VeloZ));
          wallMomentumZ -= - (c6o1*c1o216*( VeloX+VeloY-VeloZ));
@@ -1606,7 +2381,7 @@ extern "C" __global__ void BBStressDevice27( real* DD,
       q = q_dirTSW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[BNE])[kbne] = f_BNE_in - (c6o1*c1o216*(-VeloX-VeloY+VeloZ));
+         (D.f[DIR_PPM])[kbne] = f_BNE_in - (c6o1*c1o216*(-VeloX-VeloY+VeloZ));
          wallMomentumX -= - (c6o1*c1o216*(-VeloX-VeloY+VeloZ));
          wallMomentumY -= - (c6o1*c1o216*(-VeloX-VeloY+VeloZ));
          wallMomentumZ += - (c6o1*c1o216*(-VeloX-VeloY+VeloZ));
@@ -1615,7 +2390,7 @@ extern "C" __global__ void BBStressDevice27( real* DD,
       q = q_dirTSE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[BNW])[kbnw] = f_BNW_in - (c6o1*c1o216*( VeloX-VeloY+VeloZ));
+         (D.f[DIR_MPM])[kbnw] = f_BNW_in - (c6o1*c1o216*( VeloX-VeloY+VeloZ));
          wallMomentumX += - (c6o1*c1o216*( VeloX-VeloY+VeloZ));
          wallMomentumY -= - (c6o1*c1o216*( VeloX-VeloY+VeloZ));
          wallMomentumZ += - (c6o1*c1o216*( VeloX-VeloY+VeloZ));
@@ -1624,7 +2399,7 @@ extern "C" __global__ void BBStressDevice27( real* DD,
       q = q_dirBNW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[TSE])[ktse] = f_TSE_in - (c6o1*c1o216*(-VeloX+VeloY-VeloZ));
+         (D.f[DIR_PMP])[ktse] = f_TSE_in - (c6o1*c1o216*(-VeloX+VeloY-VeloZ));
          wallMomentumX -= - (c6o1*c1o216*(-VeloX+VeloY-VeloZ));
          wallMomentumY += - (c6o1*c1o216*(-VeloX+VeloY-VeloZ));
          wallMomentumZ -= - (c6o1*c1o216*(-VeloX+VeloY-VeloZ));
@@ -1633,7 +2408,7 @@ extern "C" __global__ void BBStressDevice27( real* DD,
       q = q_dirBSE[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[TNW])[ktnw] = f_TNW_in - (c6o1*c1o216*( VeloX-VeloY-VeloZ));
+         (D.f[DIR_MPP])[ktnw] = f_TNW_in - (c6o1*c1o216*( VeloX-VeloY-VeloZ));
          wallMomentumX += - (c6o1*c1o216*( VeloX-VeloY-VeloZ));
          wallMomentumY -= - (c6o1*c1o216*( VeloX-VeloY-VeloZ));
          wallMomentumZ -= - (c6o1*c1o216*( VeloX-VeloY-VeloZ));
@@ -1642,7 +2417,7 @@ extern "C" __global__ void BBStressDevice27( real* DD,
       q = q_dirTNW[k];
       if (q>=c0o1 && q<=c1o1)
       {
-         (D.f[BSE])[kbse] = f_BSE_in - (c6o1*c1o216*(-VeloX+VeloY+VeloZ));
+         (D.f[DIR_PMM])[kbse] = f_BSE_in - (c6o1*c1o216*(-VeloX+VeloY+VeloZ));
          wallMomentumX -= - (c6o1*c1o216*(-VeloX+VeloY+VeloZ));
          wallMomentumY += - (c6o1*c1o216*(-VeloX+VeloY+VeloZ));
          wallMomentumZ += - (c6o1*c1o216*(-VeloX+VeloY+VeloZ));
diff --git a/src/gpu/VirtualFluids_GPU/GPU/ThinWallBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/ThinWallBCs27.cu
index 9df139b63ab4d7e636c8e976ee92bb8d38b595c6..55f810628f370976289d1492e9916d5d3fa0dbb8 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/ThinWallBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/ThinWallBCs27.cu
@@ -15,14 +15,14 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 
 /////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QVelDeviceCompThinWallsPartOne27(
+__global__ void QVelDeviceCompThinWallsPartOne27(
 	real* vx,
 	real* vy,
 	real* vz,
 	real* DD, 
 	int* k_Q, 
 	real* QQ,
-	int numberOfBCnodes, 
+	uint numberOfBCnodes, 
 	real om1, 
 	uint* neighborX,
 	uint* neighborY,
@@ -33,63 +33,63 @@ extern "C" __global__ void QVelDeviceCompThinWallsPartOne27(
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[E   ] = &DD[E   *size_Mat];
-      D.f[W   ] = &DD[W   *size_Mat];
-      D.f[N   ] = &DD[N   *size_Mat];
-      D.f[S   ] = &DD[S   *size_Mat];
-      D.f[T   ] = &DD[T   *size_Mat];
-      D.f[B   ] = &DD[B   *size_Mat];
-      D.f[NE  ] = &DD[NE  *size_Mat];
-      D.f[SW  ] = &DD[SW  *size_Mat];
-      D.f[SE  ] = &DD[SE  *size_Mat];
-      D.f[NW  ] = &DD[NW  *size_Mat];
-      D.f[TE  ] = &DD[TE  *size_Mat];
-      D.f[BW  ] = &DD[BW  *size_Mat];
-      D.f[BE  ] = &DD[BE  *size_Mat];
-      D.f[TW  ] = &DD[TW  *size_Mat];
-      D.f[TN  ] = &DD[TN  *size_Mat];
-      D.f[BS  ] = &DD[BS  *size_Mat];
-      D.f[BN  ] = &DD[BN  *size_Mat];
-      D.f[TS  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[TNE *size_Mat];
-      D.f[TSW ] = &DD[TSW *size_Mat];
-      D.f[TSE ] = &DD[TSE *size_Mat];
-      D.f[TNW ] = &DD[TNW *size_Mat];
-      D.f[BNE ] = &DD[BNE *size_Mat];
-      D.f[BSW ] = &DD[BSW *size_Mat];
-      D.f[BSE ] = &DD[BSE *size_Mat];
-      D.f[BNW ] = &DD[BNW *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
    } 
    else
    {
-      D.f[W   ] = &DD[E   *size_Mat];
-      D.f[E   ] = &DD[W   *size_Mat];
-      D.f[S   ] = &DD[N   *size_Mat];
-      D.f[N   ] = &DD[S   *size_Mat];
-      D.f[B   ] = &DD[T   *size_Mat];
-      D.f[T   ] = &DD[B   *size_Mat];
-      D.f[SW  ] = &DD[NE  *size_Mat];
-      D.f[NE  ] = &DD[SW  *size_Mat];
-      D.f[NW  ] = &DD[SE  *size_Mat];
-      D.f[SE  ] = &DD[NW  *size_Mat];
-      D.f[BW  ] = &DD[TE  *size_Mat];
-      D.f[TE  ] = &DD[BW  *size_Mat];
-      D.f[TW  ] = &DD[BE  *size_Mat];
-      D.f[BE  ] = &DD[TW  *size_Mat];
-      D.f[BS  ] = &DD[TN  *size_Mat];
-      D.f[TN  ] = &DD[BS  *size_Mat];
-      D.f[TS  ] = &DD[BN  *size_Mat];
-      D.f[BN  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[BSW *size_Mat];
-      D.f[TSW ] = &DD[BNE *size_Mat];
-      D.f[TSE ] = &DD[BNW *size_Mat];
-      D.f[TNW ] = &DD[BSE *size_Mat];
-      D.f[BNE ] = &DD[TSW *size_Mat];
-      D.f[BSW ] = &DD[TNE *size_Mat];
-      D.f[BSE ] = &DD[TNW *size_Mat];
-      D.f[BNW ] = &DD[TSE *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -114,32 +114,32 @@ extern "C" __global__ void QVelDeviceCompThinWallsPartOne27(
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[E   * numberOfBCnodes];
-      q_dirW   = &QQ[W   * numberOfBCnodes];
-      q_dirN   = &QQ[N   * numberOfBCnodes];
-      q_dirS   = &QQ[S   * numberOfBCnodes];
-      q_dirT   = &QQ[T   * numberOfBCnodes];
-      q_dirB   = &QQ[B   * numberOfBCnodes];
-      q_dirNE  = &QQ[NE  * numberOfBCnodes];
-      q_dirSW  = &QQ[SW  * numberOfBCnodes];
-      q_dirSE  = &QQ[SE  * numberOfBCnodes];
-      q_dirNW  = &QQ[NW  * numberOfBCnodes];
-      q_dirTE  = &QQ[TE  * numberOfBCnodes];
-      q_dirBW  = &QQ[BW  * numberOfBCnodes];
-      q_dirBE  = &QQ[BE  * numberOfBCnodes];
-      q_dirTW  = &QQ[TW  * numberOfBCnodes];
-      q_dirTN  = &QQ[TN  * numberOfBCnodes];
-      q_dirBS  = &QQ[BS  * numberOfBCnodes];
-      q_dirBN  = &QQ[BN  * numberOfBCnodes];
-      q_dirTS  = &QQ[TS  * numberOfBCnodes];
-      q_dirTNE = &QQ[TNE * numberOfBCnodes];
-      q_dirTSW = &QQ[TSW * numberOfBCnodes];
-      q_dirTSE = &QQ[TSE * numberOfBCnodes];
-      q_dirTNW = &QQ[TNW * numberOfBCnodes];
-      q_dirBNE = &QQ[BNE * numberOfBCnodes];
-      q_dirBSW = &QQ[BSW * numberOfBCnodes];
-      q_dirBSE = &QQ[BSE * numberOfBCnodes];
-      q_dirBNW = &QQ[BNW * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
+      q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
+      q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
+      q_dirTNW = &QQ[DIR_MPP * numberOfBCnodes];
+      q_dirBNE = &QQ[DIR_PPM * numberOfBCnodes];
+      q_dirBSW = &QQ[DIR_MMM * numberOfBCnodes];
+      q_dirBSE = &QQ[DIR_PMM * numberOfBCnodes];
+      q_dirBNW = &QQ[DIR_MPM * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       uint KQK  = k_Q[k];
@@ -174,37 +174,37 @@ extern "C" __global__ void QVelDeviceCompThinWallsPartOne27(
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
          f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_W    = (D.f[E   ])[ke   ];
-      f_E    = (D.f[W   ])[kw   ];
-      f_S    = (D.f[N   ])[kn   ];
-      f_N    = (D.f[S   ])[ks   ];
-      f_B    = (D.f[T   ])[kt   ];
-      f_T    = (D.f[B   ])[kb   ];
-      f_SW   = (D.f[NE  ])[kne  ];
-      f_NE   = (D.f[SW  ])[ksw  ];
-      f_NW   = (D.f[SE  ])[kse  ];
-      f_SE   = (D.f[NW  ])[knw  ];
-      f_BW   = (D.f[TE  ])[kte  ];
-      f_TE   = (D.f[BW  ])[kbw  ];
-      f_TW   = (D.f[BE  ])[kbe  ];
-      f_BE   = (D.f[TW  ])[ktw  ];
-      f_BS   = (D.f[TN  ])[ktn  ];
-      f_TN   = (D.f[BS  ])[kbs  ];
-      f_TS   = (D.f[BN  ])[kbn  ];
-      f_BN   = (D.f[TS  ])[kts  ];
-      f_BSW  = (D.f[TNE ])[ktne ];
-      f_BNE  = (D.f[TSW ])[ktsw ];
-      f_BNW  = (D.f[TSE ])[ktse ];
-      f_BSE  = (D.f[TNW ])[ktnw ];
-      f_TSW  = (D.f[BNE ])[kbne ];
-      f_TNE  = (D.f[BSW ])[kbsw ];
-      f_TNW  = (D.f[BSE ])[kbse ];
-      f_TSE  = (D.f[BNW ])[kbnw ];
+      f_W    = (D.f[DIR_P00   ])[ke   ];
+      f_E    = (D.f[DIR_M00   ])[kw   ];
+      f_S    = (D.f[DIR_0P0   ])[kn   ];
+      f_N    = (D.f[DIR_0M0   ])[ks   ];
+      f_B    = (D.f[DIR_00P   ])[kt   ];
+      f_T    = (D.f[DIR_00M   ])[kb   ];
+      f_SW   = (D.f[DIR_PP0  ])[kne  ];
+      f_NE   = (D.f[DIR_MM0  ])[ksw  ];
+      f_NW   = (D.f[DIR_PM0  ])[kse  ];
+      f_SE   = (D.f[DIR_MP0  ])[knw  ];
+      f_BW   = (D.f[DIR_P0P  ])[kte  ];
+      f_TE   = (D.f[DIR_M0M  ])[kbw  ];
+      f_TW   = (D.f[DIR_P0M  ])[kbe  ];
+      f_BE   = (D.f[DIR_M0P  ])[ktw  ];
+      f_BS   = (D.f[DIR_0PP  ])[ktn  ];
+      f_TN   = (D.f[DIR_0MM  ])[kbs  ];
+      f_TS   = (D.f[DIR_0PM  ])[kbn  ];
+      f_BN   = (D.f[DIR_0MP  ])[kts  ];
+      f_BSW  = (D.f[DIR_PPP ])[ktne ];
+      f_BNE  = (D.f[DIR_MMP ])[ktsw ];
+      f_BNW  = (D.f[DIR_PMP ])[ktse ];
+      f_BSE  = (D.f[DIR_MPP ])[ktnw ];
+      f_TSW  = (D.f[DIR_PPM ])[kbne ];
+      f_TNE  = (D.f[DIR_MMM ])[kbsw ];
+      f_TNW  = (D.f[DIR_PMM ])[kbse ];
+      f_TSE  = (D.f[DIR_MPM ])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, drho, feq, q;
       drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
                 f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + 
-                f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[REST])[kzero]); 
+                f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[DIR_000])[kzero]); 
 
       vx1    =  (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
                 ((f_BE - f_TW)   + (f_TE - f_BW))   + ((f_SE - f_NW)   + (f_NE - f_SW)) +
@@ -227,182 +227,182 @@ extern "C" __global__ void QVelDeviceCompThinWallsPartOne27(
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c2o27* (drho + c9o2 * ( vx1        )*( vx1        ) * (c1o1 + drho)-cu_sq);
-		 (D.f[W])[kw] = (c1o1 - q) / (c1o1 + q)*(f_E - f_W + (f_E + f_W - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_E + f_W) - c6o1*c2o27*(VeloX)) / (c1o1 + q);
+		 (D.f[DIR_M00])[kw] = (c1o1 - q) / (c1o1 + q)*(f_E - f_W + (f_E + f_W - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_E + f_W) - c6o1*c2o27*(VeloX)) / (c1o1 + q);
 	  }
 
 	  q = q_dirW[k];
 	  if (q >= c0o1 && q <= c1o1)
 	  {
 		  feq = c2o27* (drho + c9o2 * (-vx1)*(-vx1) * (c1o1 + drho) - cu_sq);
-		  (D.f[E])[ke] = (c1o1 - q) / (c1o1 + q)*(f_W - f_E + (f_W + f_E - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_W + f_E) - c6o1*c2o27*(-VeloX)) / (c1o1 + q);
+		  (D.f[DIR_P00])[ke] = (c1o1 - q) / (c1o1 + q)*(f_W - f_E + (f_W + f_E - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_W + f_E) - c6o1*c2o27*(-VeloX)) / (c1o1 + q);
 	  }
 
 	  q = q_dirN[k];
 	  if (q >= c0o1 && q <= c1o1)
 	  {
 		  feq = c2o27* (drho + c9o2 * (vx2)*(vx2) * (c1o1 + drho) - cu_sq);
-		  (D.f[S])[ks] = (c1o1 - q) / (c1o1 + q)*(f_N - f_S + (f_N + f_S - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_N + f_S) - c6o1*c2o27*(VeloY)) / (c1o1 + q);
+		  (D.f[DIR_0M0])[ks] = (c1o1 - q) / (c1o1 + q)*(f_N - f_S + (f_N + f_S - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_N + f_S) - c6o1*c2o27*(VeloY)) / (c1o1 + q);
 	  }
 
 	  q = q_dirS[k];
 	  if (q >= c0o1 && q <= c1o1)
 	  {
 		  feq = c2o27* (drho + c9o2 * (-vx2)*(-vx2) * (c1o1 + drho) - cu_sq);
-		  (D.f[N])[kn] = (c1o1 - q) / (c1o1 + q)*(f_S - f_N + (f_S + f_N - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_S + f_N) - c6o1*c2o27*(-VeloY)) / (c1o1 + q);
+		  (D.f[DIR_0P0])[kn] = (c1o1 - q) / (c1o1 + q)*(f_S - f_N + (f_S + f_N - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_S + f_N) - c6o1*c2o27*(-VeloY)) / (c1o1 + q);
 	  }
 
 	  q = q_dirT[k];
 	  if (q >= c0o1 && q <= c1o1)
 	  {
 		  feq = c2o27* (drho + c9o2 * (vx3)*(vx3) * (c1o1 + drho) - cu_sq);
-		  (D.f[B])[kb] = (c1o1 - q) / (c1o1 + q)*(f_T - f_B + (f_T + f_B - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_T + f_B) - c6o1*c2o27*(VeloZ)) / (c1o1 + q);
+		  (D.f[DIR_00M])[kb] = (c1o1 - q) / (c1o1 + q)*(f_T - f_B + (f_T + f_B - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_T + f_B) - c6o1*c2o27*(VeloZ)) / (c1o1 + q);
 	  }
 
 	  q = q_dirB[k];
 	  if (q >= c0o1 && q <= c1o1)
 	  {
 		  feq = c2o27* (drho + c9o2 * (-vx3)*(-vx3) * (c1o1 + drho) - cu_sq);
-		  (D.f[T])[kt] = (c1o1 - q) / (c1o1 + q)*(f_B - f_T + (f_B + f_T - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_B + f_T) - c6o1*c2o27*(-VeloZ)) / (c1o1 + q);
+		  (D.f[DIR_00P])[kt] = (c1o1 - q) / (c1o1 + q)*(f_B - f_T + (f_B + f_T - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_B + f_T) - c6o1*c2o27*(-VeloZ)) / (c1o1 + q);
       }
 
       q = q_dirNE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho + c9o2 * ( vx1+vx2    )*( vx1+vx2    ) * (c1o1 + drho)-cu_sq);
-         (D.f[SW])[ksw]=(c1o1-q)/(c1o1+q)*(f_NE-f_SW+(f_NE+f_SW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_NE+f_SW)-c6o1*c1o54*( VeloX+VeloY))/(c1o1+q);
+         (D.f[DIR_MM0])[ksw]=(c1o1-q)/(c1o1+q)*(f_NE-f_SW+(f_NE+f_SW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_NE+f_SW)-c6o1*c1o54*( VeloX+VeloY))/(c1o1+q);
       }
 
       q = q_dirSW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho + c9o2 * (-vx1-vx2    )*(-vx1-vx2    ) * (c1o1 + drho)-cu_sq);
-         (D.f[NE])[kne]=(c1o1-q)/(c1o1+q)*(f_SW-f_NE+(f_SW+f_NE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_SW+f_NE)-c6o1*c1o54*(-VeloX-VeloY))/(c1o1+q);
+         (D.f[DIR_PP0])[kne]=(c1o1-q)/(c1o1+q)*(f_SW-f_NE+(f_SW+f_NE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_SW+f_NE)-c6o1*c1o54*(-VeloX-VeloY))/(c1o1+q);
       }
 
       q = q_dirSE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho + c9o2 * ( vx1-vx2    )*( vx1-vx2    ) * (c1o1 + drho)-cu_sq);
-         (D.f[NW])[knw]=(c1o1-q)/(c1o1+q)*(f_SE-f_NW+(f_SE+f_NW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_SE+f_NW)-c6o1*c1o54*( VeloX-VeloY))/(c1o1+q);
+         (D.f[DIR_MP0])[knw]=(c1o1-q)/(c1o1+q)*(f_SE-f_NW+(f_SE+f_NW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_SE+f_NW)-c6o1*c1o54*( VeloX-VeloY))/(c1o1+q);
       }
 
       q = q_dirNW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho + c9o2 * (-vx1+vx2    )*(-vx1+vx2    ) * (c1o1 + drho)-cu_sq);
-         (D.f[SE])[kse]=(c1o1-q)/(c1o1+q)*(f_NW-f_SE+(f_NW+f_SE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_NW+f_SE)-c6o1*c1o54*(-VeloX+VeloY))/(c1o1+q);
+         (D.f[DIR_PM0])[kse]=(c1o1-q)/(c1o1+q)*(f_NW-f_SE+(f_NW+f_SE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_NW+f_SE)-c6o1*c1o54*(-VeloX+VeloY))/(c1o1+q);
       }
 
       q = q_dirTE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho + c9o2 * ( vx1    +vx3)*( vx1    +vx3) * (c1o1 + drho)-cu_sq);
-         (D.f[BW])[kbw]=(c1o1-q)/(c1o1+q)*(f_TE-f_BW+(f_TE+f_BW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TE+f_BW)-c6o1*c1o54*( VeloX+VeloZ))/(c1o1+q);
+         (D.f[DIR_M0M])[kbw]=(c1o1-q)/(c1o1+q)*(f_TE-f_BW+(f_TE+f_BW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TE+f_BW)-c6o1*c1o54*( VeloX+VeloZ))/(c1o1+q);
       }
 
       q = q_dirBW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho + c9o2 * (-vx1    -vx3)*(-vx1    -vx3) * (c1o1 + drho)-cu_sq);
-         (D.f[TE])[kte]=(c1o1-q)/(c1o1+q)*(f_BW-f_TE+(f_BW+f_TE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BW+f_TE)-c6o1*c1o54*(-VeloX-VeloZ))/(c1o1+q);
+         (D.f[DIR_P0P])[kte]=(c1o1-q)/(c1o1+q)*(f_BW-f_TE+(f_BW+f_TE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BW+f_TE)-c6o1*c1o54*(-VeloX-VeloZ))/(c1o1+q);
       }
 
       q = q_dirBE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho + c9o2 * ( vx1    -vx3)*( vx1    -vx3) * (c1o1 + drho)-cu_sq);
-         (D.f[TW])[ktw]=(c1o1-q)/(c1o1+q)*(f_BE-f_TW+(f_BE+f_TW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BE+f_TW)-c6o1*c1o54*( VeloX-VeloZ))/(c1o1+q);
+         (D.f[DIR_M0P])[ktw]=(c1o1-q)/(c1o1+q)*(f_BE-f_TW+(f_BE+f_TW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BE+f_TW)-c6o1*c1o54*( VeloX-VeloZ))/(c1o1+q);
       }
 
       q = q_dirTW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho + c9o2 * (-vx1    +vx3)*(-vx1    +vx3) * (c1o1 + drho)-cu_sq);
-         (D.f[BE])[kbe]=(c1o1-q)/(c1o1+q)*(f_TW-f_BE+(f_TW+f_BE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TW+f_BE)-c6o1*c1o54*(-VeloX+VeloZ))/(c1o1+q);
+         (D.f[DIR_P0M])[kbe]=(c1o1-q)/(c1o1+q)*(f_TW-f_BE+(f_TW+f_BE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TW+f_BE)-c6o1*c1o54*(-VeloX+VeloZ))/(c1o1+q);
       }
 
       q = q_dirTN[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho + c9o2 * (     vx2+vx3)*(     vx2+vx3) * (c1o1 + drho)-cu_sq);
-         (D.f[BS])[kbs]=(c1o1-q)/(c1o1+q)*(f_TN-f_BS+(f_TN+f_BS-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TN+f_BS)-c6o1*c1o54*( VeloY+VeloZ))/(c1o1+q);
+         (D.f[DIR_0MM])[kbs]=(c1o1-q)/(c1o1+q)*(f_TN-f_BS+(f_TN+f_BS-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TN+f_BS)-c6o1*c1o54*( VeloY+VeloZ))/(c1o1+q);
       }
 
       q = q_dirBS[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho + c9o2 * (    -vx2-vx3)*(    -vx2-vx3) * (c1o1 + drho)-cu_sq);
-         (D.f[TN])[ktn]=(c1o1-q)/(c1o1+q)*(f_BS-f_TN+(f_BS+f_TN-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BS+f_TN)-c6o1*c1o54*(-VeloY-VeloZ))/(c1o1+q);
+         (D.f[DIR_0PP])[ktn]=(c1o1-q)/(c1o1+q)*(f_BS-f_TN+(f_BS+f_TN-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BS+f_TN)-c6o1*c1o54*(-VeloY-VeloZ))/(c1o1+q);
       }
 
       q = q_dirBN[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho + c9o2 * (     vx2-vx3)*(     vx2-vx3) * (c1o1 + drho)-cu_sq);
-         (D.f[TS])[kts]=(c1o1-q)/(c1o1+q)*(f_BN-f_TS+(f_BN+f_TS-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BN+f_TS)-c6o1*c1o54*( VeloY-VeloZ))/(c1o1+q);
+         (D.f[DIR_0MP])[kts]=(c1o1-q)/(c1o1+q)*(f_BN-f_TS+(f_BN+f_TS-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BN+f_TS)-c6o1*c1o54*( VeloY-VeloZ))/(c1o1+q);
       }
 
       q = q_dirTS[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho + c9o2 * (    -vx2+vx3)*(    -vx2+vx3) * (c1o1 + drho)-cu_sq);
-         (D.f[BN])[kbn]=(c1o1-q)/(c1o1+q)*(f_TS-f_BN+(f_TS+f_BN-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TS+f_BN)-c6o1*c1o54*(-VeloY+VeloZ))/(c1o1+q);
+         (D.f[DIR_0PM])[kbn]=(c1o1-q)/(c1o1+q)*(f_TS-f_BN+(f_TS+f_BN-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TS+f_BN)-c6o1*c1o54*(-VeloY+VeloZ))/(c1o1+q);
       }
 
       q = q_dirTNE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho + c9o2 * ( vx1+vx2+vx3)*( vx1+vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BSW])[kbsw]=(c1o1-q)/(c1o1+q)*(f_TNE-f_BSW+(f_TNE+f_BSW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TNE+f_BSW)-c6o1*c1o216*( VeloX+VeloY+VeloZ))/(c1o1+q);
+         (D.f[DIR_MMM])[kbsw]=(c1o1-q)/(c1o1+q)*(f_TNE-f_BSW+(f_TNE+f_BSW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TNE+f_BSW)-c6o1*c1o216*( VeloX+VeloY+VeloZ))/(c1o1+q);
       }
 
       q = q_dirBSW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho + c9o2 * (-vx1-vx2-vx3)*(-vx1-vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TNE])[ktne]=(c1o1-q)/(c1o1+q)*(f_BSW-f_TNE+(f_BSW+f_TNE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BSW+f_TNE)-c6o1*c1o216*(-VeloX-VeloY-VeloZ))/(c1o1+q);
+         (D.f[DIR_PPP])[ktne]=(c1o1-q)/(c1o1+q)*(f_BSW-f_TNE+(f_BSW+f_TNE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BSW+f_TNE)-c6o1*c1o216*(-VeloX-VeloY-VeloZ))/(c1o1+q);
       }
 
       q = q_dirBNE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho + c9o2 * ( vx1+vx2-vx3)*( vx1+vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TSW])[ktsw]=(c1o1-q)/(c1o1+q)*(f_BNE-f_TSW+(f_BNE+f_TSW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BNE+f_TSW)-c6o1*c1o216*( VeloX+VeloY-VeloZ))/(c1o1+q);
+         (D.f[DIR_MMP])[ktsw]=(c1o1-q)/(c1o1+q)*(f_BNE-f_TSW+(f_BNE+f_TSW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BNE+f_TSW)-c6o1*c1o216*( VeloX+VeloY-VeloZ))/(c1o1+q);
       }
 
       q = q_dirTSW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho + c9o2 * (-vx1-vx2+vx3)*(-vx1-vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BNE])[kbne]=(c1o1-q)/(c1o1+q)*(f_TSW-f_BNE+(f_TSW+f_BNE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TSW+f_BNE)-c6o1*c1o216*(-VeloX-VeloY+VeloZ))/(c1o1+q);
+         (D.f[DIR_PPM])[kbne]=(c1o1-q)/(c1o1+q)*(f_TSW-f_BNE+(f_TSW+f_BNE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TSW+f_BNE)-c6o1*c1o216*(-VeloX-VeloY+VeloZ))/(c1o1+q);
       }
 
       q = q_dirTSE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho + c9o2 * ( vx1-vx2+vx3)*( vx1-vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BNW])[kbnw]=(c1o1-q)/(c1o1+q)*(f_TSE-f_BNW+(f_TSE+f_BNW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TSE+f_BNW)-c6o1*c1o216*( VeloX-VeloY+VeloZ))/(c1o1+q);
+         (D.f[DIR_MPM])[kbnw]=(c1o1-q)/(c1o1+q)*(f_TSE-f_BNW+(f_TSE+f_BNW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TSE+f_BNW)-c6o1*c1o216*( VeloX-VeloY+VeloZ))/(c1o1+q);
       }
 
       q = q_dirBNW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho + c9o2 * (-vx1+vx2-vx3)*(-vx1+vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TSE])[ktse]=(c1o1-q)/(c1o1+q)*(f_BNW-f_TSE+(f_BNW+f_TSE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BNW+f_TSE)-c6o1*c1o216*(-VeloX+VeloY-VeloZ))/(c1o1+q);
+         (D.f[DIR_PMP])[ktse]=(c1o1-q)/(c1o1+q)*(f_BNW-f_TSE+(f_BNW+f_TSE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BNW+f_TSE)-c6o1*c1o216*(-VeloX+VeloY-VeloZ))/(c1o1+q);
       }
 
       q = q_dirBSE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho + c9o2 * ( vx1-vx2-vx3)*( vx1-vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TNW])[ktnw]=(c1o1-q)/(c1o1+q)*(f_BSE-f_TNW+(f_BSE+f_TNW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BSE+f_TNW)-c6o1*c1o216*( VeloX-VeloY-VeloZ))/(c1o1+q);
+         (D.f[DIR_MPP])[ktnw]=(c1o1-q)/(c1o1+q)*(f_BSE-f_TNW+(f_BSE+f_TNW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BSE+f_TNW)-c6o1*c1o216*( VeloX-VeloY-VeloZ))/(c1o1+q);
       }
 
       q = q_dirTNW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho + c9o2 * (-vx1+vx2+vx3)*(-vx1+vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BSE])[kbse]=(c1o1-q)/(c1o1+q)*(f_TNW-f_BSE+(f_TNW+f_BSE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TNW+f_BSE)-c6o1*c1o216*(-VeloX+VeloY+VeloZ))/(c1o1+q);
+         (D.f[DIR_PMM])[kbse]=(c1o1-q)/(c1o1+q)*(f_TNW-f_BSE+(f_TNW+f_BSE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TNW+f_BSE)-c6o1*c1o216*(-VeloX+VeloY+VeloZ))/(c1o1+q);
       }
    }
 }
@@ -447,7 +447,7 @@ extern "C" __global__ void QVelDeviceCompThinWallsPartOne27(
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QDeviceCompThinWallsPartOne27(
+__global__ void QDeviceCompThinWallsPartOne27(
 	real* DD,
 	int* k_Q,
 	real* QQ,
@@ -462,63 +462,63 @@ extern "C" __global__ void QDeviceCompThinWallsPartOne27(
 	Distributions27 D;
 	if (isEvenTimestep == true)
 	{
-		D.f[E] = &DD[E   *size_Mat];
-		D.f[W] = &DD[W   *size_Mat];
-		D.f[N] = &DD[N   *size_Mat];
-		D.f[S] = &DD[S   *size_Mat];
-		D.f[T] = &DD[T   *size_Mat];
-		D.f[B] = &DD[B   *size_Mat];
-		D.f[NE] = &DD[NE  *size_Mat];
-		D.f[SW] = &DD[SW  *size_Mat];
-		D.f[SE] = &DD[SE  *size_Mat];
-		D.f[NW] = &DD[NW  *size_Mat];
-		D.f[TE] = &DD[TE  *size_Mat];
-		D.f[BW] = &DD[BW  *size_Mat];
-		D.f[BE] = &DD[BE  *size_Mat];
-		D.f[TW] = &DD[TW  *size_Mat];
-		D.f[TN] = &DD[TN  *size_Mat];
-		D.f[BS] = &DD[BS  *size_Mat];
-		D.f[BN] = &DD[BN  *size_Mat];
-		D.f[TS] = &DD[TS  *size_Mat];
-		D.f[REST] = &DD[REST*size_Mat];
-		D.f[TNE] = &DD[TNE *size_Mat];
-		D.f[TSW] = &DD[TSW *size_Mat];
-		D.f[TSE] = &DD[TSE *size_Mat];
-		D.f[TNW] = &DD[TNW *size_Mat];
-		D.f[BNE] = &DD[BNE *size_Mat];
-		D.f[BSW] = &DD[BSW *size_Mat];
-		D.f[BSE] = &DD[BSE *size_Mat];
-		D.f[BNW] = &DD[BNW *size_Mat];
+		D.f[DIR_P00] = &DD[DIR_P00   *size_Mat];
+		D.f[DIR_M00] = &DD[DIR_M00   *size_Mat];
+		D.f[DIR_0P0] = &DD[DIR_0P0   *size_Mat];
+		D.f[DIR_0M0] = &DD[DIR_0M0   *size_Mat];
+		D.f[DIR_00P] = &DD[DIR_00P   *size_Mat];
+		D.f[DIR_00M] = &DD[DIR_00M   *size_Mat];
+		D.f[DIR_PP0] = &DD[DIR_PP0  *size_Mat];
+		D.f[DIR_MM0] = &DD[DIR_MM0  *size_Mat];
+		D.f[DIR_PM0] = &DD[DIR_PM0  *size_Mat];
+		D.f[DIR_MP0] = &DD[DIR_MP0  *size_Mat];
+		D.f[DIR_P0P] = &DD[DIR_P0P  *size_Mat];
+		D.f[DIR_M0M] = &DD[DIR_M0M  *size_Mat];
+		D.f[DIR_P0M] = &DD[DIR_P0M  *size_Mat];
+		D.f[DIR_M0P] = &DD[DIR_M0P  *size_Mat];
+		D.f[DIR_0PP] = &DD[DIR_0PP  *size_Mat];
+		D.f[DIR_0MM] = &DD[DIR_0MM  *size_Mat];
+		D.f[DIR_0PM] = &DD[DIR_0PM  *size_Mat];
+		D.f[DIR_0MP] = &DD[DIR_0MP  *size_Mat];
+		D.f[DIR_000] = &DD[DIR_000*size_Mat];
+		D.f[DIR_PPP] = &DD[DIR_PPP *size_Mat];
+		D.f[DIR_MMP] = &DD[DIR_MMP *size_Mat];
+		D.f[DIR_PMP] = &DD[DIR_PMP *size_Mat];
+		D.f[DIR_MPP] = &DD[DIR_MPP *size_Mat];
+		D.f[DIR_PPM] = &DD[DIR_PPM *size_Mat];
+		D.f[DIR_MMM] = &DD[DIR_MMM *size_Mat];
+		D.f[DIR_PMM] = &DD[DIR_PMM *size_Mat];
+		D.f[DIR_MPM] = &DD[DIR_MPM *size_Mat];
 	}
 	else
 	{
-		D.f[W] = &DD[E   *size_Mat];
-		D.f[E] = &DD[W   *size_Mat];
-		D.f[S] = &DD[N   *size_Mat];
-		D.f[N] = &DD[S   *size_Mat];
-		D.f[B] = &DD[T   *size_Mat];
-		D.f[T] = &DD[B   *size_Mat];
-		D.f[SW] = &DD[NE  *size_Mat];
-		D.f[NE] = &DD[SW  *size_Mat];
-		D.f[NW] = &DD[SE  *size_Mat];
-		D.f[SE] = &DD[NW  *size_Mat];
-		D.f[BW] = &DD[TE  *size_Mat];
-		D.f[TE] = &DD[BW  *size_Mat];
-		D.f[TW] = &DD[BE  *size_Mat];
-		D.f[BE] = &DD[TW  *size_Mat];
-		D.f[BS] = &DD[TN  *size_Mat];
-		D.f[TN] = &DD[BS  *size_Mat];
-		D.f[TS] = &DD[BN  *size_Mat];
-		D.f[BN] = &DD[TS  *size_Mat];
-		D.f[REST] = &DD[REST*size_Mat];
-		D.f[TNE] = &DD[BSW *size_Mat];
-		D.f[TSW] = &DD[BNE *size_Mat];
-		D.f[TSE] = &DD[BNW *size_Mat];
-		D.f[TNW] = &DD[BSE *size_Mat];
-		D.f[BNE] = &DD[TSW *size_Mat];
-		D.f[BSW] = &DD[TNE *size_Mat];
-		D.f[BSE] = &DD[TNW *size_Mat];
-		D.f[BNW] = &DD[TSE *size_Mat];
+		D.f[DIR_M00] = &DD[DIR_P00   *size_Mat];
+		D.f[DIR_P00] = &DD[DIR_M00   *size_Mat];
+		D.f[DIR_0M0] = &DD[DIR_0P0   *size_Mat];
+		D.f[DIR_0P0] = &DD[DIR_0M0   *size_Mat];
+		D.f[DIR_00M] = &DD[DIR_00P   *size_Mat];
+		D.f[DIR_00P] = &DD[DIR_00M   *size_Mat];
+		D.f[DIR_MM0] = &DD[DIR_PP0  *size_Mat];
+		D.f[DIR_PP0] = &DD[DIR_MM0  *size_Mat];
+		D.f[DIR_MP0] = &DD[DIR_PM0  *size_Mat];
+		D.f[DIR_PM0] = &DD[DIR_MP0  *size_Mat];
+		D.f[DIR_M0M] = &DD[DIR_P0P  *size_Mat];
+		D.f[DIR_P0P] = &DD[DIR_M0M  *size_Mat];
+		D.f[DIR_M0P] = &DD[DIR_P0M  *size_Mat];
+		D.f[DIR_P0M] = &DD[DIR_M0P  *size_Mat];
+		D.f[DIR_0MM] = &DD[DIR_0PP  *size_Mat];
+		D.f[DIR_0PP] = &DD[DIR_0MM  *size_Mat];
+		D.f[DIR_0MP] = &DD[DIR_0PM  *size_Mat];
+		D.f[DIR_0PM] = &DD[DIR_0MP  *size_Mat];
+		D.f[DIR_000] = &DD[DIR_000*size_Mat];
+		D.f[DIR_PPP] = &DD[DIR_MMM *size_Mat];
+		D.f[DIR_MMP] = &DD[DIR_PPM *size_Mat];
+		D.f[DIR_PMP] = &DD[DIR_MPM *size_Mat];
+		D.f[DIR_MPP] = &DD[DIR_PMM *size_Mat];
+		D.f[DIR_PPM] = &DD[DIR_MMP *size_Mat];
+		D.f[DIR_MMM] = &DD[DIR_PPP *size_Mat];
+		D.f[DIR_PMM] = &DD[DIR_MPP *size_Mat];
+		D.f[DIR_MPM] = &DD[DIR_PMP *size_Mat];
 	}
 	////////////////////////////////////////////////////////////////////////////////
 	const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -539,32 +539,32 @@ extern "C" __global__ void QDeviceCompThinWallsPartOne27(
 			*q_dirBE, *q_dirTW, *q_dirTN, *q_dirBS, *q_dirBN, *q_dirTS,
 			*q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
 			*q_dirBSE, *q_dirBNW;
-		q_dirE = &QQ[E   * numberOfBCnodes];
-		q_dirW = &QQ[W   * numberOfBCnodes];
-		q_dirN = &QQ[N   * numberOfBCnodes];
-		q_dirS = &QQ[S   * numberOfBCnodes];
-		q_dirT = &QQ[T   * numberOfBCnodes];
-		q_dirB = &QQ[B   * numberOfBCnodes];
-		q_dirNE = &QQ[NE  * numberOfBCnodes];
-		q_dirSW = &QQ[SW  * numberOfBCnodes];
-		q_dirSE = &QQ[SE  * numberOfBCnodes];
-		q_dirNW = &QQ[NW  * numberOfBCnodes];
-		q_dirTE = &QQ[TE  * numberOfBCnodes];
-		q_dirBW = &QQ[BW  * numberOfBCnodes];
-		q_dirBE = &QQ[BE  * numberOfBCnodes];
-		q_dirTW = &QQ[TW  * numberOfBCnodes];
-		q_dirTN = &QQ[TN  * numberOfBCnodes];
-		q_dirBS = &QQ[BS  * numberOfBCnodes];
-		q_dirBN = &QQ[BN  * numberOfBCnodes];
-		q_dirTS = &QQ[TS  * numberOfBCnodes];
-		q_dirTNE = &QQ[TNE * numberOfBCnodes];
-		q_dirTSW = &QQ[TSW * numberOfBCnodes];
-		q_dirTSE = &QQ[TSE * numberOfBCnodes];
-		q_dirTNW = &QQ[TNW * numberOfBCnodes];
-		q_dirBNE = &QQ[BNE * numberOfBCnodes];
-		q_dirBSW = &QQ[BSW * numberOfBCnodes];
-		q_dirBSE = &QQ[BSE * numberOfBCnodes];
-		q_dirBNW = &QQ[BNW * numberOfBCnodes];
+		q_dirE = &QQ[DIR_P00   * numberOfBCnodes];
+		q_dirW = &QQ[DIR_M00   * numberOfBCnodes];
+		q_dirN = &QQ[DIR_0P0   * numberOfBCnodes];
+		q_dirS = &QQ[DIR_0M0   * numberOfBCnodes];
+		q_dirT = &QQ[DIR_00P   * numberOfBCnodes];
+		q_dirB = &QQ[DIR_00M   * numberOfBCnodes];
+		q_dirNE = &QQ[DIR_PP0  * numberOfBCnodes];
+		q_dirSW = &QQ[DIR_MM0  * numberOfBCnodes];
+		q_dirSE = &QQ[DIR_PM0  * numberOfBCnodes];
+		q_dirNW = &QQ[DIR_MP0  * numberOfBCnodes];
+		q_dirTE = &QQ[DIR_P0P  * numberOfBCnodes];
+		q_dirBW = &QQ[DIR_M0M  * numberOfBCnodes];
+		q_dirBE = &QQ[DIR_P0M  * numberOfBCnodes];
+		q_dirTW = &QQ[DIR_M0P  * numberOfBCnodes];
+		q_dirTN = &QQ[DIR_0PP  * numberOfBCnodes];
+		q_dirBS = &QQ[DIR_0MM  * numberOfBCnodes];
+		q_dirBN = &QQ[DIR_0PM  * numberOfBCnodes];
+		q_dirTS = &QQ[DIR_0MP  * numberOfBCnodes];
+		q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
+		q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
+		q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
+		q_dirTNW = &QQ[DIR_MPP * numberOfBCnodes];
+		q_dirBNE = &QQ[DIR_PPM * numberOfBCnodes];
+		q_dirBSW = &QQ[DIR_MMM * numberOfBCnodes];
+		q_dirBSE = &QQ[DIR_PMM * numberOfBCnodes];
+		q_dirBNW = &QQ[DIR_MPM * numberOfBCnodes];
 		////////////////////////////////////////////////////////////////////////////////
 		//index
 		unsigned int KQK = k_Q[k];
@@ -599,37 +599,37 @@ extern "C" __global__ void QDeviceCompThinWallsPartOne27(
 		real f_E, f_W, f_N, f_S, f_T, f_B, f_NE, f_SW, f_SE, f_NW, f_TE, f_BW, f_BE,
 			f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-		f_W = (D.f[E])[ke];
-		f_E = (D.f[W])[kw];
-		f_S = (D.f[N])[kn];
-		f_N = (D.f[S])[ks];
-		f_B = (D.f[T])[kt];
-		f_T = (D.f[B])[kb];
-		f_SW = (D.f[NE])[kne];
-		f_NE = (D.f[SW])[ksw];
-		f_NW = (D.f[SE])[kse];
-		f_SE = (D.f[NW])[knw];
-		f_BW = (D.f[TE])[kte];
-		f_TE = (D.f[BW])[kbw];
-		f_TW = (D.f[BE])[kbe];
-		f_BE = (D.f[TW])[ktw];
-		f_BS = (D.f[TN])[ktn];
-		f_TN = (D.f[BS])[kbs];
-		f_TS = (D.f[BN])[kbn];
-		f_BN = (D.f[TS])[kts];
-		f_BSW = (D.f[TNE])[ktne];
-		f_BNE = (D.f[TSW])[ktsw];
-		f_BNW = (D.f[TSE])[ktse];
-		f_BSE = (D.f[TNW])[ktnw];
-		f_TSW = (D.f[BNE])[kbne];
-		f_TNE = (D.f[BSW])[kbsw];
-		f_TNW = (D.f[BSE])[kbse];
-		f_TSE = (D.f[BNW])[kbnw];
+		f_W = (D.f[DIR_P00])[ke];
+		f_E = (D.f[DIR_M00])[kw];
+		f_S = (D.f[DIR_0P0])[kn];
+		f_N = (D.f[DIR_0M0])[ks];
+		f_B = (D.f[DIR_00P])[kt];
+		f_T = (D.f[DIR_00M])[kb];
+		f_SW = (D.f[DIR_PP0])[kne];
+		f_NE = (D.f[DIR_MM0])[ksw];
+		f_NW = (D.f[DIR_PM0])[kse];
+		f_SE = (D.f[DIR_MP0])[knw];
+		f_BW = (D.f[DIR_P0P])[kte];
+		f_TE = (D.f[DIR_M0M])[kbw];
+		f_TW = (D.f[DIR_P0M])[kbe];
+		f_BE = (D.f[DIR_M0P])[ktw];
+		f_BS = (D.f[DIR_0PP])[ktn];
+		f_TN = (D.f[DIR_0MM])[kbs];
+		f_TS = (D.f[DIR_0PM])[kbn];
+		f_BN = (D.f[DIR_0MP])[kts];
+		f_BSW = (D.f[DIR_PPP])[ktne];
+		f_BNE = (D.f[DIR_MMP])[ktsw];
+		f_BNW = (D.f[DIR_PMP])[ktse];
+		f_BSE = (D.f[DIR_MPP])[ktnw];
+		f_TSW = (D.f[DIR_PPM])[kbne];
+		f_TNE = (D.f[DIR_MMM])[kbsw];
+		f_TNW = (D.f[DIR_PMM])[kbse];
+		f_TSE = (D.f[DIR_MPM])[kbnw];
 		////////////////////////////////////////////////////////////////////////////////
 		real vx1, vx2, vx3, drho, feq, q;
 		drho = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
 			f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW +
-			f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[REST])[kzero]);
+			f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[DIR_000])[kzero]);
 
 		vx1 = (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
 			((f_BE - f_TW) + (f_TE - f_BW)) + ((f_SE - f_NW) + (f_NE - f_SW)) +
@@ -652,182 +652,182 @@ extern "C" __global__ void QDeviceCompThinWallsPartOne27(
 		if (q >= c0o1 && q <= c1o1)
 		{
 			feq = c2o27* (drho + c9o2*(vx1)*(vx1) * (c1o1 + drho) - cu_sq);
-			(D.f[W])[kw] = (c1o1 - q) / (c1o1 + q)*(f_E - f_W + (f_E + f_W - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_E + f_W)) / (c1o1 + q);
+			(D.f[DIR_M00])[kw] = (c1o1 - q) / (c1o1 + q)*(f_E - f_W + (f_E + f_W - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_E + f_W)) / (c1o1 + q);
 		}
 
 		q = q_dirW[k];
 		if (q >= c0o1 && q <= c1o1)
 		{
 			feq = c2o27* (drho + c9o2*(-vx1)*(-vx1) * (c1o1 + drho) - cu_sq);
-			(D.f[E])[ke] = (c1o1 - q) / (c1o1 + q)*(f_W - f_E + (f_W + f_E - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_W + f_E)) / (c1o1 + q);
+			(D.f[DIR_P00])[ke] = (c1o1 - q) / (c1o1 + q)*(f_W - f_E + (f_W + f_E - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_W + f_E)) / (c1o1 + q);
 		}
 
 		q = q_dirN[k];
 		if (q >= c0o1 && q <= c1o1)
 		{
 			feq = c2o27* (drho + c9o2*(vx2)*(vx2) * (c1o1 + drho) - cu_sq);
-			(D.f[S])[ks] = (c1o1 - q) / (c1o1 + q)*(f_N - f_S + (f_N + f_S - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_N + f_S)) / (c1o1 + q);
+			(D.f[DIR_0M0])[ks] = (c1o1 - q) / (c1o1 + q)*(f_N - f_S + (f_N + f_S - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_N + f_S)) / (c1o1 + q);
 		}
 
 		q = q_dirS[k];
 		if (q >= c0o1 && q <= c1o1)
 		{
 			feq = c2o27* (drho + c9o2*(-vx2)*(-vx2) * (c1o1 + drho) - cu_sq);
-			(D.f[N])[kn] = (c1o1 - q) / (c1o1 + q)*(f_S - f_N + (f_S + f_N - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_S + f_N)) / (c1o1 + q);
+			(D.f[DIR_0P0])[kn] = (c1o1 - q) / (c1o1 + q)*(f_S - f_N + (f_S + f_N - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_S + f_N)) / (c1o1 + q);
 		}
 
 		q = q_dirT[k];
 		if (q >= c0o1 && q <= c1o1)
 		{
 			feq = c2o27* (drho + c9o2*(vx3)*(vx3) * (c1o1 + drho) - cu_sq);
-			(D.f[B])[kb] = (c1o1 - q) / (c1o1 + q)*(f_T - f_B + (f_T + f_B - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_T + f_B)) / (c1o1 + q);
+			(D.f[DIR_00M])[kb] = (c1o1 - q) / (c1o1 + q)*(f_T - f_B + (f_T + f_B - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_T + f_B)) / (c1o1 + q);
 		}
 
 		q = q_dirB[k];
 		if (q >= c0o1 && q <= c1o1)
 		{
 			feq = c2o27* (drho + c9o2*(-vx3)*(-vx3) * (c1o1 + drho) - cu_sq);
-			(D.f[T])[kt] = (c1o1 - q) / (c1o1 + q)*(f_B - f_T + (f_B + f_T - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_B + f_T)) / (c1o1 + q);
+			(D.f[DIR_00P])[kt] = (c1o1 - q) / (c1o1 + q)*(f_B - f_T + (f_B + f_T - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_B + f_T)) / (c1o1 + q);
 		}
 
 		q = q_dirNE[k];
 		if (q >= c0o1 && q <= c1o1)
 		{
 			feq = c1o54* (drho + c9o2*(vx1 + vx2)*(vx1 + vx2) * (c1o1 + drho) - cu_sq);
-			(D.f[SW])[ksw] = (c1o1 - q) / (c1o1 + q)*(f_NE - f_SW + (f_NE + f_SW - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_NE + f_SW)) / (c1o1 + q);
+			(D.f[DIR_MM0])[ksw] = (c1o1 - q) / (c1o1 + q)*(f_NE - f_SW + (f_NE + f_SW - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_NE + f_SW)) / (c1o1 + q);
 		}
 
 		q = q_dirSW[k];
 		if (q >= c0o1 && q <= c1o1)
 		{
 			feq = c1o54* (drho + c9o2*(-vx1 - vx2)*(-vx1 - vx2) * (c1o1 + drho) - cu_sq);
-			(D.f[NE])[kne] = (c1o1 - q) / (c1o1 + q)*(f_SW - f_NE + (f_SW + f_NE - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_SW + f_NE)) / (c1o1 + q);
+			(D.f[DIR_PP0])[kne] = (c1o1 - q) / (c1o1 + q)*(f_SW - f_NE + (f_SW + f_NE - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_SW + f_NE)) / (c1o1 + q);
 		}
 
 		q = q_dirSE[k];
 		if (q >= c0o1 && q <= c1o1)
 		{
 			feq = c1o54* (drho + c9o2*(vx1 - vx2)*(vx1 - vx2) * (c1o1 + drho) - cu_sq);
-			(D.f[NW])[knw] = (c1o1 - q) / (c1o1 + q)*(f_SE - f_NW + (f_SE + f_NW - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_SE + f_NW)) / (c1o1 + q);
+			(D.f[DIR_MP0])[knw] = (c1o1 - q) / (c1o1 + q)*(f_SE - f_NW + (f_SE + f_NW - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_SE + f_NW)) / (c1o1 + q);
 		}
 
 		q = q_dirNW[k];
 		if (q >= c0o1 && q <= c1o1)
 		{
 			feq = c1o54* (drho + c9o2*(-vx1 + vx2)*(-vx1 + vx2) * (c1o1 + drho) - cu_sq);
-			(D.f[SE])[kse] = (c1o1 - q) / (c1o1 + q)*(f_NW - f_SE + (f_NW + f_SE - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_NW + f_SE)) / (c1o1 + q);
+			(D.f[DIR_PM0])[kse] = (c1o1 - q) / (c1o1 + q)*(f_NW - f_SE + (f_NW + f_SE - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_NW + f_SE)) / (c1o1 + q);
 		}
 
 		q = q_dirTE[k];
 		if (q >= c0o1 && q <= c1o1)
 		{
 			feq = c1o54* (drho + c9o2*(vx1 + vx3)*(vx1 + vx3) * (c1o1 + drho) - cu_sq);
-			(D.f[BW])[kbw] = (c1o1 - q) / (c1o1 + q)*(f_TE - f_BW + (f_TE + f_BW - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_TE + f_BW)) / (c1o1 + q);
+			(D.f[DIR_M0M])[kbw] = (c1o1 - q) / (c1o1 + q)*(f_TE - f_BW + (f_TE + f_BW - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_TE + f_BW)) / (c1o1 + q);
 		}
 
 		q = q_dirBW[k];
 		if (q >= c0o1 && q <= c1o1)
 		{
 			feq = c1o54* (drho + c9o2*(-vx1 - vx3)*(-vx1 - vx3) * (c1o1 + drho) - cu_sq);
-			(D.f[TE])[kte] = (c1o1 - q) / (c1o1 + q)*(f_BW - f_TE + (f_BW + f_TE - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_BW + f_TE)) / (c1o1 + q);
+			(D.f[DIR_P0P])[kte] = (c1o1 - q) / (c1o1 + q)*(f_BW - f_TE + (f_BW + f_TE - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_BW + f_TE)) / (c1o1 + q);
 		}
 
 		q = q_dirBE[k];
 		if (q >= c0o1 && q <= c1o1)
 		{
 			feq = c1o54* (drho + c9o2*(vx1 - vx3)*(vx1 - vx3) * (c1o1 + drho) - cu_sq);
-			(D.f[TW])[ktw] = (c1o1 - q) / (c1o1 + q)*(f_BE - f_TW + (f_BE + f_TW - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_BE + f_TW)) / (c1o1 + q);
+			(D.f[DIR_M0P])[ktw] = (c1o1 - q) / (c1o1 + q)*(f_BE - f_TW + (f_BE + f_TW - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_BE + f_TW)) / (c1o1 + q);
 		}
 
 		q = q_dirTW[k];
 		if (q >= c0o1 && q <= c1o1)
 		{
 			feq = c1o54* (drho + c9o2*(-vx1 + vx3)*(-vx1 + vx3) * (c1o1 + drho) - cu_sq);
-			(D.f[BE])[kbe] = (c1o1 - q) / (c1o1 + q)*(f_TW - f_BE + (f_TW + f_BE - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_TW + f_BE)) / (c1o1 + q);
+			(D.f[DIR_P0M])[kbe] = (c1o1 - q) / (c1o1 + q)*(f_TW - f_BE + (f_TW + f_BE - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_TW + f_BE)) / (c1o1 + q);
 		}
 
 		q = q_dirTN[k];
 		if (q >= c0o1 && q <= c1o1)
 		{
 			feq = c1o54* (drho + c9o2*(vx2 + vx3)*(vx2 + vx3) * (c1o1 + drho) - cu_sq);
-			(D.f[BS])[kbs] = (c1o1 - q) / (c1o1 + q)*(f_TN - f_BS + (f_TN + f_BS - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_TN + f_BS)) / (c1o1 + q);
+			(D.f[DIR_0MM])[kbs] = (c1o1 - q) / (c1o1 + q)*(f_TN - f_BS + (f_TN + f_BS - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_TN + f_BS)) / (c1o1 + q);
 		}
 
 		q = q_dirBS[k];
 		if (q >= c0o1 && q <= c1o1)
 		{
 			feq = c1o54* (drho + c9o2*(-vx2 - vx3)*(-vx2 - vx3) * (c1o1 + drho) - cu_sq);
-			(D.f[TN])[ktn] = (c1o1 - q) / (c1o1 + q)*(f_BS - f_TN + (f_BS + f_TN - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_BS + f_TN)) / (c1o1 + q);
+			(D.f[DIR_0PP])[ktn] = (c1o1 - q) / (c1o1 + q)*(f_BS - f_TN + (f_BS + f_TN - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_BS + f_TN)) / (c1o1 + q);
 		}
 
 		q = q_dirBN[k];
 		if (q >= c0o1 && q <= c1o1)
 		{
 			feq = c1o54* (drho + c9o2*(vx2 - vx3)*(vx2 - vx3) * (c1o1 + drho) - cu_sq);
-			(D.f[TS])[kts] = (c1o1 - q) / (c1o1 + q)*(f_BN - f_TS + (f_BN + f_TS - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_BN + f_TS)) / (c1o1 + q);
+			(D.f[DIR_0MP])[kts] = (c1o1 - q) / (c1o1 + q)*(f_BN - f_TS + (f_BN + f_TS - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_BN + f_TS)) / (c1o1 + q);
 		}
 
 		q = q_dirTS[k];
 		if (q >= c0o1 && q <= c1o1)
 		{
 			feq = c1o54* (drho + c9o2*(-vx2 + vx3)*(-vx2 + vx3) * (c1o1 + drho) - cu_sq);
-			(D.f[BN])[kbn] = (c1o1 - q) / (c1o1 + q)*(f_TS - f_BN + (f_TS + f_BN - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_TS + f_BN)) / (c1o1 + q);
+			(D.f[DIR_0PM])[kbn] = (c1o1 - q) / (c1o1 + q)*(f_TS - f_BN + (f_TS + f_BN - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_TS + f_BN)) / (c1o1 + q);
 		}
 
 		q = q_dirTNE[k];
 		if (q >= c0o1 && q <= c1o1)
 		{
 			feq = c1o216*(drho + c9o2*(vx1 + vx2 + vx3)*(vx1 + vx2 + vx3) * (c1o1 + drho) - cu_sq);
-			(D.f[BSW])[kbsw] = (c1o1 - q) / (c1o1 + q)*(f_TNE - f_BSW + (f_TNE + f_BSW - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_TNE + f_BSW)) / (c1o1 + q);
+			(D.f[DIR_MMM])[kbsw] = (c1o1 - q) / (c1o1 + q)*(f_TNE - f_BSW + (f_TNE + f_BSW - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_TNE + f_BSW)) / (c1o1 + q);
 		}
 
 		q = q_dirBSW[k];
 		if (q >= c0o1 && q <= c1o1)
 		{
 			feq = c1o216*(drho + c9o2*(-vx1 - vx2 - vx3)*(-vx1 - vx2 - vx3) * (c1o1 + drho) - cu_sq);
-			(D.f[TNE])[ktne] = (c1o1 - q) / (c1o1 + q)*(f_BSW - f_TNE + (f_BSW + f_TNE - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_BSW + f_TNE)) / (c1o1 + q);
+			(D.f[DIR_PPP])[ktne] = (c1o1 - q) / (c1o1 + q)*(f_BSW - f_TNE + (f_BSW + f_TNE - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_BSW + f_TNE)) / (c1o1 + q);
 		}
 
 		q = q_dirBNE[k];
 		if (q >= c0o1 && q <= c1o1)
 		{
 			feq = c1o216*(drho + c9o2*(vx1 + vx2 - vx3)*(vx1 + vx2 - vx3) * (c1o1 + drho) - cu_sq);
-			(D.f[TSW])[ktsw] = (c1o1 - q) / (c1o1 + q)*(f_BNE - f_TSW + (f_BNE + f_TSW - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_BNE + f_TSW)) / (c1o1 + q);
+			(D.f[DIR_MMP])[ktsw] = (c1o1 - q) / (c1o1 + q)*(f_BNE - f_TSW + (f_BNE + f_TSW - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_BNE + f_TSW)) / (c1o1 + q);
 		}
 
 		q = q_dirTSW[k];
 		if (q >= c0o1 && q <= c1o1)
 		{
 			feq = c1o216*(drho + c9o2*(-vx1 - vx2 + vx3)*(-vx1 - vx2 + vx3) * (c1o1 + drho) - cu_sq);
-			(D.f[BNE])[kbne] = (c1o1 - q) / (c1o1 + q)*(f_TSW - f_BNE + (f_TSW + f_BNE - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_TSW + f_BNE)) / (c1o1 + q);
+			(D.f[DIR_PPM])[kbne] = (c1o1 - q) / (c1o1 + q)*(f_TSW - f_BNE + (f_TSW + f_BNE - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_TSW + f_BNE)) / (c1o1 + q);
 		}
 
 		q = q_dirTSE[k];
 		if (q >= c0o1 && q <= c1o1)
 		{
 			feq = c1o216*(drho + c9o2*(vx1 - vx2 + vx3)*(vx1 - vx2 + vx3) * (c1o1 + drho) - cu_sq);
-			(D.f[BNW])[kbnw] = (c1o1 - q) / (c1o1 + q)*(f_TSE - f_BNW + (f_TSE + f_BNW - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_TSE + f_BNW)) / (c1o1 + q);
+			(D.f[DIR_MPM])[kbnw] = (c1o1 - q) / (c1o1 + q)*(f_TSE - f_BNW + (f_TSE + f_BNW - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_TSE + f_BNW)) / (c1o1 + q);
 		}
 
 		q = q_dirBNW[k];
 		if (q >= c0o1 && q <= c1o1)
 		{
 			feq = c1o216*(drho + c9o2*(-vx1 + vx2 - vx3)*(-vx1 + vx2 - vx3) * (c1o1 + drho) - cu_sq);
-			(D.f[TSE])[ktse] = (c1o1 - q) / (c1o1 + q)*(f_BNW - f_TSE + (f_BNW + f_TSE - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_BNW + f_TSE)) / (c1o1 + q);
+			(D.f[DIR_PMP])[ktse] = (c1o1 - q) / (c1o1 + q)*(f_BNW - f_TSE + (f_BNW + f_TSE - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_BNW + f_TSE)) / (c1o1 + q);
 		}
 
 		q = q_dirBSE[k];
 		if (q >= c0o1 && q <= c1o1)
 		{
 			feq = c1o216*(drho + c9o2*(vx1 - vx2 - vx3)*(vx1 - vx2 - vx3) * (c1o1 + drho) - cu_sq);
-			(D.f[TNW])[ktnw] = (c1o1 - q) / (c1o1 + q)*(f_BSE - f_TNW + (f_BSE + f_TNW - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_BSE + f_TNW)) / (c1o1 + q);
+			(D.f[DIR_MPP])[ktnw] = (c1o1 - q) / (c1o1 + q)*(f_BSE - f_TNW + (f_BSE + f_TNW - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_BSE + f_TNW)) / (c1o1 + q);
 		}
 
 		q = q_dirTNW[k];
 		if (q >= c0o1 && q <= c1o1)
 		{
 			feq = c1o216*(drho + c9o2*(-vx1 + vx2 + vx3)*(-vx1 + vx2 + vx3) * (c1o1 + drho) - cu_sq);
-			(D.f[BSE])[kbse] = (c1o1 - q) / (c1o1 + q)*(f_TNW - f_BSE + (f_TNW + f_BSE - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_TNW + f_BSE)) / (c1o1 + q);
+			(D.f[DIR_PMM])[kbse] = (c1o1 - q) / (c1o1 + q)*(f_TNW - f_BSE + (f_TNW + f_BSE - c2o1*feq*om1) / (c1o1 - om1))*c1o2 + (q*(f_TNW + f_BSE)) / (c1o1 + q);
 		}
 	}
 }
@@ -872,7 +872,7 @@ extern "C" __global__ void QDeviceCompThinWallsPartOne27(
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QThinWallsPartTwo27(
+__global__ void QThinWallsPartTwo27(
 	real* DD, 
 	int* k_Q, 
 	real* QQ,
@@ -904,32 +904,32 @@ extern "C" __global__ void QThinWallsPartTwo27(
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[E   * numberOfBCnodes];
-      q_dirW   = &QQ[W   * numberOfBCnodes];
-      q_dirN   = &QQ[N   * numberOfBCnodes];
-      q_dirS   = &QQ[S   * numberOfBCnodes];
-      q_dirT   = &QQ[T   * numberOfBCnodes];
-      q_dirB   = &QQ[B   * numberOfBCnodes];
-      q_dirNE  = &QQ[NE  * numberOfBCnodes];
-      q_dirSW  = &QQ[SW  * numberOfBCnodes];
-      q_dirSE  = &QQ[SE  * numberOfBCnodes];
-      q_dirNW  = &QQ[NW  * numberOfBCnodes];
-      q_dirTE  = &QQ[TE  * numberOfBCnodes];
-      q_dirBW  = &QQ[BW  * numberOfBCnodes];
-      q_dirBE  = &QQ[BE  * numberOfBCnodes];
-      q_dirTW  = &QQ[TW  * numberOfBCnodes];
-      q_dirTN  = &QQ[TN  * numberOfBCnodes];
-      q_dirBS  = &QQ[BS  * numberOfBCnodes];
-      q_dirBN  = &QQ[BN  * numberOfBCnodes];
-      q_dirTS  = &QQ[TS  * numberOfBCnodes];
-      q_dirTNE = &QQ[TNE * numberOfBCnodes];
-      q_dirTSW = &QQ[TSW * numberOfBCnodes];
-      q_dirTSE = &QQ[TSE * numberOfBCnodes];
-      q_dirTNW = &QQ[TNW * numberOfBCnodes];
-      q_dirBNE = &QQ[BNE * numberOfBCnodes];
-      q_dirBSW = &QQ[BSW * numberOfBCnodes];
-      q_dirBSE = &QQ[BSE * numberOfBCnodes];
-      q_dirBNW = &QQ[BNW * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
+      q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
+      q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
+      q_dirTNW = &QQ[DIR_MPP * numberOfBCnodes];
+      q_dirBNE = &QQ[DIR_PPM * numberOfBCnodes];
+      q_dirBSW = &QQ[DIR_MMM * numberOfBCnodes];
+      q_dirBSE = &QQ[DIR_PMM * numberOfBCnodes];
+      q_dirBNW = &QQ[DIR_MPM * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       uint KQK  = k_Q[k];
@@ -980,123 +980,123 @@ extern "C" __global__ void QThinWallsPartTwo27(
 	  Distributions27 D, DN;
 	  if (isEvenTimestep == true)
 	  {
-		  D.f[E] = &DD[E   *size_Mat];
-		  D.f[W] = &DD[W   *size_Mat];
-		  D.f[N] = &DD[N   *size_Mat];
-		  D.f[S] = &DD[S   *size_Mat];
-		  D.f[T] = &DD[T   *size_Mat];
-		  D.f[B] = &DD[B   *size_Mat];
-		  D.f[NE] = &DD[NE  *size_Mat];
-		  D.f[SW] = &DD[SW  *size_Mat];
-		  D.f[SE] = &DD[SE  *size_Mat];
-		  D.f[NW] = &DD[NW  *size_Mat];
-		  D.f[TE] = &DD[TE  *size_Mat];
-		  D.f[BW] = &DD[BW  *size_Mat];
-		  D.f[BE] = &DD[BE  *size_Mat];
-		  D.f[TW] = &DD[TW  *size_Mat];
-		  D.f[TN] = &DD[TN  *size_Mat];
-		  D.f[BS] = &DD[BS  *size_Mat];
-		  D.f[BN] = &DD[BN  *size_Mat];
-		  D.f[TS] = &DD[TS  *size_Mat];
-		  D.f[REST] = &DD[REST*size_Mat];
-		  D.f[TNE] = &DD[TNE *size_Mat];
-		  D.f[TSW] = &DD[TSW *size_Mat];
-		  D.f[TSE] = &DD[TSE *size_Mat];
-		  D.f[TNW] = &DD[TNW *size_Mat];
-		  D.f[BNE] = &DD[BNE *size_Mat];
-		  D.f[BSW] = &DD[BSW *size_Mat];
-		  D.f[BSE] = &DD[BSE *size_Mat];
-		  D.f[BNW] = &DD[BNW *size_Mat];
+		  D.f[DIR_P00] = &DD[DIR_P00   *size_Mat];
+		  D.f[DIR_M00] = &DD[DIR_M00   *size_Mat];
+		  D.f[DIR_0P0] = &DD[DIR_0P0   *size_Mat];
+		  D.f[DIR_0M0] = &DD[DIR_0M0   *size_Mat];
+		  D.f[DIR_00P] = &DD[DIR_00P   *size_Mat];
+		  D.f[DIR_00M] = &DD[DIR_00M   *size_Mat];
+		  D.f[DIR_PP0] = &DD[DIR_PP0  *size_Mat];
+		  D.f[DIR_MM0] = &DD[DIR_MM0  *size_Mat];
+		  D.f[DIR_PM0] = &DD[DIR_PM0  *size_Mat];
+		  D.f[DIR_MP0] = &DD[DIR_MP0  *size_Mat];
+		  D.f[DIR_P0P] = &DD[DIR_P0P  *size_Mat];
+		  D.f[DIR_M0M] = &DD[DIR_M0M  *size_Mat];
+		  D.f[DIR_P0M] = &DD[DIR_P0M  *size_Mat];
+		  D.f[DIR_M0P] = &DD[DIR_M0P  *size_Mat];
+		  D.f[DIR_0PP] = &DD[DIR_0PP  *size_Mat];
+		  D.f[DIR_0MM] = &DD[DIR_0MM  *size_Mat];
+		  D.f[DIR_0PM] = &DD[DIR_0PM  *size_Mat];
+		  D.f[DIR_0MP] = &DD[DIR_0MP  *size_Mat];
+		  D.f[DIR_000] = &DD[DIR_000*size_Mat];
+		  D.f[DIR_PPP] = &DD[DIR_PPP *size_Mat];
+		  D.f[DIR_MMP] = &DD[DIR_MMP *size_Mat];
+		  D.f[DIR_PMP] = &DD[DIR_PMP *size_Mat];
+		  D.f[DIR_MPP] = &DD[DIR_MPP *size_Mat];
+		  D.f[DIR_PPM] = &DD[DIR_PPM *size_Mat];
+		  D.f[DIR_MMM] = &DD[DIR_MMM *size_Mat];
+		  D.f[DIR_PMM] = &DD[DIR_PMM *size_Mat];
+		  D.f[DIR_MPM] = &DD[DIR_MPM *size_Mat];
 	  }
 	  else
 	  {
-		  D.f[W] = &DD[E   *size_Mat];
-		  D.f[E] = &DD[W   *size_Mat];
-		  D.f[S] = &DD[N   *size_Mat];
-		  D.f[N] = &DD[S   *size_Mat];
-		  D.f[B] = &DD[T   *size_Mat];
-		  D.f[T] = &DD[B   *size_Mat];
-		  D.f[SW] = &DD[NE  *size_Mat];
-		  D.f[NE] = &DD[SW  *size_Mat];
-		  D.f[NW] = &DD[SE  *size_Mat];
-		  D.f[SE] = &DD[NW  *size_Mat];
-		  D.f[BW] = &DD[TE  *size_Mat];
-		  D.f[TE] = &DD[BW  *size_Mat];
-		  D.f[TW] = &DD[BE  *size_Mat];
-		  D.f[BE] = &DD[TW  *size_Mat];
-		  D.f[BS] = &DD[TN  *size_Mat];
-		  D.f[TN] = &DD[BS  *size_Mat];
-		  D.f[TS] = &DD[BN  *size_Mat];
-		  D.f[BN] = &DD[TS  *size_Mat];
-		  D.f[REST] = &DD[REST*size_Mat];
-		  D.f[TNE] = &DD[BSW *size_Mat];
-		  D.f[TSW] = &DD[BNE *size_Mat];
-		  D.f[TSE] = &DD[BNW *size_Mat];
-		  D.f[TNW] = &DD[BSE *size_Mat];
-		  D.f[BNE] = &DD[TSW *size_Mat];
-		  D.f[BSW] = &DD[TNE *size_Mat];
-		  D.f[BSE] = &DD[TNW *size_Mat];
-		  D.f[BNW] = &DD[TSE *size_Mat];
+		  D.f[DIR_M00] = &DD[DIR_P00   *size_Mat];
+		  D.f[DIR_P00] = &DD[DIR_M00   *size_Mat];
+		  D.f[DIR_0M0] = &DD[DIR_0P0   *size_Mat];
+		  D.f[DIR_0P0] = &DD[DIR_0M0   *size_Mat];
+		  D.f[DIR_00M] = &DD[DIR_00P   *size_Mat];
+		  D.f[DIR_00P] = &DD[DIR_00M   *size_Mat];
+		  D.f[DIR_MM0] = &DD[DIR_PP0  *size_Mat];
+		  D.f[DIR_PP0] = &DD[DIR_MM0  *size_Mat];
+		  D.f[DIR_MP0] = &DD[DIR_PM0  *size_Mat];
+		  D.f[DIR_PM0] = &DD[DIR_MP0  *size_Mat];
+		  D.f[DIR_M0M] = &DD[DIR_P0P  *size_Mat];
+		  D.f[DIR_P0P] = &DD[DIR_M0M  *size_Mat];
+		  D.f[DIR_M0P] = &DD[DIR_P0M  *size_Mat];
+		  D.f[DIR_P0M] = &DD[DIR_M0P  *size_Mat];
+		  D.f[DIR_0MM] = &DD[DIR_0PP  *size_Mat];
+		  D.f[DIR_0PP] = &DD[DIR_0MM  *size_Mat];
+		  D.f[DIR_0MP] = &DD[DIR_0PM  *size_Mat];
+		  D.f[DIR_0PM] = &DD[DIR_0MP  *size_Mat];
+		  D.f[DIR_000] = &DD[DIR_000*size_Mat];
+		  D.f[DIR_PPP] = &DD[DIR_MMM *size_Mat];
+		  D.f[DIR_MMP] = &DD[DIR_PPM *size_Mat];
+		  D.f[DIR_PMP] = &DD[DIR_MPM *size_Mat];
+		  D.f[DIR_MPP] = &DD[DIR_PMM *size_Mat];
+		  D.f[DIR_PPM] = &DD[DIR_MMP *size_Mat];
+		  D.f[DIR_MMM] = &DD[DIR_PPP *size_Mat];
+		  D.f[DIR_PMM] = &DD[DIR_MPP *size_Mat];
+		  D.f[DIR_MPM] = &DD[DIR_PMP *size_Mat];
 	  }
 	  if (isEvenTimestep==false)
       {
-         DN.f[E   ] = &DD[E   *size_Mat];
-         DN.f[W   ] = &DD[W   *size_Mat];
-         DN.f[N   ] = &DD[N   *size_Mat];
-         DN.f[S   ] = &DD[S   *size_Mat];
-         DN.f[T   ] = &DD[T   *size_Mat];
-         DN.f[B   ] = &DD[B   *size_Mat];
-         DN.f[NE  ] = &DD[NE  *size_Mat];
-         DN.f[SW  ] = &DD[SW  *size_Mat];
-         DN.f[SE  ] = &DD[SE  *size_Mat];
-         DN.f[NW  ] = &DD[NW  *size_Mat];
-         DN.f[TE  ] = &DD[TE  *size_Mat];
-         DN.f[BW  ] = &DD[BW  *size_Mat];
-         DN.f[BE  ] = &DD[BE  *size_Mat];
-         DN.f[TW  ] = &DD[TW  *size_Mat];
-         DN.f[TN  ] = &DD[TN  *size_Mat];
-         DN.f[BS  ] = &DD[BS  *size_Mat];
-         DN.f[BN  ] = &DD[BN  *size_Mat];
-         DN.f[TS  ] = &DD[TS  *size_Mat];
-         DN.f[REST] = &DD[REST*size_Mat];
-         DN.f[TNE ] = &DD[TNE *size_Mat];
-         DN.f[TSW ] = &DD[TSW *size_Mat];
-         DN.f[TSE ] = &DD[TSE *size_Mat];
-         DN.f[TNW ] = &DD[TNW *size_Mat];
-         DN.f[BNE ] = &DD[BNE *size_Mat];
-         DN.f[BSW ] = &DD[BSW *size_Mat];
-         DN.f[BSE ] = &DD[BSE *size_Mat];
-         DN.f[BNW ] = &DD[BNW *size_Mat];
+         DN.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+         DN.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+         DN.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+         DN.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+         DN.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+         DN.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+         DN.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+         DN.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+         DN.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+         DN.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+         DN.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+         DN.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+         DN.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+         DN.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+         DN.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+         DN.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+         DN.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+         DN.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+         DN.f[DIR_000] = &DD[DIR_000*size_Mat];
+         DN.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+         DN.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+         DN.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+         DN.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+         DN.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+         DN.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+         DN.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+         DN.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
       } 
       else
       {
-         DN.f[W   ] = &DD[E   *size_Mat];
-         DN.f[E   ] = &DD[W   *size_Mat];
-         DN.f[S   ] = &DD[N   *size_Mat];
-         DN.f[N   ] = &DD[S   *size_Mat];
-         DN.f[B   ] = &DD[T   *size_Mat];
-         DN.f[T   ] = &DD[B   *size_Mat];
-         DN.f[SW  ] = &DD[NE  *size_Mat];
-         DN.f[NE  ] = &DD[SW  *size_Mat];
-         DN.f[NW  ] = &DD[SE  *size_Mat];
-         DN.f[SE  ] = &DD[NW  *size_Mat];
-         DN.f[BW  ] = &DD[TE  *size_Mat];
-         DN.f[TE  ] = &DD[BW  *size_Mat];
-         DN.f[TW  ] = &DD[BE  *size_Mat];
-         DN.f[BE  ] = &DD[TW  *size_Mat];
-         DN.f[BS  ] = &DD[TN  *size_Mat];
-         DN.f[TN  ] = &DD[BS  *size_Mat];
-         DN.f[TS  ] = &DD[BN  *size_Mat];
-         DN.f[BN  ] = &DD[TS  *size_Mat];
-         DN.f[REST] = &DD[REST*size_Mat];
-         DN.f[TNE ] = &DD[BSW *size_Mat];
-         DN.f[TSW ] = &DD[BNE *size_Mat];
-         DN.f[TSE ] = &DD[BNW *size_Mat];
-         DN.f[TNW ] = &DD[BSE *size_Mat];
-         DN.f[BNE ] = &DD[TSW *size_Mat];
-         DN.f[BSW ] = &DD[TNE *size_Mat];
-         DN.f[BSE ] = &DD[TNW *size_Mat];
-         DN.f[BNW ] = &DD[TSE *size_Mat];
+         DN.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+         DN.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+         DN.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+         DN.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+         DN.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+         DN.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+         DN.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+         DN.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+         DN.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+         DN.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+         DN.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+         DN.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+         DN.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+         DN.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+         DN.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+         DN.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+         DN.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+         DN.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+         DN.f[DIR_000] = &DD[DIR_000*size_Mat];
+         DN.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+         DN.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+         DN.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+         DN.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+         DN.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+         DN.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+         DN.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+         DN.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  //directions allways exchange
@@ -1106,32 +1106,32 @@ extern "C" __global__ void QThinWallsPartTwo27(
 	  //( 1  1  1) ( 1  0  0) ( 0  1  0) ( 0  0  1) ( 1  1  0) ( 1  0  1) ( 0  1  1) (-1 -1  1) (-1  1 -1) ( 1 -1 -1) (-1  1  0) (-1  0  1) ( 0 -1  1)
 	  ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  real q, tmp;
-      q = q_dirE[k];   if (q>=c0o1 && q<=c1o1){ if (geom[kw  ] < GEO_FLUID){tmp = (DN.f[W  ])[kw  ]; (DN.f[W  ])[kw  ]=(D.f[W  ])[kw  ]; (D.f[W  ])[kw  ]=tmp;}}
-	  q = q_dirW[k];   if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[E  ])[ke  ]; (DN.f[E  ])[ke  ]=(D.f[E  ])[ke  ]; (D.f[E  ])[ke  ]=tmp;}}
-      q = q_dirN[k];   if (q>=c0o1 && q<=c1o1){ if (geom[ks  ] < GEO_FLUID){tmp = (DN.f[S  ])[ks  ]; (DN.f[S  ])[ks  ]=(D.f[S  ])[ks  ]; (D.f[S  ])[ks  ]=tmp;}}
-      q = q_dirS[k];   if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[N  ])[kn  ]; (DN.f[N  ])[kn  ]=(D.f[N  ])[kn  ]; (D.f[N  ])[kn  ]=tmp;}}
-      q = q_dirT[k];   if (q>=c0o1 && q<=c1o1){ if (geom[kb  ] < GEO_FLUID){tmp = (DN.f[B  ])[kb  ]; (DN.f[B  ])[kb  ]=(D.f[B  ])[kb  ]; (D.f[B  ])[kb  ]=tmp;}}
-      q = q_dirB[k];   if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[T  ])[kt  ]; (DN.f[T  ])[kt  ]=(D.f[T  ])[kt  ]; (D.f[T  ])[kt  ]=tmp;}}
-      q = q_dirNE[k];  if (q>=c0o1 && q<=c1o1){ if (geom[ksw ] < GEO_FLUID){tmp = (DN.f[SW ])[ksw ]; (DN.f[SW ])[ksw ]=(D.f[SW ])[ksw ]; (D.f[SW ])[ksw ]=tmp;}}
-      q = q_dirSW[k];  if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[NE ])[kne ]; (DN.f[NE ])[kne ]=(D.f[NE ])[kne ]; (D.f[NE ])[kne ]=tmp;}}
-      q = q_dirSE[k];  if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[NW ])[knw ]; (DN.f[NW ])[knw ]=(D.f[NW ])[knw ]; (D.f[NW ])[knw ]=tmp;}}
-      q = q_dirNW[k];  if (q>=c0o1 && q<=c1o1){ if (geom[kmp0] < GEO_FLUID){tmp = (DN.f[SE ])[kse ]; (DN.f[SE ])[kse ]=(D.f[SE ])[kse ]; (D.f[SE ])[kse ]=tmp;}}
-      q = q_dirTE[k];  if (q>=c0o1 && q<=c1o1){ if (geom[kbw ] < GEO_FLUID){tmp = (DN.f[BW ])[kbw ]; (DN.f[BW ])[kbw ]=(D.f[BW ])[kbw ]; (D.f[BW ])[kbw ]=tmp;}}
-      q = q_dirBW[k];  if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[TE ])[kte ]; (DN.f[TE ])[kte ]=(D.f[TE ])[kte ]; (D.f[TE ])[kte ]=tmp;}}
-      q = q_dirBE[k];  if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[TW ])[ktw ]; (DN.f[TW ])[ktw ]=(D.f[TW ])[ktw ]; (D.f[TW ])[ktw ]=tmp;}}
-      q = q_dirTW[k];  if (q>=c0o1 && q<=c1o1){ if (geom[km0p] < GEO_FLUID){tmp = (DN.f[BE ])[kbe ]; (DN.f[BE ])[kbe ]=(D.f[BE ])[kbe ]; (D.f[BE ])[kbe ]=tmp;}}
-      q = q_dirTN[k];  if (q>=c0o1 && q<=c1o1){ if (geom[kbs ] < GEO_FLUID){tmp = (DN.f[BS ])[kbs ]; (DN.f[BS ])[kbs ]=(D.f[BS ])[kbs ]; (D.f[BS ])[kbs ]=tmp;}}
-      q = q_dirBS[k];  if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[TN ])[ktn ]; (DN.f[TN ])[ktn ]=(D.f[TN ])[ktn ]; (D.f[TN ])[ktn ]=tmp;}}
-      q = q_dirBN[k];  if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[TS ])[kts ]; (DN.f[TS ])[kts ]=(D.f[TS ])[kts ]; (D.f[TS ])[kts ]=tmp;}}
-      q = q_dirTS[k];  if (q>=c0o1 && q<=c1o1){ if (geom[k0mp] < GEO_FLUID){tmp = (DN.f[BN ])[kbn ]; (DN.f[BN ])[kbn ]=(D.f[BN ])[kbn ]; (D.f[BN ])[kbn ]=tmp;}}
-      q = q_dirTNE[k]; if (q>=c0o1 && q<=c1o1){ if (geom[kbsw] < GEO_FLUID){tmp = (DN.f[BSW])[kbsw]; (DN.f[BSW])[kbsw]=(D.f[BSW])[kbsw]; (D.f[BSW])[kbsw]=tmp;}}
-      q = q_dirBSW[k]; if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[TNE])[ktne]; (DN.f[TNE])[ktne]=(D.f[TNE])[ktne]; (D.f[TNE])[ktne]=tmp;}}
-      q = q_dirBNE[k]; if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[TSW])[ktsw]; (DN.f[TSW])[ktsw]=(D.f[TSW])[ktsw]; (D.f[TSW])[ktsw]=tmp;}}
-      q = q_dirTSW[k]; if (q>=c0o1 && q<=c1o1){ if (geom[kmmp] < GEO_FLUID){tmp = (DN.f[BNE])[kbne]; (DN.f[BNE])[kbne]=(D.f[BNE])[kbne]; (D.f[BNE])[kbne]=tmp;}}
-      q = q_dirTSE[k]; if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[BNW])[kbnw]; (DN.f[BNW])[kbnw]=(D.f[BNW])[kbnw]; (D.f[BNW])[kbnw]=tmp;}}
-      q = q_dirBNW[k]; if (q>=c0o1 && q<=c1o1){ if (geom[kmpm] < GEO_FLUID){tmp = (DN.f[TSE])[ktse]; (DN.f[TSE])[ktse]=(D.f[TSE])[ktse]; (D.f[TSE])[ktse]=tmp;}}
-      q = q_dirBSE[k]; if (q>=c0o1 && q<=c1o1){ if (geom[kpmm] < GEO_FLUID){tmp = (DN.f[TNW])[ktnw]; (DN.f[TNW])[ktnw]=(D.f[TNW])[ktnw]; (D.f[TNW])[ktnw]=tmp;}}
-      q = q_dirTNW[k]; if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[BSE])[kbse]; (DN.f[BSE])[kbse]=(D.f[BSE])[kbse]; (D.f[BSE])[kbse]=tmp;}}
+      q = q_dirE[k];   if (q>=c0o1 && q<=c1o1){ if (geom[kw  ] < GEO_FLUID){tmp = (DN.f[DIR_M00  ])[kw  ]; (DN.f[DIR_M00  ])[kw  ]=(D.f[DIR_M00  ])[kw  ]; (D.f[DIR_M00  ])[kw  ]=tmp;}}
+	  q = q_dirW[k];   if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_P00  ])[ke  ]; (DN.f[DIR_P00  ])[ke  ]=(D.f[DIR_P00  ])[ke  ]; (D.f[DIR_P00  ])[ke  ]=tmp;}}
+      q = q_dirN[k];   if (q>=c0o1 && q<=c1o1){ if (geom[ks  ] < GEO_FLUID){tmp = (DN.f[DIR_0M0  ])[ks  ]; (DN.f[DIR_0M0  ])[ks  ]=(D.f[DIR_0M0  ])[ks  ]; (D.f[DIR_0M0  ])[ks  ]=tmp;}}
+      q = q_dirS[k];   if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_0P0  ])[kn  ]; (DN.f[DIR_0P0  ])[kn  ]=(D.f[DIR_0P0  ])[kn  ]; (D.f[DIR_0P0  ])[kn  ]=tmp;}}
+      q = q_dirT[k];   if (q>=c0o1 && q<=c1o1){ if (geom[kb  ] < GEO_FLUID){tmp = (DN.f[DIR_00M  ])[kb  ]; (DN.f[DIR_00M  ])[kb  ]=(D.f[DIR_00M  ])[kb  ]; (D.f[DIR_00M  ])[kb  ]=tmp;}}
+      q = q_dirB[k];   if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_00P  ])[kt  ]; (DN.f[DIR_00P  ])[kt  ]=(D.f[DIR_00P  ])[kt  ]; (D.f[DIR_00P  ])[kt  ]=tmp;}}
+      q = q_dirNE[k];  if (q>=c0o1 && q<=c1o1){ if (geom[ksw ] < GEO_FLUID){tmp = (DN.f[DIR_MM0 ])[ksw ]; (DN.f[DIR_MM0 ])[ksw ]=(D.f[DIR_MM0 ])[ksw ]; (D.f[DIR_MM0 ])[ksw ]=tmp;}}
+      q = q_dirSW[k];  if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_PP0 ])[kne ]; (DN.f[DIR_PP0 ])[kne ]=(D.f[DIR_PP0 ])[kne ]; (D.f[DIR_PP0 ])[kne ]=tmp;}}
+      q = q_dirSE[k];  if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_MP0 ])[knw ]; (DN.f[DIR_MP0 ])[knw ]=(D.f[DIR_MP0 ])[knw ]; (D.f[DIR_MP0 ])[knw ]=tmp;}}
+      q = q_dirNW[k];  if (q>=c0o1 && q<=c1o1){ if (geom[kmp0] < GEO_FLUID){tmp = (DN.f[DIR_PM0 ])[kse ]; (DN.f[DIR_PM0 ])[kse ]=(D.f[DIR_PM0 ])[kse ]; (D.f[DIR_PM0 ])[kse ]=tmp;}}
+      q = q_dirTE[k];  if (q>=c0o1 && q<=c1o1){ if (geom[kbw ] < GEO_FLUID){tmp = (DN.f[DIR_M0M ])[kbw ]; (DN.f[DIR_M0M ])[kbw ]=(D.f[DIR_M0M ])[kbw ]; (D.f[DIR_M0M ])[kbw ]=tmp;}}
+      q = q_dirBW[k];  if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_P0P ])[kte ]; (DN.f[DIR_P0P ])[kte ]=(D.f[DIR_P0P ])[kte ]; (D.f[DIR_P0P ])[kte ]=tmp;}}
+      q = q_dirBE[k];  if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_M0P ])[ktw ]; (DN.f[DIR_M0P ])[ktw ]=(D.f[DIR_M0P ])[ktw ]; (D.f[DIR_M0P ])[ktw ]=tmp;}}
+      q = q_dirTW[k];  if (q>=c0o1 && q<=c1o1){ if (geom[km0p] < GEO_FLUID){tmp = (DN.f[DIR_P0M ])[kbe ]; (DN.f[DIR_P0M ])[kbe ]=(D.f[DIR_P0M ])[kbe ]; (D.f[DIR_P0M ])[kbe ]=tmp;}}
+      q = q_dirTN[k];  if (q>=c0o1 && q<=c1o1){ if (geom[kbs ] < GEO_FLUID){tmp = (DN.f[DIR_0MM ])[kbs ]; (DN.f[DIR_0MM ])[kbs ]=(D.f[DIR_0MM ])[kbs ]; (D.f[DIR_0MM ])[kbs ]=tmp;}}
+      q = q_dirBS[k];  if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_0PP ])[ktn ]; (DN.f[DIR_0PP ])[ktn ]=(D.f[DIR_0PP ])[ktn ]; (D.f[DIR_0PP ])[ktn ]=tmp;}}
+      q = q_dirBN[k];  if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_0MP ])[kts ]; (DN.f[DIR_0MP ])[kts ]=(D.f[DIR_0MP ])[kts ]; (D.f[DIR_0MP ])[kts ]=tmp;}}
+      q = q_dirTS[k];  if (q>=c0o1 && q<=c1o1){ if (geom[k0mp] < GEO_FLUID){tmp = (DN.f[DIR_0PM ])[kbn ]; (DN.f[DIR_0PM ])[kbn ]=(D.f[DIR_0PM ])[kbn ]; (D.f[DIR_0PM ])[kbn ]=tmp;}}
+      q = q_dirTNE[k]; if (q>=c0o1 && q<=c1o1){ if (geom[kbsw] < GEO_FLUID){tmp = (DN.f[DIR_MMM])[kbsw]; (DN.f[DIR_MMM])[kbsw]=(D.f[DIR_MMM])[kbsw]; (D.f[DIR_MMM])[kbsw]=tmp;}}
+      q = q_dirBSW[k]; if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_PPP])[ktne]; (DN.f[DIR_PPP])[ktne]=(D.f[DIR_PPP])[ktne]; (D.f[DIR_PPP])[ktne]=tmp;}}
+      q = q_dirBNE[k]; if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_MMP])[ktsw]; (DN.f[DIR_MMP])[ktsw]=(D.f[DIR_MMP])[ktsw]; (D.f[DIR_MMP])[ktsw]=tmp;}}
+      q = q_dirTSW[k]; if (q>=c0o1 && q<=c1o1){ if (geom[kmmp] < GEO_FLUID){tmp = (DN.f[DIR_PPM])[kbne]; (DN.f[DIR_PPM])[kbne]=(D.f[DIR_PPM])[kbne]; (D.f[DIR_PPM])[kbne]=tmp;}}
+      q = q_dirTSE[k]; if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_MPM])[kbnw]; (DN.f[DIR_MPM])[kbnw]=(D.f[DIR_MPM])[kbnw]; (D.f[DIR_MPM])[kbnw]=tmp;}}
+      q = q_dirBNW[k]; if (q>=c0o1 && q<=c1o1){ if (geom[kmpm] < GEO_FLUID){tmp = (DN.f[DIR_PMP])[ktse]; (DN.f[DIR_PMP])[ktse]=(D.f[DIR_PMP])[ktse]; (D.f[DIR_PMP])[ktse]=tmp;}}
+      q = q_dirBSE[k]; if (q>=c0o1 && q<=c1o1){ if (geom[kpmm] < GEO_FLUID){tmp = (DN.f[DIR_MPP])[ktnw]; (DN.f[DIR_MPP])[ktnw]=(D.f[DIR_MPP])[ktnw]; (D.f[DIR_MPP])[ktnw]=tmp;}}
+      q = q_dirTNW[k]; if (q>=c0o1 && q<=c1o1){                            {tmp = (DN.f[DIR_PMM])[kbse]; (DN.f[DIR_PMM])[kbse]=(D.f[DIR_PMM])[kbse]; (D.f[DIR_PMM])[kbse]=tmp;}}
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/VirtualFluids_GPU/GPU/TurbulenceIntensity.cu b/src/gpu/VirtualFluids_GPU/GPU/TurbulenceIntensity.cu
index 2c81a9588ef36d225bec67ee1c764f2e2fc7665e..f8cf8ab13c39d55477bf006cd27f7943dcb5b53a 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/TurbulenceIntensity.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/TurbulenceIntensity.cu
@@ -19,7 +19,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void CalcTurbulenceIntensity(
+__global__ void CalcTurbulenceIntensity(
    real* vxx,
    real* vyy,
    real* vzz,
diff --git a/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosity.h b/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosity.h
deleted file mode 100644
index 3ac19bc1e27c9519396e27983fc29b8fde56e7cc..0000000000000000000000000000000000000000
--- a/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosity.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef TURBULENT_VISCOSITY_H_
-#define TURBULENT_VISCOSITY_H_
-
-
-class Parameter;
-
-void calcTurbulentViscosityAMD(Parameter* para, int level);
-
-#endif //TURBULENT_VISCOSITY_H_
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityInlines.cuh b/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityInlines.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..eb301515527a9e8a3056676b0d4dffe8197c7dbe
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityInlines.cuh
@@ -0,0 +1,61 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __         
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |        
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |        
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |        
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____    
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|   
+//      \    \  |    |   ________________________________________________________________    
+//       \    \ |    |  |  ______________________________________________________________|   
+//        \    \|    |  |  |         __          __     __     __     ______      _______    
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)   
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______    
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/   
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can 
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of 
+//  the License, or (at your option) any later version.
+//  
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT 
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
+//  for more details.
+//  
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file TurbulentViscosityInlines.cuh
+//! \ingroup GPU
+//! \author Henry Korb, Henrik Asmuth
+//======================================================================================
+
+#ifndef TURBULENT_VISCOSITY_INLINES_CUH_
+#define TURBULENT_VISCOSITY_INLINES_CUH_
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include "LBM/LB.h" 
+#include <lbm/constants/NumericConstants.h>
+
+using namespace vf::lbm::constant;
+//! \brief Smagorinsky eddy viscosity: Cs^2 * sqrt( 2*(dxux^2 + dyuy^2 + dzuz^2) + Dxy^2 + Dxz^2 + Dyz^2 ); the arguments are presumably the velocity-gradient (strain-rate) components supplied by the calling kernel - TODO confirm
+__inline__ __device__ real calcTurbulentViscositySmagorinsky(real Cs, real dxux, real dyuy, real dzuz, real Dxy, real Dxz , real Dyz)
+{
+    return Cs*Cs * sqrt( c2o1 * ( dxux*dxux + dyuy*dyuy + dzuz*dzuz ) + Dxy*Dxy + Dxz*Dxz + Dyz*Dyz );
+}
+
+__inline__ __device__ real calcTurbulentViscosityQR(real C, real dxux, real dyuy, real dzuz, real Dxy, real Dxz , real Dyz)
+{
+        //! Verstappen's QR model: returns C * max(R, 0) / Q. Assumes Dij = 2*S_ij, as the c1o4 factors in Q imply - TODO confirm against the calling kernels.
+        //! Q = 1/2 * S_ij * S_ij, second invariant of the strain-rate tensor
+        real Q = c1o2*( dxux*dxux + dyuy*dyuy + dzuz*dzuz ) + c1o4*( Dxy*Dxy + Dxz*Dxz + Dyz*Dyz);
+        //! R = -det(S), third invariant of the strain-rate tensor (determinant expanded with S_ij = Dij/2)
+        real R = - dxux*dyuy*dzuz + c1o4*( - Dxy*Dxz*Dyz + dxux*Dyz*Dyz + dyuy*Dxz*Dxz + dzuz*Dxy*Dxy );
+        //! Q is a sum of squares, so Q == 0 means every component (and hence R) is zero: return zero instead of evaluating 0/0
+        return Q != c0o1 ? C * max(R, c0o1) / Q : c0o1;
+}
+
+#endif //TURBULENT_VISCOSITY_INLINES_CUH_
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosity.cu b/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityKernels.cu
similarity index 52%
rename from src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosity.cu
rename to src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityKernels.cu
index 4ff2c20befcb90bc8cbc3830019baae18aae0a2c..f4167af01eb30b458442057ada098f34998d1a98 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosity.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityKernels.cu
@@ -1,20 +1,47 @@
-#include "TurbulentViscosity.h"
-#include "Core/DataTypes.h"
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __         
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |        
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |        
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |        
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____    
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|   
+//      \    \  |    |   ________________________________________________________________    
+//       \    \ |    |  |  ______________________________________________________________|   
+//        \    \|    |  |  |         __          __     __     __     ______      _______    
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)   
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______    
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/   
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can 
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of 
+//  the License, or (at your option) any later version.
+//  
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT 
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
+//  for more details.
+//  
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file TurbulentViscosityKernels.cu
+//! \ingroup GPU
+//! \author Henry Korb, Henrik Asmuth
+//======================================================================================
+
+#include "TurbulentViscosityKernels.h"
 #include "lbm/constants/NumericConstants.h"
 #include "Parameter/Parameter.h"
 #include "cuda/CudaGrid.h"
 #include <cuda_runtime.h>
 #include <helper_cuda.h>
 #include "LBM/LB.h"
+#include "Kernel/Utilities/DistributionHelper.cuh"
 
 using namespace vf::lbm::constant;
 
-__host__ __device__ __forceinline__ real calcDamping(real kappa, real xPos, real x0, real x1)
-{
-    real x = max((xPos-x0)/(x1-x0), 0.f);
-    return kappa*x*x*(3-2*x); // polynomial with f(0)=0, f'(0) = 0, f(1) = 1, f'(1)=0
-}
-
 __host__ __device__ __forceinline__ void calcDerivatives(const uint& k, uint& kM, uint& kP, uint* typeOfGridNode, real* vx, real* vy, real* vz, real& dvx, real& dvy, real& dvz)
 {
     bool fluidP = (typeOfGridNode[kP] == GEO_FLUID);
@@ -30,25 +57,15 @@ __global__ void calcAMD(real* vx,
                         real* vy,
                         real* vz,
                         real* turbulentViscosity,
-                        real viscosity,
                         uint* neighborX,
                         uint* neighborY,
                         uint* neighborZ,
                         uint* neighborWSB,
-                        real* coordX,
                         uint* typeOfGridNode,
                         uint size_Mat,
                         real SGSConstant)
 {
-
-    const uint x = threadIdx.x; 
-    const uint y = blockIdx.x; 
-    const uint z = blockIdx.y; 
-
-    const uint nx = blockDim.x;
-    const uint ny = gridDim.x;
-
-    const uint k = nx*(ny*z + y) + x;
+    const uint k = vf::gpu::getNodeIndex();
     if(k >= size_Mat) return;
     if(typeOfGridNode[k] != GEO_FLUID) return;
 
@@ -78,19 +95,9 @@ __global__ void calcAMD(real* vx,
                         (dvxdx*dvzdx + dvxdy*dvzdy + dvxdz*dvzdz) * (dvxdz+dvzdx) + 
                         (dvydx*dvzdx + dvydy*dvzdy + dvydz*dvzdz) * (dvydz+dvzdy);
 
-    // const real kappa = 10000.f; // multiplier of the viscosity 
-    // const real x0 = 5500.f; // start of damping layer
-    // const real x1 = 6000.f; // total length of domain
-    // real xPos = coordX[k];
-    real nuDamping = 0.0f; //calcDamping(kappa, xPos, x0, x1)*viscosity;
-    real nuSGS = max(c0o1,-SGSConstant*enumerator)/denominator;
-
-    real nu = nuSGS + nuDamping;
-    // if(k >= 800600 && k <= 800637) printf("k %d x %f nu %f nu SGS %f nu damping %f \n ", k, xPos, nu, nuSGS, nuDamping);
-    turbulentViscosity[k] = nu;
+    turbulentViscosity[k] = denominator != c0o1 ? max(c0o1,-SGSConstant*enumerator)/denominator : c0o1;
 }
 
-
 void calcTurbulentViscosityAMD(Parameter* para, int level)
 {
     vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParH(level)->numberofthreads, para->getParH(level)->numberOfNodes);
@@ -99,16 +106,23 @@ void calcTurbulentViscosityAMD(Parameter* para, int level)
         para->getParD(level)->velocityY,
         para->getParD(level)->velocityZ,
         para->getParD(level)->turbViscosity,
-        para->getViscosity(),
         para->getParD(level)->neighborX,
         para->getParD(level)->neighborY,
         para->getParD(level)->neighborZ,
         para->getParD(level)->neighborInverse,
-        para->getParD(level)->coordinateX,
         para->getParD(level)->typeOfGridNode,
         para->getParD(level)->numberOfNodes,
         para->getSGSConstant()
     );
     getLastCudaError("calcAMD execution failed");
 }
-    
\ No newline at end of file
+    
+__inline__ __device__ real calcTurbulentViscosityQR(real C, real dxux, real dyuy, real dzuz, real Dxy, real Dxz , real Dyz)
+{
+        //! Verstappen's QR model: returns C * max(R, 0) / Q, or zero when Q == 0 (Q is a sum of squares, so R is zero as well and 0/0 is avoided)
+        //! Q = 1/2 * S_ij * S_ij, second invariant of the strain-rate tensor; assumes Dij = 2*S_ij as the c1o4 factor implies - TODO confirm
+        real Q = c1o2*( dxux*dxux + dyuy*dyuy + dzuz*dzuz ) + c1o4*( Dxy*Dxy + Dxz*Dxz + Dyz*Dyz);
+        //! R = -det(S), third invariant of the strain-rate tensor (determinant expanded with S_ij = Dij/2)
+        real R = - dxux*dyuy*dzuz + c1o4*( - Dxy*Dxz*Dyz + dxux*Dyz*Dyz + dyuy*Dxz*Dxz + dzuz*Dxy*Dxy );
+        return Q != c0o1 ? C * max(R, c0o1) / Q : c0o1;
+}
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityKernels.h b/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityKernels.h
new file mode 100644
index 0000000000000000000000000000000000000000..b227e680301cd4639d48a5cf3ce74f08eb7e1b9f
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/GPU/TurbulentViscosityKernels.h
@@ -0,0 +1,52 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __         
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |        
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |        
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |        
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____    
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|   
+//      \    \  |    |   ________________________________________________________________    
+//       \    \ |    |  |  ______________________________________________________________|   
+//        \    \|    |  |  |         __          __     __     __     ______      _______    
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)   
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______    
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/   
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can 
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of 
+//  the License, or (at your option) any later version.
+//  
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT 
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
+//  for more details.
+//  
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file TurbulentViscosityKernels.h
+//! \ingroup GPU
+//! \author Henry Korb, Henrik Asmuth
+//======================================================================================
+
+#ifndef TURBULENT_VISCOSITY_KERNELS_H_
+#define TURBULENT_VISCOSITY_KERNELS_H_
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+#include "LBM/LB.h" 
+#include "Core/DataTypes.h"
+#include <lbm/constants/NumericConstants.h>
+
+using namespace vf::lbm::constant;
+
+class Parameter;
+//! \brief Host-side entry point that evaluates the AMD model (kernel calcAMD) for grid level \p level and stores the result in that level's device turbViscosity array.
+void calcTurbulentViscosityAMD(Parameter* para, int level);
+
+
+
+#endif //TURBULENT_VISCOSITY_KERNELS_H_
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/GPU/VelocityBCs27.cu b/src/gpu/VirtualFluids_GPU/GPU/VelocityBCs27.cu
index 31cf9947e3112222d07a32a58abb9c7da9aed4c0..05c85e8b546aeaa964b1dbb61cbf01dd9b82ca1a 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/VelocityBCs27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/VelocityBCs27.cu
@@ -16,7 +16,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QVelDeviceCompPlusSlip27(
+__global__ void QVelDeviceCompPlusSlip27(
 													real* vx,
 													real* vy,
 													real* vz,
@@ -34,63 +34,63 @@ extern "C" __global__ void QVelDeviceCompPlusSlip27(
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[E   ] = &DD[E   *size_Mat];
-      D.f[W   ] = &DD[W   *size_Mat];
-      D.f[N   ] = &DD[N   *size_Mat];
-      D.f[S   ] = &DD[S   *size_Mat];
-      D.f[T   ] = &DD[T   *size_Mat];
-      D.f[B   ] = &DD[B   *size_Mat];
-      D.f[NE  ] = &DD[NE  *size_Mat];
-      D.f[SW  ] = &DD[SW  *size_Mat];
-      D.f[SE  ] = &DD[SE  *size_Mat];
-      D.f[NW  ] = &DD[NW  *size_Mat];
-      D.f[TE  ] = &DD[TE  *size_Mat];
-      D.f[BW  ] = &DD[BW  *size_Mat];
-      D.f[BE  ] = &DD[BE  *size_Mat];
-      D.f[TW  ] = &DD[TW  *size_Mat];
-      D.f[TN  ] = &DD[TN  *size_Mat];
-      D.f[BS  ] = &DD[BS  *size_Mat];
-      D.f[BN  ] = &DD[BN  *size_Mat];
-      D.f[TS  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[TNE *size_Mat];
-      D.f[TSW ] = &DD[TSW *size_Mat];
-      D.f[TSE ] = &DD[TSE *size_Mat];
-      D.f[TNW ] = &DD[TNW *size_Mat];
-      D.f[BNE ] = &DD[BNE *size_Mat];
-      D.f[BSW ] = &DD[BSW *size_Mat];
-      D.f[BSE ] = &DD[BSE *size_Mat];
-      D.f[BNW ] = &DD[BNW *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
    } 
    else
    {
-      D.f[W   ] = &DD[E   *size_Mat];
-      D.f[E   ] = &DD[W   *size_Mat];
-      D.f[S   ] = &DD[N   *size_Mat];
-      D.f[N   ] = &DD[S   *size_Mat];
-      D.f[B   ] = &DD[T   *size_Mat];
-      D.f[T   ] = &DD[B   *size_Mat];
-      D.f[SW  ] = &DD[NE  *size_Mat];
-      D.f[NE  ] = &DD[SW  *size_Mat];
-      D.f[NW  ] = &DD[SE  *size_Mat];
-      D.f[SE  ] = &DD[NW  *size_Mat];
-      D.f[BW  ] = &DD[TE  *size_Mat];
-      D.f[TE  ] = &DD[BW  *size_Mat];
-      D.f[TW  ] = &DD[BE  *size_Mat];
-      D.f[BE  ] = &DD[TW  *size_Mat];
-      D.f[BS  ] = &DD[TN  *size_Mat];
-      D.f[TN  ] = &DD[BS  *size_Mat];
-      D.f[TS  ] = &DD[BN  *size_Mat];
-      D.f[BN  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[BSW *size_Mat];
-      D.f[TSW ] = &DD[BNE *size_Mat];
-      D.f[TSE ] = &DD[BNW *size_Mat];
-      D.f[TNW ] = &DD[BSE *size_Mat];
-      D.f[BNE ] = &DD[TSW *size_Mat];
-      D.f[BSW ] = &DD[TNE *size_Mat];
-      D.f[BSE ] = &DD[TNW *size_Mat];
-      D.f[BNW ] = &DD[TSE *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -115,32 +115,32 @@ extern "C" __global__ void QVelDeviceCompPlusSlip27(
 		   *q_dirBE, *q_dirTW, *q_dirTN, *q_dirBS, *q_dirBN, *q_dirTS,
 		   *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
 		   *q_dirBSE, *q_dirBNW;
-	   q_dirE = &QQ[E   * numberOfBCnodes];
-	   q_dirW = &QQ[W   * numberOfBCnodes];
-	   q_dirN = &QQ[N   * numberOfBCnodes];
-	   q_dirS = &QQ[S   * numberOfBCnodes];
-	   q_dirT = &QQ[T   * numberOfBCnodes];
-	   q_dirB = &QQ[B   * numberOfBCnodes];
-	   q_dirNE = &QQ[NE  * numberOfBCnodes];
-	   q_dirSW = &QQ[SW  * numberOfBCnodes];
-	   q_dirSE = &QQ[SE  * numberOfBCnodes];
-	   q_dirNW = &QQ[NW  * numberOfBCnodes];
-	   q_dirTE = &QQ[TE  * numberOfBCnodes];
-	   q_dirBW = &QQ[BW  * numberOfBCnodes];
-	   q_dirBE = &QQ[BE  * numberOfBCnodes];
-	   q_dirTW = &QQ[TW  * numberOfBCnodes];
-	   q_dirTN = &QQ[TN  * numberOfBCnodes];
-	   q_dirBS = &QQ[BS  * numberOfBCnodes];
-	   q_dirBN = &QQ[BN  * numberOfBCnodes];
-	   q_dirTS = &QQ[TS  * numberOfBCnodes];
-	   q_dirTNE = &QQ[TNE * numberOfBCnodes];
-	   q_dirTSW = &QQ[TSW * numberOfBCnodes];
-	   q_dirTSE = &QQ[TSE * numberOfBCnodes];
-	   q_dirTNW = &QQ[TNW * numberOfBCnodes];
-	   q_dirBNE = &QQ[BNE * numberOfBCnodes];
-	   q_dirBSW = &QQ[BSW * numberOfBCnodes];
-	   q_dirBSE = &QQ[BSE * numberOfBCnodes];
-	   q_dirBNW = &QQ[BNW * numberOfBCnodes];
+	   q_dirE = &QQ[DIR_P00   * numberOfBCnodes];
+	   q_dirW = &QQ[DIR_M00   * numberOfBCnodes];
+	   q_dirN = &QQ[DIR_0P0   * numberOfBCnodes];
+	   q_dirS = &QQ[DIR_0M0   * numberOfBCnodes];
+	   q_dirT = &QQ[DIR_00P   * numberOfBCnodes];
+	   q_dirB = &QQ[DIR_00M   * numberOfBCnodes];
+	   q_dirNE = &QQ[DIR_PP0  * numberOfBCnodes];
+	   q_dirSW = &QQ[DIR_MM0  * numberOfBCnodes];
+	   q_dirSE = &QQ[DIR_PM0  * numberOfBCnodes];
+	   q_dirNW = &QQ[DIR_MP0  * numberOfBCnodes];
+	   q_dirTE = &QQ[DIR_P0P  * numberOfBCnodes];
+	   q_dirBW = &QQ[DIR_M0M  * numberOfBCnodes];
+	   q_dirBE = &QQ[DIR_P0M  * numberOfBCnodes];
+	   q_dirTW = &QQ[DIR_M0P  * numberOfBCnodes];
+	   q_dirTN = &QQ[DIR_0PP  * numberOfBCnodes];
+	   q_dirBS = &QQ[DIR_0MM  * numberOfBCnodes];
+	   q_dirBN = &QQ[DIR_0PM  * numberOfBCnodes];
+	   q_dirTS = &QQ[DIR_0MP  * numberOfBCnodes];
+	   q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
+	   q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
+	   q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
+	   q_dirTNW = &QQ[DIR_MPP * numberOfBCnodes];
+	   q_dirBNE = &QQ[DIR_PPM * numberOfBCnodes];
+	   q_dirBSW = &QQ[DIR_MMM * numberOfBCnodes];
+	   q_dirBSE = &QQ[DIR_PMM * numberOfBCnodes];
+	   q_dirBNW = &QQ[DIR_MPM * numberOfBCnodes];
 	   ////////////////////////////////////////////////////////////////////////////////
 	   //index
 	   unsigned int KQK = k_Q[k];
@@ -175,37 +175,37 @@ extern "C" __global__ void QVelDeviceCompPlusSlip27(
 	   real f_E, f_W, f_N, f_S, f_T, f_B, f_NE, f_SW, f_SE, f_NW, f_TE, f_BW, f_BE,
 		   f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-	   f_W = (D.f[E])[ke];
-	   f_E = (D.f[W])[kw];
-	   f_S = (D.f[N])[kn];
-	   f_N = (D.f[S])[ks];
-	   f_B = (D.f[T])[kt];
-	   f_T = (D.f[B])[kb];
-	   f_SW = (D.f[NE])[kne];
-	   f_NE = (D.f[SW])[ksw];
-	   f_NW = (D.f[SE])[kse];
-	   f_SE = (D.f[NW])[knw];
-	   f_BW = (D.f[TE])[kte];
-	   f_TE = (D.f[BW])[kbw];
-	   f_TW = (D.f[BE])[kbe];
-	   f_BE = (D.f[TW])[ktw];
-	   f_BS = (D.f[TN])[ktn];
-	   f_TN = (D.f[BS])[kbs];
-	   f_TS = (D.f[BN])[kbn];
-	   f_BN = (D.f[TS])[kts];
-	   f_BSW = (D.f[TNE])[ktne];
-	   f_BNE = (D.f[TSW])[ktsw];
-	   f_BNW = (D.f[TSE])[ktse];
-	   f_BSE = (D.f[TNW])[ktnw];
-	   f_TSW = (D.f[BNE])[kbne];
-	   f_TNE = (D.f[BSW])[kbsw];
-	   f_TNW = (D.f[BSE])[kbse];
-	   f_TSE = (D.f[BNW])[kbnw];
+	   f_W = (D.f[DIR_P00])[ke];
+	   f_E = (D.f[DIR_M00])[kw];
+	   f_S = (D.f[DIR_0P0])[kn];
+	   f_N = (D.f[DIR_0M0])[ks];
+	   f_B = (D.f[DIR_00P])[kt];
+	   f_T = (D.f[DIR_00M])[kb];
+	   f_SW = (D.f[DIR_PP0])[kne];
+	   f_NE = (D.f[DIR_MM0])[ksw];
+	   f_NW = (D.f[DIR_PM0])[kse];
+	   f_SE = (D.f[DIR_MP0])[knw];
+	   f_BW = (D.f[DIR_P0P])[kte];
+	   f_TE = (D.f[DIR_M0M])[kbw];
+	   f_TW = (D.f[DIR_P0M])[kbe];
+	   f_BE = (D.f[DIR_M0P])[ktw];
+	   f_BS = (D.f[DIR_0PP])[ktn];
+	   f_TN = (D.f[DIR_0MM])[kbs];
+	   f_TS = (D.f[DIR_0PM])[kbn];
+	   f_BN = (D.f[DIR_0MP])[kts];
+	   f_BSW = (D.f[DIR_PPP])[ktne];
+	   f_BNE = (D.f[DIR_MMP])[ktsw];
+	   f_BNW = (D.f[DIR_PMP])[ktse];
+	   f_BSE = (D.f[DIR_MPP])[ktnw];
+	   f_TSW = (D.f[DIR_PPM])[kbne];
+	   f_TNE = (D.f[DIR_MMM])[kbsw];
+	   f_TNW = (D.f[DIR_PMM])[kbse];
+	   f_TSE = (D.f[DIR_MPM])[kbnw];
 	   ////////////////////////////////////////////////////////////////////////////////
 	   real vx1, vx2, vx3, drho, feq, q;
 	   drho = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
 		   f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW +
-		   f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[REST])[kzero]);
+		   f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[DIR_000])[kzero]);
 
 	   vx1 = (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
 		   ((f_BE - f_TW) + (f_TE - f_BW)) + ((f_SE - f_NW) + (f_NE - f_SW)) +
@@ -225,67 +225,67 @@ extern "C" __global__ void QVelDeviceCompPlusSlip27(
 	   //////////////////////////////////////////////////////////////////////////
 	   if (isEvenTimestep == false)
 	   {
-		   D.f[E] = &DD[E   *size_Mat];
-		   D.f[W] = &DD[W   *size_Mat];
-		   D.f[N] = &DD[N   *size_Mat];
-		   D.f[S] = &DD[S   *size_Mat];
-		   D.f[T] = &DD[T   *size_Mat];
-		   D.f[B] = &DD[B   *size_Mat];
-		   D.f[NE] = &DD[NE  *size_Mat];
-		   D.f[SW] = &DD[SW  *size_Mat];
-		   D.f[SE] = &DD[SE  *size_Mat];
-		   D.f[NW] = &DD[NW  *size_Mat];
-		   D.f[TE] = &DD[TE  *size_Mat];
-		   D.f[BW] = &DD[BW  *size_Mat];
-		   D.f[BE] = &DD[BE  *size_Mat];
-		   D.f[TW] = &DD[TW  *size_Mat];
-		   D.f[TN] = &DD[TN  *size_Mat];
-		   D.f[BS] = &DD[BS  *size_Mat];
-		   D.f[BN] = &DD[BN  *size_Mat];
-		   D.f[TS] = &DD[TS  *size_Mat];
-		   D.f[REST] = &DD[REST*size_Mat];
-		   D.f[TNE] = &DD[TNE *size_Mat];
-		   D.f[TSW] = &DD[TSW *size_Mat];
-		   D.f[TSE] = &DD[TSE *size_Mat];
-		   D.f[TNW] = &DD[TNW *size_Mat];
-		   D.f[BNE] = &DD[BNE *size_Mat];
-		   D.f[BSW] = &DD[BSW *size_Mat];
-		   D.f[BSE] = &DD[BSE *size_Mat];
-		   D.f[BNW] = &DD[BNW *size_Mat];
+		   D.f[DIR_P00] = &DD[DIR_P00   *size_Mat];
+		   D.f[DIR_M00] = &DD[DIR_M00   *size_Mat];
+		   D.f[DIR_0P0] = &DD[DIR_0P0   *size_Mat];
+		   D.f[DIR_0M0] = &DD[DIR_0M0   *size_Mat];
+		   D.f[DIR_00P] = &DD[DIR_00P   *size_Mat];
+		   D.f[DIR_00M] = &DD[DIR_00M   *size_Mat];
+		   D.f[DIR_PP0] = &DD[DIR_PP0  *size_Mat];
+		   D.f[DIR_MM0] = &DD[DIR_MM0  *size_Mat];
+		   D.f[DIR_PM0] = &DD[DIR_PM0  *size_Mat];
+		   D.f[DIR_MP0] = &DD[DIR_MP0  *size_Mat];
+		   D.f[DIR_P0P] = &DD[DIR_P0P  *size_Mat];
+		   D.f[DIR_M0M] = &DD[DIR_M0M  *size_Mat];
+		   D.f[DIR_P0M] = &DD[DIR_P0M  *size_Mat];
+		   D.f[DIR_M0P] = &DD[DIR_M0P  *size_Mat];
+		   D.f[DIR_0PP] = &DD[DIR_0PP  *size_Mat];
+		   D.f[DIR_0MM] = &DD[DIR_0MM  *size_Mat];
+		   D.f[DIR_0PM] = &DD[DIR_0PM  *size_Mat];
+		   D.f[DIR_0MP] = &DD[DIR_0MP  *size_Mat];
+		   D.f[DIR_000] = &DD[DIR_000*size_Mat];
+		   D.f[DIR_PPP] = &DD[DIR_PPP *size_Mat];
+		   D.f[DIR_MMP] = &DD[DIR_MMP *size_Mat];
+		   D.f[DIR_PMP] = &DD[DIR_PMP *size_Mat];
+		   D.f[DIR_MPP] = &DD[DIR_MPP *size_Mat];
+		   D.f[DIR_PPM] = &DD[DIR_PPM *size_Mat];
+		   D.f[DIR_MMM] = &DD[DIR_MMM *size_Mat];
+		   D.f[DIR_PMM] = &DD[DIR_PMM *size_Mat];
+		   D.f[DIR_MPM] = &DD[DIR_MPM *size_Mat];
 	   }
 	   else
 	   {
-		   D.f[W] = &DD[E   *size_Mat];
-		   D.f[E] = &DD[W   *size_Mat];
-		   D.f[S] = &DD[N   *size_Mat];
-		   D.f[N] = &DD[S   *size_Mat];
-		   D.f[B] = &DD[T   *size_Mat];
-		   D.f[T] = &DD[B   *size_Mat];
-		   D.f[SW] = &DD[NE  *size_Mat];
-		   D.f[NE] = &DD[SW  *size_Mat];
-		   D.f[NW] = &DD[SE  *size_Mat];
-		   D.f[SE] = &DD[NW  *size_Mat];
-		   D.f[BW] = &DD[TE  *size_Mat];
-		   D.f[TE] = &DD[BW  *size_Mat];
-		   D.f[TW] = &DD[BE  *size_Mat];
-		   D.f[BE] = &DD[TW  *size_Mat];
-		   D.f[BS] = &DD[TN  *size_Mat];
-		   D.f[TN] = &DD[BS  *size_Mat];
-		   D.f[TS] = &DD[BN  *size_Mat];
-		   D.f[BN] = &DD[TS  *size_Mat];
-		   D.f[REST] = &DD[REST*size_Mat];
-		   D.f[TNE] = &DD[BSW *size_Mat];
-		   D.f[TSW] = &DD[BNE *size_Mat];
-		   D.f[TSE] = &DD[BNW *size_Mat];
-		   D.f[TNW] = &DD[BSE *size_Mat];
-		   D.f[BNE] = &DD[TSW *size_Mat];
-		   D.f[BSW] = &DD[TNE *size_Mat];
-		   D.f[BSE] = &DD[TNW *size_Mat];
-		   D.f[BNW] = &DD[TSE *size_Mat];
+		   D.f[DIR_M00] = &DD[DIR_P00   *size_Mat];
+		   D.f[DIR_P00] = &DD[DIR_M00   *size_Mat];
+		   D.f[DIR_0M0] = &DD[DIR_0P0   *size_Mat];
+		   D.f[DIR_0P0] = &DD[DIR_0M0   *size_Mat];
+		   D.f[DIR_00M] = &DD[DIR_00P   *size_Mat];
+		   D.f[DIR_00P] = &DD[DIR_00M   *size_Mat];
+		   D.f[DIR_MM0] = &DD[DIR_PP0  *size_Mat];
+		   D.f[DIR_PP0] = &DD[DIR_MM0  *size_Mat];
+		   D.f[DIR_MP0] = &DD[DIR_PM0  *size_Mat];
+		   D.f[DIR_PM0] = &DD[DIR_MP0  *size_Mat];
+		   D.f[DIR_M0M] = &DD[DIR_P0P  *size_Mat];
+		   D.f[DIR_P0P] = &DD[DIR_M0M  *size_Mat];
+		   D.f[DIR_M0P] = &DD[DIR_P0M  *size_Mat];
+		   D.f[DIR_P0M] = &DD[DIR_M0P  *size_Mat];
+		   D.f[DIR_0MM] = &DD[DIR_0PP  *size_Mat];
+		   D.f[DIR_0PP] = &DD[DIR_0MM  *size_Mat];
+		   D.f[DIR_0MP] = &DD[DIR_0PM  *size_Mat];
+		   D.f[DIR_0PM] = &DD[DIR_0MP  *size_Mat];
+		   D.f[DIR_000] = &DD[DIR_000*size_Mat];
+		   D.f[DIR_PPP] = &DD[DIR_MMM *size_Mat];
+		   D.f[DIR_MMP] = &DD[DIR_PPM *size_Mat];
+		   D.f[DIR_PMP] = &DD[DIR_MPM *size_Mat];
+		   D.f[DIR_MPP] = &DD[DIR_PMM *size_Mat];
+		   D.f[DIR_PPM] = &DD[DIR_MMP *size_Mat];
+		   D.f[DIR_MMM] = &DD[DIR_PPP *size_Mat];
+		   D.f[DIR_PMM] = &DD[DIR_MPP *size_Mat];
+		   D.f[DIR_MPM] = &DD[DIR_PMP *size_Mat];
 	   }
 	   ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	   //Test
-	   //(D.f[REST])[k]=c1o10;
+	   //(D.f[DIR_000])[k]=c1o10;
 	   ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
 	   //ToDo anders Klammern
@@ -307,208 +307,208 @@ extern "C" __global__ void QVelDeviceCompPlusSlip27(
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c2o27* (drho/*+three*( vx1        )*/+c9o2*( vx1        )*( vx1        ) * (c1o1 + drho)-cu_sq); 
-         (D.f[W])[kw]=(c1o1-q)/(c1o1+q)*(f_E-f_W+(f_E+f_W-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_E+f_W)-c6o1*c2o27*( VeloX     ))/(c1o1+q);// - c2over27 * drho;
-         //(D.f[W])[kw]=zero;
+         (D.f[DIR_M00])[kw]=(c1o1-q)/(c1o1+q)*(f_E-f_W+(f_E+f_W-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_E+f_W)-c6o1*c2o27*( VeloX     ))/(c1o1+q);// - c2over27 * drho;
+         //(D.f[DIR_M00])[kw]=zero;
       }
 
       q = q_dirW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c2o27* (drho/*+three*(-vx1        )*/+c9o2*(-vx1        )*(-vx1        ) * (c1o1 + drho)-cu_sq); 
-         (D.f[E])[ke]=(c1o1-q)/(c1o1+q)*(f_W-f_E+(f_W+f_E-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_W+f_E)-c6o1*c2o27*(-VeloX     ))/(c1o1+q);// - c2over27 * drho;
-         //(D.f[E])[ke]=zero;
+         (D.f[DIR_P00])[ke]=(c1o1-q)/(c1o1+q)*(f_W-f_E+(f_W+f_E-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_W+f_E)-c6o1*c2o27*(-VeloX     ))/(c1o1+q);// - c2over27 * drho;
+         //(D.f[DIR_P00])[ke]=zero;
       }
 
       q = q_dirN[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c2o27* (drho/*+three*(    vx2     )*/+c9o2*(     vx2    )*(     vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[S])[ks]=(c1o1-q)/(c1o1+q)*(f_N-f_S+(f_N+f_S-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_N+f_S)-c6o1*c2o27*( VeloY     ))/(c1o1+q);// - c2over27 * drho;
-         //(D.f[S])[ks]=zero;
+         (D.f[DIR_0M0])[ks]=(c1o1-q)/(c1o1+q)*(f_N-f_S+(f_N+f_S-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_N+f_S)-c6o1*c2o27*( VeloY     ))/(c1o1+q);// - c2over27 * drho;
+         //(D.f[DIR_0M0])[ks]=zero;
       }
 
       q = q_dirS[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c2o27* (drho/*+three*(   -vx2     )*/+c9o2*(    -vx2    )*(    -vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[N])[kn]=(c1o1-q)/(c1o1+q)*(f_S-f_N+(f_S+f_N-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_S+f_N)-c6o1*c2o27*(-VeloY     ))/(c1o1+q);// - c2over27 * drho;
-         //(D.f[N])[kn]=zero;
+         (D.f[DIR_0P0])[kn]=(c1o1-q)/(c1o1+q)*(f_S-f_N+(f_S+f_N-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_S+f_N)-c6o1*c2o27*(-VeloY     ))/(c1o1+q);// - c2over27 * drho;
+         //(D.f[DIR_0P0])[kn]=zero;
       }
 
       q = q_dirT[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c2o27* (drho/*+three*(         vx3)*/+c9o2*(         vx3)*(         vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[B])[kb]=(c1o1-q)/(c1o1+q)*(f_T-f_B+(f_T+f_B-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_T+f_B)-c6o1*c2o27*( VeloZ     ))/(c1o1+q);// - c2over27 * drho;
-         //(D.f[B])[kb]=one;
+         (D.f[DIR_00M])[kb]=(c1o1-q)/(c1o1+q)*(f_T-f_B+(f_T+f_B-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_T+f_B)-c6o1*c2o27*( VeloZ     ))/(c1o1+q);// - c2over27 * drho;
+         //(D.f[DIR_00M])[kb]=one;
       }
 
       q = q_dirB[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c2o27* (drho/*+three*(        -vx3)*/+c9o2*(        -vx3)*(        -vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[T])[kt]=(c1o1-q)/(c1o1+q)*(f_B-f_T+(f_B+f_T-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_B+f_T)-c6o1*c2o27*(-VeloZ     ))/(c1o1+q);// - c2over27 * drho;
-         //(D.f[T])[kt]=zero;
+         (D.f[DIR_00P])[kt]=(c1o1-q)/(c1o1+q)*(f_B-f_T+(f_B+f_T-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_B+f_T)-c6o1*c2o27*(-VeloZ     ))/(c1o1+q);// - c2over27 * drho;
+         //(D.f[DIR_00P])[kt]=zero;
       }
 
       q = q_dirNE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*( vx1+vx2    )*/+c9o2*( vx1+vx2    )*( vx1+vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[SW])[ksw]=(c1o1-q)/(c1o1+q)*(f_NE-f_SW+(f_NE+f_SW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_NE+f_SW)-c6o1*c1o54*(VeloX+VeloY))/(c1o1+q);// - c1over54 * drho;
-         //(D.f[SW])[ksw]=zero;
+         (D.f[DIR_MM0])[ksw]=(c1o1-q)/(c1o1+q)*(f_NE-f_SW+(f_NE+f_SW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_NE+f_SW)-c6o1*c1o54*(VeloX+VeloY))/(c1o1+q);// - c1over54 * drho;
+         //(D.f[DIR_MM0])[ksw]=zero;
       }
 
       q = q_dirSW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(-vx1-vx2    )*/+c9o2*(-vx1-vx2    )*(-vx1-vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[NE])[kne]=(c1o1-q)/(c1o1+q)*(f_SW-f_NE+(f_SW+f_NE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_SW+f_NE)-c6o1*c1o54*(-VeloX-VeloY))/(c1o1+q);// - c1over54 * drho;
-         //(D.f[NE])[kne]=zero;
+         (D.f[DIR_PP0])[kne]=(c1o1-q)/(c1o1+q)*(f_SW-f_NE+(f_SW+f_NE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_SW+f_NE)-c6o1*c1o54*(-VeloX-VeloY))/(c1o1+q);// - c1over54 * drho;
+         //(D.f[DIR_PP0])[kne]=zero;
       }
 
       q = q_dirSE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*( vx1-vx2    )*/+c9o2*( vx1-vx2    )*( vx1-vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[NW])[knw]=(c1o1-q)/(c1o1+q)*(f_SE-f_NW+(f_SE+f_NW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_SE+f_NW)-c6o1*c1o54*( VeloX-VeloY))/(c1o1+q);// - c1over54 * drho;
-         //(D.f[NW])[knw]=zero;
+         (D.f[DIR_MP0])[knw]=(c1o1-q)/(c1o1+q)*(f_SE-f_NW+(f_SE+f_NW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_SE+f_NW)-c6o1*c1o54*( VeloX-VeloY))/(c1o1+q);// - c1over54 * drho;
+         //(D.f[DIR_MP0])[knw]=zero;
       }
 
       q = q_dirNW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(-vx1+vx2    )*/+c9o2*(-vx1+vx2    )*(-vx1+vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[SE])[kse]=(c1o1-q)/(c1o1+q)*(f_NW-f_SE+(f_NW+f_SE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_NW+f_SE)-c6o1*c1o54*(-VeloX+VeloY))/(c1o1+q);// - c1over54 * drho;
-         //(D.f[SE])[kse]=zero;
+         (D.f[DIR_PM0])[kse]=(c1o1-q)/(c1o1+q)*(f_NW-f_SE+(f_NW+f_SE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_NW+f_SE)-c6o1*c1o54*(-VeloX+VeloY))/(c1o1+q);// - c1over54 * drho;
+         //(D.f[DIR_PM0])[kse]=zero;
       }
 
       q = q_dirTE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*( vx1    +vx3)*/+c9o2*( vx1    +vx3)*( vx1    +vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BW])[kbw]=(c1o1-q)/(c1o1+q)*(f_TE-f_BW+(f_TE+f_BW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TE+f_BW)-c6o1*c1o54*( VeloX+VeloZ))/(c1o1+q);// - c1over54 * drho;
-         //(D.f[BW])[kbw]=zero;
+         (D.f[DIR_M0M])[kbw]=(c1o1-q)/(c1o1+q)*(f_TE-f_BW+(f_TE+f_BW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TE+f_BW)-c6o1*c1o54*( VeloX+VeloZ))/(c1o1+q);// - c1over54 * drho;
+         //(D.f[DIR_M0M])[kbw]=zero;
       }
 
       q = q_dirBW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(-vx1    -vx3)*/+c9o2*(-vx1    -vx3)*(-vx1    -vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TE])[kte]=(c1o1-q)/(c1o1+q)*(f_BW-f_TE+(f_BW+f_TE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BW+f_TE)-c6o1*c1o54*(-VeloX-VeloZ))/(c1o1+q);// - c1over54 * drho;
-         //(D.f[TE])[kte]=zero;
+         (D.f[DIR_P0P])[kte]=(c1o1-q)/(c1o1+q)*(f_BW-f_TE+(f_BW+f_TE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BW+f_TE)-c6o1*c1o54*(-VeloX-VeloZ))/(c1o1+q);// - c1over54 * drho;
+         //(D.f[DIR_P0P])[kte]=zero;
       }
 
       q = q_dirBE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*( vx1    -vx3)*/+c9o2*( vx1    -vx3)*( vx1    -vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TW])[ktw]=(c1o1-q)/(c1o1+q)*(f_BE-f_TW+(f_BE+f_TW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BE+f_TW)-c6o1*c1o54*( VeloX-VeloZ))/(c1o1+q);// - c1over54 * drho;
-         //(D.f[TW])[ktw]=zero;
+         (D.f[DIR_M0P])[ktw]=(c1o1-q)/(c1o1+q)*(f_BE-f_TW+(f_BE+f_TW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BE+f_TW)-c6o1*c1o54*( VeloX-VeloZ))/(c1o1+q);// - c1over54 * drho;
+         //(D.f[DIR_M0P])[ktw]=zero;
       }
 
       q = q_dirTW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(-vx1    +vx3)*/+c9o2*(-vx1    +vx3)*(-vx1    +vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BE])[kbe]=(c1o1-q)/(c1o1+q)*(f_TW-f_BE+(f_TW+f_BE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TW+f_BE)-c6o1*c1o54*(-VeloX+VeloZ))/(c1o1+q);// - c1over54 * drho;
-         //(D.f[BE])[kbe]=zero;
+         (D.f[DIR_P0M])[kbe]=(c1o1-q)/(c1o1+q)*(f_TW-f_BE+(f_TW+f_BE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TW+f_BE)-c6o1*c1o54*(-VeloX+VeloZ))/(c1o1+q);// - c1over54 * drho;
+         //(D.f[DIR_P0M])[kbe]=zero;
       }
 
       q = q_dirTN[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(     vx2+vx3)*/+c9o2*(     vx2+vx3)*(     vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BS])[kbs]=(c1o1-q)/(c1o1+q)*(f_TN-f_BS+(f_TN+f_BS-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TN+f_BS)-c6o1*c1o54*( VeloY+VeloZ))/(c1o1+q);// - c1over54 * drho;
-         //(D.f[BS])[kbs]=zero;
+         (D.f[DIR_0MM])[kbs]=(c1o1-q)/(c1o1+q)*(f_TN-f_BS+(f_TN+f_BS-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TN+f_BS)-c6o1*c1o54*( VeloY+VeloZ))/(c1o1+q);// - c1over54 * drho;
+         //(D.f[DIR_0MM])[kbs]=zero;
       }
 
       q = q_dirBS[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(    -vx2-vx3)*/+c9o2*(    -vx2-vx3)*(    -vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TN])[ktn]=(c1o1-q)/(c1o1+q)*(f_BS-f_TN+(f_BS+f_TN-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BS+f_TN)-c6o1*c1o54*( -VeloY-VeloZ))/(c1o1+q);// - c1over54 * drho;
-         //(D.f[TN])[ktn]=zero;
+         (D.f[DIR_0PP])[ktn]=(c1o1-q)/(c1o1+q)*(f_BS-f_TN+(f_BS+f_TN-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BS+f_TN)-c6o1*c1o54*( -VeloY-VeloZ))/(c1o1+q);// - c1over54 * drho;
+         //(D.f[DIR_0PP])[ktn]=zero;
       }
 
       q = q_dirBN[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(     vx2-vx3)*/+c9o2*(     vx2-vx3)*(     vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TS])[kts]=(c1o1-q)/(c1o1+q)*(f_BN-f_TS+(f_BN+f_TS-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BN+f_TS)-c6o1*c1o54*( VeloY-VeloZ))/(c1o1+q);// - c1over54 * drho;
-         //(D.f[TS])[kts]=zero;
+         (D.f[DIR_0MP])[kts]=(c1o1-q)/(c1o1+q)*(f_BN-f_TS+(f_BN+f_TS-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BN+f_TS)-c6o1*c1o54*( VeloY-VeloZ))/(c1o1+q);// - c1over54 * drho;
+         //(D.f[DIR_0MP])[kts]=zero;
       }
 
       q = q_dirTS[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(    -vx2+vx3)*/+c9o2*(    -vx2+vx3)*(    -vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BN])[kbn]=(c1o1-q)/(c1o1+q)*(f_TS-f_BN+(f_TS+f_BN-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TS+f_BN)-c6o1*c1o54*( -VeloY+VeloZ))/(c1o1+q);// - c1over54 * drho;
-         //(D.f[BN])[kbn]=zero;
+         (D.f[DIR_0PM])[kbn]=(c1o1-q)/(c1o1+q)*(f_TS-f_BN+(f_TS+f_BN-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TS+f_BN)-c6o1*c1o54*( -VeloY+VeloZ))/(c1o1+q);// - c1over54 * drho;
+         //(D.f[DIR_0PM])[kbn]=zero;
       }
 
       q = q_dirTNE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*( vx1+vx2+vx3)*/+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BSW])[kbsw]=(c1o1-q)/(c1o1+q)*(f_TNE-f_BSW+(f_TNE+f_BSW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TNE+f_BSW)-c6o1*c1o216*( VeloX+VeloY+VeloZ))/(c1o1+q);// - c1over216 * drho;
-         //(D.f[BSW])[kbsw]=zero;
+         (D.f[DIR_MMM])[kbsw]=(c1o1-q)/(c1o1+q)*(f_TNE-f_BSW+(f_TNE+f_BSW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TNE+f_BSW)-c6o1*c1o216*( VeloX+VeloY+VeloZ))/(c1o1+q);// - c1over216 * drho;
+         //(D.f[DIR_MMM])[kbsw]=zero;
       }
 
       q = q_dirBSW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*(-vx1-vx2-vx3)*/+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TNE])[ktne]=(c1o1-q)/(c1o1+q)*(f_BSW-f_TNE+(f_BSW+f_TNE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BSW+f_TNE)-c6o1*c1o216*(-VeloX-VeloY-VeloZ))/(c1o1+q);// - c1over216 * drho;
-         //(D.f[TNE])[ktne]=zero;
+         (D.f[DIR_PPP])[ktne]=(c1o1-q)/(c1o1+q)*(f_BSW-f_TNE+(f_BSW+f_TNE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BSW+f_TNE)-c6o1*c1o216*(-VeloX-VeloY-VeloZ))/(c1o1+q);// - c1over216 * drho;
+         //(D.f[DIR_PPP])[ktne]=zero;
       }
 
       q = q_dirBNE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*( vx1+vx2-vx3)*/+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TSW])[ktsw]=(c1o1-q)/(c1o1+q)*(f_BNE-f_TSW+(f_BNE+f_TSW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BNE+f_TSW)-c6o1*c1o216*( VeloX+VeloY-VeloZ))/(c1o1+q);// - c1over216 * drho;
-         //(D.f[TSW])[ktsw]=zero;
+         (D.f[DIR_MMP])[ktsw]=(c1o1-q)/(c1o1+q)*(f_BNE-f_TSW+(f_BNE+f_TSW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BNE+f_TSW)-c6o1*c1o216*( VeloX+VeloY-VeloZ))/(c1o1+q);// - c1over216 * drho;
+         //(D.f[DIR_MMP])[ktsw]=zero;
       }
 
       q = q_dirTSW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*(-vx1-vx2+vx3)*/+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BNE])[kbne]=(c1o1-q)/(c1o1+q)*(f_TSW-f_BNE+(f_TSW+f_BNE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TSW+f_BNE)-c6o1*c1o216*(-VeloX-VeloY+VeloZ))/(c1o1+q);// - c1over216 * drho;
-         //(D.f[BNE])[kbne]=zero;
+         (D.f[DIR_PPM])[kbne]=(c1o1-q)/(c1o1+q)*(f_TSW-f_BNE+(f_TSW+f_BNE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TSW+f_BNE)-c6o1*c1o216*(-VeloX-VeloY+VeloZ))/(c1o1+q);// - c1over216 * drho;
+         //(D.f[DIR_PPM])[kbne]=zero;
       }
 
       q = q_dirTSE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*( vx1-vx2+vx3)*/+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BNW])[kbnw]=(c1o1-q)/(c1o1+q)*(f_TSE-f_BNW+(f_TSE+f_BNW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TSE+f_BNW)-c6o1*c1o216*( VeloX-VeloY+VeloZ))/(c1o1+q);// - c1over216 * drho;
-         //(D.f[BNW])[kbnw]=zero;
+         (D.f[DIR_MPM])[kbnw]=(c1o1-q)/(c1o1+q)*(f_TSE-f_BNW+(f_TSE+f_BNW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TSE+f_BNW)-c6o1*c1o216*( VeloX-VeloY+VeloZ))/(c1o1+q);// - c1over216 * drho;
+         //(D.f[DIR_MPM])[kbnw]=zero;
       }
 
       q = q_dirBNW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*(-vx1+vx2-vx3)*/+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TSE])[ktse]=(c1o1-q)/(c1o1+q)*(f_BNW-f_TSE+(f_BNW+f_TSE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BNW+f_TSE)-c6o1*c1o216*(-VeloX+VeloY-VeloZ))/(c1o1+q);// - c1over216 * drho;
-         //(D.f[TSE])[ktse]=zero;
+         (D.f[DIR_PMP])[ktse]=(c1o1-q)/(c1o1+q)*(f_BNW-f_TSE+(f_BNW+f_TSE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BNW+f_TSE)-c6o1*c1o216*(-VeloX+VeloY-VeloZ))/(c1o1+q);// - c1over216 * drho;
+         //(D.f[DIR_PMP])[ktse]=zero;
       }
 
       q = q_dirBSE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*( vx1-vx2-vx3)*/+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TNW])[ktnw]=(c1o1-q)/(c1o1+q)*(f_BSE-f_TNW+(f_BSE+f_TNW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BSE+f_TNW)-c6o1*c1o216*( VeloX-VeloY-VeloZ))/(c1o1+q);// - c1over216 * drho;
-         //(D.f[TNW])[ktnw]=zero;
+         (D.f[DIR_MPP])[ktnw]=(c1o1-q)/(c1o1+q)*(f_BSE-f_TNW+(f_BSE+f_TNW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BSE+f_TNW)-c6o1*c1o216*( VeloX-VeloY-VeloZ))/(c1o1+q);// - c1over216 * drho;
+         //(D.f[DIR_MPP])[ktnw]=zero;
       }
 
       q = q_dirTNW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*(-vx1+vx2+vx3)*/+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BSE])[kbse]=(c1o1-q)/(c1o1+q)*(f_TNW-f_BSE+(f_TNW+f_BSE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TNW+f_BSE)-c6o1*c1o216*(-VeloX+VeloY+VeloZ))/(c1o1+q);// - c1over216 * drho;
-         //(D.f[BSE])[kbse]=zero;
+         (D.f[DIR_PMM])[kbse]=(c1o1-q)/(c1o1+q)*(f_TNW-f_BSE+(f_TNW+f_BSE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TNW+f_BSE)-c6o1*c1o216*(-VeloX+VeloY+VeloZ))/(c1o1+q);// - c1over216 * drho;
+         //(D.f[DIR_PMM])[kbse]=zero;
       }
    }
 }
@@ -553,7 +553,7 @@ extern "C" __global__ void QVelDeviceCompPlusSlip27(
 
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QVeloDeviceEQ27(real* VeloX,
+__global__ void QVeloDeviceEQ27(real* VeloX,
 										   real* VeloY,
 										   real* VeloZ,
                                            real* DD, 
@@ -613,95 +613,95 @@ extern "C" __global__ void QVeloDeviceEQ27(real* VeloX,
       Distributions27 D;
       if (isEvenTimestep==true)
       {
-         D.f[E   ] = &DD[E   *size_Mat];
-         D.f[W   ] = &DD[W   *size_Mat];
-         D.f[N   ] = &DD[N   *size_Mat];
-         D.f[S   ] = &DD[S   *size_Mat];
-         D.f[T   ] = &DD[T   *size_Mat];
-         D.f[B   ] = &DD[B   *size_Mat];
-         D.f[NE  ] = &DD[NE  *size_Mat];
-         D.f[SW  ] = &DD[SW  *size_Mat];
-         D.f[SE  ] = &DD[SE  *size_Mat];
-         D.f[NW  ] = &DD[NW  *size_Mat];
-         D.f[TE  ] = &DD[TE  *size_Mat];
-         D.f[BW  ] = &DD[BW  *size_Mat];
-         D.f[BE  ] = &DD[BE  *size_Mat];
-         D.f[TW  ] = &DD[TW  *size_Mat];
-         D.f[TN  ] = &DD[TN  *size_Mat];
-         D.f[BS  ] = &DD[BS  *size_Mat];
-         D.f[BN  ] = &DD[BN  *size_Mat];
-         D.f[TS  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[TNE *size_Mat];
-         D.f[TSW ] = &DD[TSW *size_Mat];
-         D.f[TSE ] = &DD[TSE *size_Mat];
-         D.f[TNW ] = &DD[TNW *size_Mat];
-         D.f[BNE ] = &DD[BNE *size_Mat];
-         D.f[BSW ] = &DD[BSW *size_Mat];
-         D.f[BSE ] = &DD[BSE *size_Mat];
-         D.f[BNW ] = &DD[BNW *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
       } 
       else
       {
-         D.f[W   ] = &DD[E   *size_Mat];
-         D.f[E   ] = &DD[W   *size_Mat];
-         D.f[S   ] = &DD[N   *size_Mat];
-         D.f[N   ] = &DD[S   *size_Mat];
-         D.f[B   ] = &DD[T   *size_Mat];
-         D.f[T   ] = &DD[B   *size_Mat];
-         D.f[SW  ] = &DD[NE  *size_Mat];
-         D.f[NE  ] = &DD[SW  *size_Mat];
-         D.f[NW  ] = &DD[SE  *size_Mat];
-         D.f[SE  ] = &DD[NW  *size_Mat];
-         D.f[BW  ] = &DD[TE  *size_Mat];
-         D.f[TE  ] = &DD[BW  *size_Mat];
-         D.f[TW  ] = &DD[BE  *size_Mat];
-         D.f[BE  ] = &DD[TW  *size_Mat];
-         D.f[BS  ] = &DD[TN  *size_Mat];
-         D.f[TN  ] = &DD[BS  *size_Mat];
-         D.f[TS  ] = &DD[BN  *size_Mat];
-         D.f[BN  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[BSW *size_Mat];
-         D.f[TSW ] = &DD[BNE *size_Mat];
-         D.f[TSE ] = &DD[BNW *size_Mat];
-         D.f[TNW ] = &DD[BSE *size_Mat];
-         D.f[BNE ] = &DD[TSW *size_Mat];
-         D.f[BSW ] = &DD[TNE *size_Mat];
-         D.f[BSE ] = &DD[TNW *size_Mat];
-         D.f[BNW ] = &DD[TSE *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
       }
 
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
             // based on BGK Plus Comp
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[E   ])[ke   ];
-			real mfabb = (D.f[W   ])[kw   ];
-			real mfbcb = (D.f[N   ])[kn   ];
-			real mfbab = (D.f[S   ])[ks   ];
-			real mfbbc = (D.f[T   ])[kt   ];
-			real mfbba = (D.f[B   ])[kb   ];
-			real mfccb = (D.f[NE  ])[kne  ];
-			real mfaab = (D.f[SW  ])[ksw  ];
-			real mfcab = (D.f[SE  ])[kse  ];
-			real mfacb = (D.f[NW  ])[knw  ];
-			real mfcbc = (D.f[TE  ])[kte  ];
-			real mfaba = (D.f[BW  ])[kbw  ];
-			real mfcba = (D.f[BE  ])[kbe  ];
-			real mfabc = (D.f[TW  ])[ktw  ];
-			real mfbcc = (D.f[TN  ])[ktn  ];
-			real mfbaa = (D.f[BS  ])[kbs  ];
-			real mfbca = (D.f[BN  ])[kbn  ];
-			real mfbac = (D.f[TS  ])[kts  ];
-			real mfbbb = (D.f[REST])[kzero];
-			real mfccc = (D.f[TNE ])[ktne ];
-			real mfaac = (D.f[TSW ])[ktsw ];
-			real mfcac = (D.f[TSE ])[ktse ];
-			real mfacc = (D.f[TNW ])[ktnw ];
-			real mfcca = (D.f[BNE ])[kbne ];
-			real mfaaa = (D.f[BSW ])[kbsw ];
-			real mfcaa = (D.f[BSE ])[kbse ];
-			real mfaca = (D.f[BNW ])[kbnw ];
+			real mfcbb = (D.f[DIR_P00   ])[ke   ];
+			real mfabb = (D.f[DIR_M00   ])[kw   ];
+			real mfbcb = (D.f[DIR_0P0   ])[kn   ];
+			real mfbab = (D.f[DIR_0M0   ])[ks   ];
+			real mfbbc = (D.f[DIR_00P   ])[kt   ];
+			real mfbba = (D.f[DIR_00M   ])[kb   ];
+			real mfccb = (D.f[DIR_PP0  ])[kne  ];
+			real mfaab = (D.f[DIR_MM0  ])[ksw  ];
+			real mfcab = (D.f[DIR_PM0  ])[kse  ];
+			real mfacb = (D.f[DIR_MP0  ])[knw  ];
+			real mfcbc = (D.f[DIR_P0P  ])[kte  ];
+			real mfaba = (D.f[DIR_M0M  ])[kbw  ];
+			real mfcba = (D.f[DIR_P0M  ])[kbe  ];
+			real mfabc = (D.f[DIR_M0P  ])[ktw  ];
+			real mfbcc = (D.f[DIR_0PP  ])[ktn  ];
+			real mfbaa = (D.f[DIR_0MM  ])[kbs  ];
+			real mfbca = (D.f[DIR_0PM  ])[kbn  ];
+			real mfbac = (D.f[DIR_0MP  ])[kts  ];
+			real mfbbb = (D.f[DIR_000])[kzero];
+			real mfccc = (D.f[DIR_PPP ])[ktne ];
+			real mfaac = (D.f[DIR_MMP ])[ktsw ];
+			real mfcac = (D.f[DIR_PMP ])[ktse ];
+			real mfacc = (D.f[DIR_MPP ])[ktnw ];
+			real mfcca = (D.f[DIR_PPM ])[kbne ];
+			real mfaaa = (D.f[DIR_MMM ])[kbsw ];
+			real mfcaa = (D.f[DIR_PMM ])[kbse ];
+			real mfaca = (D.f[DIR_MPM ])[kbnw ];
 			////////////////////////////////////////////////////////////////////////////////////
 			real rho   = (mfccc+mfaaa + mfaca+mfcac + mfacc+mfcaa + mfaac+mfcca + 
 							 mfbac+mfbca + mfbaa+mfbcc + mfabc+mfcba + mfaba+mfcbc + mfacb+mfcab + mfaab+mfccb +
@@ -763,33 +763,33 @@ extern "C" __global__ void QVeloDeviceEQ27(real* VeloX,
 			mfcaa = -rho * XXc * YYa * ZZa - c1o216;
 			mfaca = -rho * XXa * YYc * ZZa - c1o216;
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			(D.f[E   ])[ke   ] = mfabb;//mfcbb;
-			(D.f[W   ])[kw   ] = mfcbb;//mfabb;
-			(D.f[N   ])[kn   ] = mfbab;//mfbcb;
-			(D.f[S   ])[ks   ] = mfbcb;//mfbab;
-			(D.f[T   ])[kt   ] = mfbba;//mfbbc;
-			(D.f[B   ])[kb   ] = mfbbc;//mfbba;
-			(D.f[NE  ])[kne  ] = mfaab;//mfccb;
-			(D.f[SW  ])[ksw  ] = mfccb;//mfaab;
-			(D.f[SE  ])[kse  ] = mfacb;//mfcab;
-			(D.f[NW  ])[knw  ] = mfcab;//mfacb;
-			(D.f[TE  ])[kte  ] = mfaba;//mfcbc;
-			(D.f[BW  ])[kbw  ] = mfcbc;//mfaba;
-			(D.f[BE  ])[kbe  ] = mfabc;//mfcba;
-			(D.f[TW  ])[ktw  ] = mfcba;//mfabc;
-			(D.f[TN  ])[ktn  ] = mfbaa;//mfbcc;
-			(D.f[BS  ])[kbs  ] = mfbcc;//mfbaa;
-			(D.f[BN  ])[kbn  ] = mfbac;//mfbca;
-			(D.f[TS  ])[kts  ] = mfbca;//mfbac;
-			(D.f[REST])[kzero] = mfbbb;//mfbbb;
-			(D.f[TNE ])[ktne ] = mfaaa;//mfccc;
-			(D.f[TSW ])[ktsw ] = mfcca;//mfaac;
-			(D.f[TSE ])[ktse ] = mfaca;//mfcac;
-			(D.f[TNW ])[ktnw ] = mfcaa;//mfacc;
-			(D.f[BNE ])[kbne ] = mfaac;//mfcca;
-			(D.f[BSW ])[kbsw ] = mfccc;//mfaaa;
-			(D.f[BSE ])[kbse ] = mfacc;//mfcaa;
-			(D.f[BNW ])[kbnw ] = mfcac;//mfaca;
+			(D.f[DIR_P00   ])[ke   ] = mfabb;//mfcbb;
+			(D.f[DIR_M00   ])[kw   ] = mfcbb;//mfabb;
+			(D.f[DIR_0P0   ])[kn   ] = mfbab;//mfbcb;
+			(D.f[DIR_0M0   ])[ks   ] = mfbcb;//mfbab;
+			(D.f[DIR_00P   ])[kt   ] = mfbba;//mfbbc;
+			(D.f[DIR_00M   ])[kb   ] = mfbbc;//mfbba;
+			(D.f[DIR_PP0  ])[kne  ] = mfaab;//mfccb;
+			(D.f[DIR_MM0  ])[ksw  ] = mfccb;//mfaab;
+			(D.f[DIR_PM0  ])[kse  ] = mfacb;//mfcab;
+			(D.f[DIR_MP0  ])[knw  ] = mfcab;//mfacb;
+			(D.f[DIR_P0P  ])[kte  ] = mfaba;//mfcbc;
+			(D.f[DIR_M0M  ])[kbw  ] = mfcbc;//mfaba;
+			(D.f[DIR_P0M  ])[kbe  ] = mfabc;//mfcba;
+			(D.f[DIR_M0P  ])[ktw  ] = mfcba;//mfabc;
+			(D.f[DIR_0PP  ])[ktn  ] = mfbaa;//mfbcc;
+			(D.f[DIR_0MM  ])[kbs  ] = mfbcc;//mfbaa;
+			(D.f[DIR_0PM  ])[kbn  ] = mfbac;//mfbca;
+			(D.f[DIR_0MP  ])[kts  ] = mfbca;//mfbac;
+			(D.f[DIR_000])[kzero] = mfbbb;//mfbbb;
+			(D.f[DIR_PPP ])[ktne ] = mfaaa;//mfccc;
+			(D.f[DIR_MMP ])[ktsw ] = mfcca;//mfaac;
+			(D.f[DIR_PMP ])[ktse ] = mfaca;//mfcac;
+			(D.f[DIR_MPP ])[ktnw ] = mfcaa;//mfacc;
+			(D.f[DIR_PPM ])[kbne ] = mfaac;//mfcca;
+			(D.f[DIR_MMM ])[kbsw ] = mfccc;//mfaaa;
+			(D.f[DIR_PMM ])[kbse ] = mfacc;//mfcaa;
+			(D.f[DIR_MPM ])[kbnw ] = mfcac;//mfaca;
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -833,7 +833,7 @@ extern "C" __global__ void QVeloDeviceEQ27(real* VeloX,
 
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QVeloStreetDeviceEQ27(
+__global__ void QVeloStreetDeviceEQ27(
 	real* veloXfraction,
 	real* veloYfraction,
 	int*  naschVelo,
@@ -894,95 +894,95 @@ extern "C" __global__ void QVeloStreetDeviceEQ27(
 		Distributions27 D;
 		if (isEvenTimestep == true)
 		{
-			D.f[E   ] = &DD[E   *size_Mat];
-			D.f[W   ] = &DD[W   *size_Mat];
-			D.f[N   ] = &DD[N   *size_Mat];
-			D.f[S   ] = &DD[S   *size_Mat];
-			D.f[T   ] = &DD[T   *size_Mat];
-			D.f[B   ] = &DD[B   *size_Mat];
-			D.f[NE  ] = &DD[NE  *size_Mat];
-			D.f[SW  ] = &DD[SW  *size_Mat];
-			D.f[SE  ] = &DD[SE  *size_Mat];
-			D.f[NW  ] = &DD[NW  *size_Mat];
-			D.f[TE  ] = &DD[TE  *size_Mat];
-			D.f[BW  ] = &DD[BW  *size_Mat];
-			D.f[BE  ] = &DD[BE  *size_Mat];
-			D.f[TW  ] = &DD[TW  *size_Mat];
-			D.f[TN  ] = &DD[TN  *size_Mat];
-			D.f[BS  ] = &DD[BS  *size_Mat];
-			D.f[BN  ] = &DD[BN  *size_Mat];
-			D.f[TS  ] = &DD[TS  *size_Mat];
-			D.f[REST] = &DD[REST*size_Mat];
-			D.f[TNE ] = &DD[TNE *size_Mat];
-			D.f[TSW ] = &DD[TSW *size_Mat];
-			D.f[TSE ] = &DD[TSE *size_Mat];
-			D.f[TNW ] = &DD[TNW *size_Mat];
-			D.f[BNE ] = &DD[BNE *size_Mat];
-			D.f[BSW ] = &DD[BSW *size_Mat];
-			D.f[BSE ] = &DD[BSE *size_Mat];
-			D.f[BNW ] = &DD[BNW *size_Mat];
+			D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+			D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+			D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+			D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+			D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+			D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+			D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+			D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+			D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+			D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+			D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+			D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+			D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+			D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+			D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+			D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+			D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+			D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+			D.f[DIR_000] = &DD[DIR_000*size_Mat];
+			D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+			D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+			D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+			D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+			D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+			D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+			D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+			D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
 		}
 		else
 		{
-			D.f[W   ] = &DD[E   *size_Mat];
-			D.f[E   ] = &DD[W   *size_Mat];
-			D.f[S   ] = &DD[N   *size_Mat];
-			D.f[N   ] = &DD[S   *size_Mat];
-			D.f[B   ] = &DD[T   *size_Mat];
-			D.f[T   ] = &DD[B   *size_Mat];
-			D.f[SW  ] = &DD[NE  *size_Mat];
-			D.f[NE  ] = &DD[SW  *size_Mat];
-			D.f[NW  ] = &DD[SE  *size_Mat];
-			D.f[SE  ] = &DD[NW  *size_Mat];
-			D.f[BW  ] = &DD[TE  *size_Mat];
-			D.f[TE  ] = &DD[BW  *size_Mat];
-			D.f[TW  ] = &DD[BE  *size_Mat];
-			D.f[BE  ] = &DD[TW  *size_Mat];
-			D.f[BS  ] = &DD[TN  *size_Mat];
-			D.f[TN  ] = &DD[BS  *size_Mat];
-			D.f[TS  ] = &DD[BN  *size_Mat];
-			D.f[BN  ] = &DD[TS  *size_Mat];
-			D.f[REST] = &DD[REST*size_Mat];
-			D.f[TNE ] = &DD[BSW *size_Mat];
-			D.f[TSW ] = &DD[BNE *size_Mat];
-			D.f[TSE ] = &DD[BNW *size_Mat];
-			D.f[TNW ] = &DD[BSE *size_Mat];
-			D.f[BNE ] = &DD[TSW *size_Mat];
-			D.f[BSW ] = &DD[TNE *size_Mat];
-			D.f[BSE ] = &DD[TNW *size_Mat];
-			D.f[BNW ] = &DD[TSE *size_Mat];
+			D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+			D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+			D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+			D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+			D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+			D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+			D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+			D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+			D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+			D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+			D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+			D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+			D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+			D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+			D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+			D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+			D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+			D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+			D.f[DIR_000] = &DD[DIR_000*size_Mat];
+			D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+			D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+			D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+			D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+			D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+			D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+			D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+			D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
 		}
 
 		//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 		// based on BGK Plus Comp
 		//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-		real mfcbb = (D.f[E   ])[ke   ];
-		real mfabb = (D.f[W   ])[kw   ];
-		real mfbcb = (D.f[N   ])[kn   ];
-		real mfbab = (D.f[S   ])[ks   ];
-		real mfbbc = (D.f[T   ])[kt   ];
-		real mfbba = (D.f[B   ])[kb   ];
-		real mfccb = (D.f[NE  ])[kne  ];
-		real mfaab = (D.f[SW  ])[ksw  ];
-		real mfcab = (D.f[SE  ])[kse  ];
-		real mfacb = (D.f[NW  ])[knw  ];
-		real mfcbc = (D.f[TE  ])[kte  ];
-		real mfaba = (D.f[BW  ])[kbw  ];
-		real mfcba = (D.f[BE  ])[kbe  ];
-		real mfabc = (D.f[TW  ])[ktw  ];
-		real mfbcc = (D.f[TN  ])[ktn  ];
-		real mfbaa = (D.f[BS  ])[kbs  ];
-		real mfbca = (D.f[BN  ])[kbn  ];
-		real mfbac = (D.f[TS  ])[kts  ];
-		real mfbbb = (D.f[REST])[kzero];
-		real mfccc = (D.f[TNE ])[ktne ];
-		real mfaac = (D.f[TSW ])[ktsw ];
-		real mfcac = (D.f[TSE ])[ktse ];
-		real mfacc = (D.f[TNW ])[ktnw ];
-		real mfcca = (D.f[BNE ])[kbne ];
-		real mfaaa = (D.f[BSW ])[kbsw ];
-		real mfcaa = (D.f[BSE ])[kbse ];
-		real mfaca = (D.f[BNW ])[kbnw ];
+		real mfcbb = (D.f[DIR_P00   ])[ke   ];
+		real mfabb = (D.f[DIR_M00   ])[kw   ];
+		real mfbcb = (D.f[DIR_0P0   ])[kn   ];
+		real mfbab = (D.f[DIR_0M0   ])[ks   ];
+		real mfbbc = (D.f[DIR_00P   ])[kt   ];
+		real mfbba = (D.f[DIR_00M   ])[kb   ];
+		real mfccb = (D.f[DIR_PP0  ])[kne  ];
+		real mfaab = (D.f[DIR_MM0  ])[ksw  ];
+		real mfcab = (D.f[DIR_PM0  ])[kse  ];
+		real mfacb = (D.f[DIR_MP0  ])[knw  ];
+		real mfcbc = (D.f[DIR_P0P  ])[kte  ];
+		real mfaba = (D.f[DIR_M0M  ])[kbw  ];
+		real mfcba = (D.f[DIR_P0M  ])[kbe  ];
+		real mfabc = (D.f[DIR_M0P  ])[ktw  ];
+		real mfbcc = (D.f[DIR_0PP  ])[ktn  ];
+		real mfbaa = (D.f[DIR_0MM  ])[kbs  ];
+		real mfbca = (D.f[DIR_0PM  ])[kbn  ];
+		real mfbac = (D.f[DIR_0MP  ])[kts  ];
+		real mfbbb = (D.f[DIR_000])[kzero];
+		real mfccc = (D.f[DIR_PPP ])[ktne ];
+		real mfaac = (D.f[DIR_MMP ])[ktsw ];
+		real mfcac = (D.f[DIR_PMP ])[ktse ];
+		real mfacc = (D.f[DIR_MPP ])[ktnw ];
+		real mfcca = (D.f[DIR_PPM ])[kbne ];
+		real mfaaa = (D.f[DIR_MMM ])[kbsw ];
+		real mfcaa = (D.f[DIR_PMM ])[kbse ];
+		real mfaca = (D.f[DIR_MPM ])[kbnw ];
 		////////////////////////////////////////////////////////////////////////////////////
 		real rho = (mfccc + mfaaa + mfaca + mfcac + mfacc + mfcaa + mfaac + mfcca +
 			        mfbac + mfbca + mfbaa + mfbcc + mfabc + mfcba + mfaba + mfcbc + mfacb + mfcab + mfaab + mfccb +
@@ -1049,33 +1049,33 @@ extern "C" __global__ void QVeloStreetDeviceEQ27(
 		mfcaa = -rho * XXc * YYa * ZZa - c1o216;
 		mfaca = -rho * XXa * YYc * ZZa - c1o216;
 		//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-		(D.f[E   ])[ke   ] = mfabb;//mfcbb;
-		(D.f[W   ])[kw   ] = mfcbb;//mfabb;
-		(D.f[N   ])[kn   ] = mfbab;//mfbcb;
-		(D.f[S   ])[ks   ] = mfbcb;//mfbab;
-		(D.f[T   ])[kt   ] = mfbba;//mfbbc;
-		(D.f[B   ])[kb   ] = mfbbc;//mfbba;
-		(D.f[NE  ])[kne  ] = mfaab;//mfccb;
-		(D.f[SW  ])[ksw  ] = mfccb;//mfaab;
-		(D.f[SE  ])[kse  ] = mfacb;//mfcab;
-		(D.f[NW  ])[knw  ] = mfcab;//mfacb;
-		(D.f[TE  ])[kte  ] = mfaba;//mfcbc;
-		(D.f[BW  ])[kbw  ] = mfcbc;//mfaba;
-		(D.f[BE  ])[kbe  ] = mfabc;//mfcba;
-		(D.f[TW  ])[ktw  ] = mfcba;//mfabc;
-		(D.f[TN  ])[ktn  ] = mfbaa;//mfbcc;
-		(D.f[BS  ])[kbs  ] = mfbcc;//mfbaa;
-		(D.f[BN  ])[kbn  ] = mfbac;//mfbca;
-		(D.f[TS  ])[kts  ] = mfbca;//mfbac;
-		(D.f[REST])[kzero] = mfbbb;//mfbbb;
-		(D.f[TNE ])[ktne ] = mfaaa;//mfccc;
-		(D.f[TSW ])[ktsw ] = mfcca;//mfaac;
-		(D.f[TSE ])[ktse ] = mfaca;//mfcac;
-		(D.f[TNW ])[ktnw ] = mfcaa;//mfacc;
-		(D.f[BNE ])[kbne ] = mfaac;//mfcca;
-		(D.f[BSW ])[kbsw ] = mfccc;//mfaaa;
-		(D.f[BSE ])[kbse ] = mfacc;//mfcaa;
-		(D.f[BNW ])[kbnw ] = mfcac;//mfaca;
+		(D.f[DIR_P00   ])[ke   ] = mfabb;//mfcbb;
+		(D.f[DIR_M00   ])[kw   ] = mfcbb;//mfabb;
+		(D.f[DIR_0P0   ])[kn   ] = mfbab;//mfbcb;
+		(D.f[DIR_0M0   ])[ks   ] = mfbcb;//mfbab;
+		(D.f[DIR_00P   ])[kt   ] = mfbba;//mfbbc;
+		(D.f[DIR_00M   ])[kb   ] = mfbbc;//mfbba;
+		(D.f[DIR_PP0  ])[kne  ] = mfaab;//mfccb;
+		(D.f[DIR_MM0  ])[ksw  ] = mfccb;//mfaab;
+		(D.f[DIR_PM0  ])[kse  ] = mfacb;//mfcab;
+		(D.f[DIR_MP0  ])[knw  ] = mfcab;//mfacb;
+		(D.f[DIR_P0P  ])[kte  ] = mfaba;//mfcbc;
+		(D.f[DIR_M0M  ])[kbw  ] = mfcbc;//mfaba;
+		(D.f[DIR_P0M  ])[kbe  ] = mfabc;//mfcba;
+		(D.f[DIR_M0P  ])[ktw  ] = mfcba;//mfabc;
+		(D.f[DIR_0PP  ])[ktn  ] = mfbaa;//mfbcc;
+		(D.f[DIR_0MM  ])[kbs  ] = mfbcc;//mfbaa;
+		(D.f[DIR_0PM  ])[kbn  ] = mfbac;//mfbca;
+		(D.f[DIR_0MP  ])[kts  ] = mfbca;//mfbac;
+		(D.f[DIR_000])[kzero] = mfbbb;//mfbbb;
+		(D.f[DIR_PPP ])[ktne ] = mfaaa;//mfccc;
+		(D.f[DIR_MMP ])[ktsw ] = mfcca;//mfaac;
+		(D.f[DIR_PMP ])[ktse ] = mfaca;//mfcac;
+		(D.f[DIR_MPP ])[ktnw ] = mfcaa;//mfacc;
+		(D.f[DIR_PPM ])[kbne ] = mfaac;//mfcca;
+		(D.f[DIR_MMM ])[kbsw ] = mfccc;//mfaaa;
+		(D.f[DIR_PMM ])[kbse ] = mfacc;//mfcaa;
+		(D.f[DIR_MPM ])[kbnw ] = mfcac;//mfaca;
 	}
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -1119,8 +1119,7 @@ extern "C" __global__ void QVeloStreetDeviceEQ27(
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QVelDeviceIncompHighNu27(int inx,
-													int iny,
+__global__ void QVelDeviceIncompHighNu27(
 													real* vx,
 													real* vy,
 													real* vz,
@@ -1138,63 +1137,63 @@ extern "C" __global__ void QVelDeviceIncompHighNu27(int inx,
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[E   ] = &DD[E   *size_Mat];
-      D.f[W   ] = &DD[W   *size_Mat];
-      D.f[N   ] = &DD[N   *size_Mat];
-      D.f[S   ] = &DD[S   *size_Mat];
-      D.f[T   ] = &DD[T   *size_Mat];
-      D.f[B   ] = &DD[B   *size_Mat];
-      D.f[NE  ] = &DD[NE  *size_Mat];
-      D.f[SW  ] = &DD[SW  *size_Mat];
-      D.f[SE  ] = &DD[SE  *size_Mat];
-      D.f[NW  ] = &DD[NW  *size_Mat];
-      D.f[TE  ] = &DD[TE  *size_Mat];
-      D.f[BW  ] = &DD[BW  *size_Mat];
-      D.f[BE  ] = &DD[BE  *size_Mat];
-      D.f[TW  ] = &DD[TW  *size_Mat];
-      D.f[TN  ] = &DD[TN  *size_Mat];
-      D.f[BS  ] = &DD[BS  *size_Mat];
-      D.f[BN  ] = &DD[BN  *size_Mat];
-      D.f[TS  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[TNE *size_Mat];
-      D.f[TSW ] = &DD[TSW *size_Mat];
-      D.f[TSE ] = &DD[TSE *size_Mat];
-      D.f[TNW ] = &DD[TNW *size_Mat];
-      D.f[BNE ] = &DD[BNE *size_Mat];
-      D.f[BSW ] = &DD[BSW *size_Mat];
-      D.f[BSE ] = &DD[BSE *size_Mat];
-      D.f[BNW ] = &DD[BNW *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
    } 
    else
    {
-      D.f[W   ] = &DD[E   *size_Mat];
-      D.f[E   ] = &DD[W   *size_Mat];
-      D.f[S   ] = &DD[N   *size_Mat];
-      D.f[N   ] = &DD[S   *size_Mat];
-      D.f[B   ] = &DD[T   *size_Mat];
-      D.f[T   ] = &DD[B   *size_Mat];
-      D.f[SW  ] = &DD[NE  *size_Mat];
-      D.f[NE  ] = &DD[SW  *size_Mat];
-      D.f[NW  ] = &DD[SE  *size_Mat];
-      D.f[SE  ] = &DD[NW  *size_Mat];
-      D.f[BW  ] = &DD[TE  *size_Mat];
-      D.f[TE  ] = &DD[BW  *size_Mat];
-      D.f[TW  ] = &DD[BE  *size_Mat];
-      D.f[BE  ] = &DD[TW  *size_Mat];
-      D.f[BS  ] = &DD[TN  *size_Mat];
-      D.f[TN  ] = &DD[BS  *size_Mat];
-      D.f[TS  ] = &DD[BN  *size_Mat];
-      D.f[BN  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[BSW *size_Mat];
-      D.f[TSW ] = &DD[BNE *size_Mat];
-      D.f[TSE ] = &DD[BNW *size_Mat];
-      D.f[TNW ] = &DD[BSE *size_Mat];
-      D.f[BNE ] = &DD[TSW *size_Mat];
-      D.f[BSW ] = &DD[TNE *size_Mat];
-      D.f[BSE ] = &DD[TNW *size_Mat];
-      D.f[BNW ] = &DD[TSE *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -1219,32 +1218,32 @@ extern "C" __global__ void QVelDeviceIncompHighNu27(int inx,
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[E   * numberOfBCnodes];
-      q_dirW   = &QQ[W   * numberOfBCnodes];
-      q_dirN   = &QQ[N   * numberOfBCnodes];
-      q_dirS   = &QQ[S   * numberOfBCnodes];
-      q_dirT   = &QQ[T   * numberOfBCnodes];
-      q_dirB   = &QQ[B   * numberOfBCnodes];
-      q_dirNE  = &QQ[NE  * numberOfBCnodes];
-      q_dirSW  = &QQ[SW  * numberOfBCnodes];
-      q_dirSE  = &QQ[SE  * numberOfBCnodes];
-      q_dirNW  = &QQ[NW  * numberOfBCnodes];
-      q_dirTE  = &QQ[TE  * numberOfBCnodes];
-      q_dirBW  = &QQ[BW  * numberOfBCnodes];
-      q_dirBE  = &QQ[BE  * numberOfBCnodes];
-      q_dirTW  = &QQ[TW  * numberOfBCnodes];
-      q_dirTN  = &QQ[TN  * numberOfBCnodes];
-      q_dirBS  = &QQ[BS  * numberOfBCnodes];
-      q_dirBN  = &QQ[BN  * numberOfBCnodes];
-      q_dirTS  = &QQ[TS  * numberOfBCnodes];
-      q_dirTNE = &QQ[TNE * numberOfBCnodes];
-      q_dirTSW = &QQ[TSW * numberOfBCnodes];
-      q_dirTSE = &QQ[TSE * numberOfBCnodes];
-      q_dirTNW = &QQ[TNW * numberOfBCnodes];
-      q_dirBNE = &QQ[BNE * numberOfBCnodes];
-      q_dirBSW = &QQ[BSW * numberOfBCnodes];
-      q_dirBSE = &QQ[BSE * numberOfBCnodes];
-      q_dirBNW = &QQ[BNW * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
+      q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
+      q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
+      q_dirTNW = &QQ[DIR_MPP * numberOfBCnodes];
+      q_dirBNE = &QQ[DIR_PPM * numberOfBCnodes];
+      q_dirBSW = &QQ[DIR_MMM * numberOfBCnodes];
+      q_dirBSE = &QQ[DIR_PMM * numberOfBCnodes];
+      q_dirBNW = &QQ[DIR_MPM * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -1279,37 +1278,37 @@ extern "C" __global__ void QVelDeviceIncompHighNu27(int inx,
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
          f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_E   = (D.f[E   ])[ke   ];
-      f_W   = (D.f[W   ])[kw   ];
-      f_N   = (D.f[N   ])[kn   ];
-      f_S   = (D.f[S   ])[ks   ];
-      f_T   = (D.f[T   ])[kt   ];
-      f_B   = (D.f[B   ])[kb   ];
-      f_NE  = (D.f[NE  ])[kne  ];
-      f_SW  = (D.f[SW  ])[ksw  ];
-      f_SE  = (D.f[SE  ])[kse  ];
-      f_NW  = (D.f[NW  ])[knw  ];
-      f_TE  = (D.f[TE  ])[kte  ];
-      f_BW  = (D.f[BW  ])[kbw  ];
-      f_BE  = (D.f[BE  ])[kbe  ];
-      f_TW  = (D.f[TW  ])[ktw  ];
-      f_TN  = (D.f[TN  ])[ktn  ];
-      f_BS  = (D.f[BS  ])[kbs  ];
-      f_BN  = (D.f[BN  ])[kbn  ];
-      f_TS  = (D.f[TS  ])[kts  ];
-      f_TNE = (D.f[TNE ])[ktne ];
-      f_TSW = (D.f[TSW ])[ktsw ];
-      f_TSE = (D.f[TSE ])[ktse ];
-      f_TNW = (D.f[TNW ])[ktnw ];
-      f_BNE = (D.f[BNE ])[kbne ];
-      f_BSW = (D.f[BSW ])[kbsw ];
-      f_BSE = (D.f[BSE ])[kbse ];
-      f_BNW = (D.f[BNW ])[kbnw ];
+      f_E   = (D.f[DIR_P00   ])[ke   ];
+      f_W   = (D.f[DIR_M00   ])[kw   ];
+      f_N   = (D.f[DIR_0P0   ])[kn   ];
+      f_S   = (D.f[DIR_0M0   ])[ks   ];
+      f_T   = (D.f[DIR_00P   ])[kt   ];
+      f_B   = (D.f[DIR_00M   ])[kb   ];
+      f_NE  = (D.f[DIR_PP0  ])[kne  ];
+      f_SW  = (D.f[DIR_MM0  ])[ksw  ];
+      f_SE  = (D.f[DIR_PM0  ])[kse  ];
+      f_NW  = (D.f[DIR_MP0  ])[knw  ];
+      f_TE  = (D.f[DIR_P0P  ])[kte  ];
+      f_BW  = (D.f[DIR_M0M  ])[kbw  ];
+      f_BE  = (D.f[DIR_P0M  ])[kbe  ];
+      f_TW  = (D.f[DIR_M0P  ])[ktw  ];
+      f_TN  = (D.f[DIR_0PP  ])[ktn  ];
+      f_BS  = (D.f[DIR_0MM  ])[kbs  ];
+      f_BN  = (D.f[DIR_0PM  ])[kbn  ];
+      f_TS  = (D.f[DIR_0MP  ])[kts  ];
+      f_TNE = (D.f[DIR_PPP ])[ktne ];
+      f_TSW = (D.f[DIR_MMP ])[ktsw ];
+      f_TSE = (D.f[DIR_PMP ])[ktse ];
+      f_TNW = (D.f[DIR_MPP ])[ktnw ];
+      f_BNE = (D.f[DIR_PPM ])[kbne ];
+      f_BSW = (D.f[DIR_MMM ])[kbsw ];
+      f_BSE = (D.f[DIR_PMM ])[kbse ];
+      f_BNW = (D.f[DIR_MPM ])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, drho, feq, q;
       drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
                 f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + 
-                f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[REST])[kzero]); 
+                f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[DIR_000])[kzero]); 
 
       vx1    =  (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
                 ((f_BE - f_TW)   + (f_TE - f_BW))   + ((f_SE - f_NW)   + (f_NE - f_SW)) +
@@ -1329,67 +1328,67 @@ extern "C" __global__ void QVelDeviceIncompHighNu27(int inx,
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D.f[E   ] = &DD[E   *size_Mat];
-         D.f[W   ] = &DD[W   *size_Mat];
-         D.f[N   ] = &DD[N   *size_Mat];
-         D.f[S   ] = &DD[S   *size_Mat];
-         D.f[T   ] = &DD[T   *size_Mat];
-         D.f[B   ] = &DD[B   *size_Mat];
-         D.f[NE  ] = &DD[NE  *size_Mat];
-         D.f[SW  ] = &DD[SW  *size_Mat];
-         D.f[SE  ] = &DD[SE  *size_Mat];
-         D.f[NW  ] = &DD[NW  *size_Mat];
-         D.f[TE  ] = &DD[TE  *size_Mat];
-         D.f[BW  ] = &DD[BW  *size_Mat];
-         D.f[BE  ] = &DD[BE  *size_Mat];
-         D.f[TW  ] = &DD[TW  *size_Mat];
-         D.f[TN  ] = &DD[TN  *size_Mat];
-         D.f[BS  ] = &DD[BS  *size_Mat];
-         D.f[BN  ] = &DD[BN  *size_Mat];
-         D.f[TS  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[TNE *size_Mat];
-         D.f[TSW ] = &DD[TSW *size_Mat];
-         D.f[TSE ] = &DD[TSE *size_Mat];
-         D.f[TNW ] = &DD[TNW *size_Mat];
-         D.f[BNE ] = &DD[BNE *size_Mat];
-         D.f[BSW ] = &DD[BSW *size_Mat];
-         D.f[BSE ] = &DD[BSE *size_Mat];
-         D.f[BNW ] = &DD[BNW *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
       } 
       else
       {
-         D.f[W   ] = &DD[E   *size_Mat];
-         D.f[E   ] = &DD[W   *size_Mat];
-         D.f[S   ] = &DD[N   *size_Mat];
-         D.f[N   ] = &DD[S   *size_Mat];
-         D.f[B   ] = &DD[T   *size_Mat];
-         D.f[T   ] = &DD[B   *size_Mat];
-         D.f[SW  ] = &DD[NE  *size_Mat];
-         D.f[NE  ] = &DD[SW  *size_Mat];
-         D.f[NW  ] = &DD[SE  *size_Mat];
-         D.f[SE  ] = &DD[NW  *size_Mat];
-         D.f[BW  ] = &DD[TE  *size_Mat];
-         D.f[TE  ] = &DD[BW  *size_Mat];
-         D.f[TW  ] = &DD[BE  *size_Mat];
-         D.f[BE  ] = &DD[TW  *size_Mat];
-         D.f[BS  ] = &DD[TN  *size_Mat];
-         D.f[TN  ] = &DD[BS  *size_Mat];
-         D.f[TS  ] = &DD[BN  *size_Mat];
-         D.f[BN  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[BSW *size_Mat];
-         D.f[TSW ] = &DD[BNE *size_Mat];
-         D.f[TSE ] = &DD[BNW *size_Mat];
-         D.f[TNW ] = &DD[BSE *size_Mat];
-         D.f[BNE ] = &DD[TSW *size_Mat];
-         D.f[BSW ] = &DD[TNE *size_Mat];
-         D.f[BSE ] = &DD[TNW *size_Mat];
-         D.f[BNW ] = &DD[TSE *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
-      //(D.f[REST])[k]=c1o10;
+      //(D.f[DIR_000])[k]=c1o10;
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
 	  //ToDo anders Klammern
@@ -1398,182 +1397,182 @@ extern "C" __global__ void QVelDeviceIncompHighNu27(int inx,
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c2o27* (drho/*+three*( vx1        )*/+c9o2*( vx1        )*( vx1        ) /** (one + drho)*/-cu_sq); 
-         (D.f[W])[kw]=((c1o1 - q) * f_E + q * ((f_E + f_W) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*( VeloX     )) / (q + c1o1) ;
+         (D.f[DIR_M00])[kw]=((c1o1 - q) * f_E + q * ((f_E + f_W) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*( VeloX     )) / (q + c1o1) ;
       }
 
       q = q_dirW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c2o27* (drho/*+three*(-vx1        )*/+c9o2*(-vx1        )*(-vx1        ) /** (one + drho)*/-cu_sq); 
-         (D.f[E])[ke]=((c1o1 - q) * f_W + q * ((f_W + f_E) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*(-VeloX     )) / (q + c1o1) ;
+         (D.f[DIR_P00])[ke]=((c1o1 - q) * f_W + q * ((f_W + f_E) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*(-VeloX     )) / (q + c1o1) ;
       }
 
       q = q_dirN[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c2o27* (drho/*+three*(    vx2     )*/+c9o2*(     vx2    )*(     vx2    ) /** (one + drho)*/-cu_sq); 
-         (D.f[S])[ks]=((c1o1 - q) * f_N + q * ((f_N + f_S) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*( VeloY     )) / (q + c1o1) ;
+         (D.f[DIR_0M0])[ks]=((c1o1 - q) * f_N + q * ((f_N + f_S) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*( VeloY     )) / (q + c1o1) ;
       }
 
       q = q_dirS[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c2o27* (drho/*+three*(   -vx2     )*/+c9o2*(    -vx2    )*(    -vx2    ) /** (one + drho)*/-cu_sq); 
-         (D.f[N])[kn]=((c1o1 - q) * f_S + q * ((f_S + f_N) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*(-VeloY     )) / (q + c1o1) ;
+         (D.f[DIR_0P0])[kn]=((c1o1 - q) * f_S + q * ((f_S + f_N) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*(-VeloY     )) / (q + c1o1) ;
       }
 
       q = q_dirT[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c2o27* (drho/*+three*(         vx3)*/+c9o2*(         vx3)*(         vx3) /** (one + drho)*/-cu_sq); 
-         (D.f[B])[kb]=((c1o1 - q) * f_T + q * ((f_T + f_B) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*( VeloZ     )) / (q + c1o1) ;
+         (D.f[DIR_00M])[kb]=((c1o1 - q) * f_T + q * ((f_T + f_B) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*( VeloZ     )) / (q + c1o1) ;
       }
 
       q = q_dirB[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c2o27* (drho/*+three*(        -vx3)*/+c9o2*(        -vx3)*(        -vx3) /** (one + drho)*/-cu_sq); 
-         (D.f[T])[kt]=((c1o1 - q) * f_B + q * ((f_B + f_T) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*(-VeloZ     )) / (q + c1o1) ;
+         (D.f[DIR_00P])[kt]=((c1o1 - q) * f_B + q * ((f_B + f_T) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*(-VeloZ     )) / (q + c1o1) ;
       }
 
       q = q_dirNE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*( vx1+vx2    )*/+c9o2*( vx1+vx2    )*( vx1+vx2    ) /** (one + drho)*/-cu_sq); 
-         (D.f[SW])[ksw]=((c1o1 - q) * f_NE + q * ((f_NE + f_SW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloX+VeloY)) / (q + c1o1) ;
+         (D.f[DIR_MM0])[ksw]=((c1o1 - q) * f_NE + q * ((f_NE + f_SW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloX+VeloY)) / (q + c1o1) ;
       }
 
       q = q_dirSW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(-vx1-vx2    )*/+c9o2*(-vx1-vx2    )*(-vx1-vx2    ) /** (one + drho)*/-cu_sq); 
-         (D.f[NE])[kne]=((c1o1 - q) * f_SW + q * ((f_SW + f_NE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloX-VeloY)) / (q + c1o1) ;
+         (D.f[DIR_PP0])[kne]=((c1o1 - q) * f_SW + q * ((f_SW + f_NE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloX-VeloY)) / (q + c1o1) ;
       }
 
       q = q_dirSE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*( vx1-vx2    )*/+c9o2*( vx1-vx2    )*( vx1-vx2    ) /** (one + drho)*/-cu_sq); 
-         (D.f[NW])[knw]=((c1o1 - q) * f_SE + q * ((f_SE + f_NW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloX-VeloY)) / (q + c1o1) ;
+         (D.f[DIR_MP0])[knw]=((c1o1 - q) * f_SE + q * ((f_SE + f_NW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloX-VeloY)) / (q + c1o1) ;
       }
 
       q = q_dirNW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(-vx1+vx2    )*/+c9o2*(-vx1+vx2    )*(-vx1+vx2    ) /** (one + drho)*/-cu_sq); 
-         (D.f[SE])[kse]=((c1o1 - q) * f_NW + q * ((f_NW + f_SE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloX+VeloY)) / (q + c1o1) ;
+         (D.f[DIR_PM0])[kse]=((c1o1 - q) * f_NW + q * ((f_NW + f_SE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloX+VeloY)) / (q + c1o1) ;
       }
 
       q = q_dirTE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*( vx1    +vx3)*/+c9o2*( vx1    +vx3)*( vx1    +vx3) /** (one + drho)*/-cu_sq); 
-         (D.f[BW])[kbw]=((c1o1 - q) * f_TE + q * ((f_TE + f_BW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloX+VeloZ)) / (q + c1o1) ;
+         (D.f[DIR_M0M])[kbw]=((c1o1 - q) * f_TE + q * ((f_TE + f_BW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloX+VeloZ)) / (q + c1o1) ;
       }
 
       q = q_dirBW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(-vx1    -vx3)*/+c9o2*(-vx1    -vx3)*(-vx1    -vx3) /** (one + drho)*/-cu_sq); 
-         (D.f[TE])[kte]=((c1o1 - q) * f_BW + q * ((f_BW + f_TE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloX-VeloZ)) / (q + c1o1) ;
+         (D.f[DIR_P0P])[kte]=((c1o1 - q) * f_BW + q * ((f_BW + f_TE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloX-VeloZ)) / (q + c1o1) ;
       }
 
       q = q_dirBE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*( vx1    -vx3)*/+c9o2*( vx1    -vx3)*( vx1    -vx3) /** (one + drho)*/-cu_sq); 
-         (D.f[TW])[ktw]=((c1o1 - q) * f_BE + q * ((f_BE + f_TW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloX-VeloZ)) / (q + c1o1) ;
+         (D.f[DIR_M0P])[ktw]=((c1o1 - q) * f_BE + q * ((f_BE + f_TW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloX-VeloZ)) / (q + c1o1) ;
       }
 
       q = q_dirTW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(-vx1    +vx3)*/+c9o2*(-vx1    +vx3)*(-vx1    +vx3) /** (one + drho)*/-cu_sq); 
-         (D.f[BE])[kbe]=((c1o1 - q) * f_TW + q * ((f_TW + f_BE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloX+VeloZ)) / (q + c1o1) ;
+         (D.f[DIR_P0M])[kbe]=((c1o1 - q) * f_TW + q * ((f_TW + f_BE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloX+VeloZ)) / (q + c1o1) ;
       }
 
       q = q_dirTN[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(     vx2+vx3)*/+c9o2*(     vx2+vx3)*(     vx2+vx3) /** (one + drho)*/-cu_sq); 
-         (D.f[BS])[kbs]=((c1o1 - q) * f_TN + q * ((f_TN + f_BS) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloY+VeloZ)) / (q + c1o1) ;
+         (D.f[DIR_0MM])[kbs]=((c1o1 - q) * f_TN + q * ((f_TN + f_BS) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloY+VeloZ)) / (q + c1o1) ;
       }
 
       q = q_dirBS[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(    -vx2-vx3)*/+c9o2*(    -vx2-vx3)*(    -vx2-vx3) /** (one + drho)*/-cu_sq); 
-         (D.f[TN])[ktn]=((c1o1 - q) * f_BS + q * ((f_BS + f_TN) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloY-VeloZ)) / (q + c1o1) ;
+         (D.f[DIR_0PP])[ktn]=((c1o1 - q) * f_BS + q * ((f_BS + f_TN) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloY-VeloZ)) / (q + c1o1) ;
       }
 
       q = q_dirBN[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(     vx2-vx3)*/+c9o2*(     vx2-vx3)*(     vx2-vx3) /** (one + drho)*/-cu_sq); 
-         (D.f[TS])[kts]=((c1o1 - q) * f_BN + q * ((f_BN + f_TS) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloY-VeloZ)) / (q + c1o1) ;
+         (D.f[DIR_0MP])[kts]=((c1o1 - q) * f_BN + q * ((f_BN + f_TS) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloY-VeloZ)) / (q + c1o1) ;
       }
 
       q = q_dirTS[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(    -vx2+vx3)*/+c9o2*(    -vx2+vx3)*(    -vx2+vx3) /** (one + drho)*/-cu_sq); 
-         (D.f[BN])[kbn]=((c1o1 - q) * f_TS + q * ((f_TS + f_BN) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloY+VeloZ)) / (q + c1o1) ;
+         (D.f[DIR_0PM])[kbn]=((c1o1 - q) * f_TS + q * ((f_TS + f_BN) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloY+VeloZ)) / (q + c1o1) ;
       }
 
       q = q_dirTNE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*( vx1+vx2+vx3)*/+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3) /** (one + drho)*/-cu_sq); 
-         (D.f[BSW])[kbsw]=((c1o1 - q) * f_TNE + q * ((f_TNE + f_BSW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*( VeloX+VeloY+VeloZ)) / (q + c1o1) ;
+         (D.f[DIR_MMM])[kbsw]=((c1o1 - q) * f_TNE + q * ((f_TNE + f_BSW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*( VeloX+VeloY+VeloZ)) / (q + c1o1) ;
       }
 
       q = q_dirBSW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*(-vx1-vx2-vx3)*/+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3) /** (one + drho)*/-cu_sq); 
-         (D.f[TNE])[ktne]=((c1o1 - q) * f_BSW + q * ((f_BSW + f_TNE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*(-VeloX-VeloY-VeloZ)) / (q + c1o1) ;
+         (D.f[DIR_PPP])[ktne]=((c1o1 - q) * f_BSW + q * ((f_BSW + f_TNE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*(-VeloX-VeloY-VeloZ)) / (q + c1o1) ;
       }
 
       q = q_dirBNE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*( vx1+vx2-vx3)*/+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3) /** (one + drho)*/-cu_sq); 
-         (D.f[TSW])[ktsw]=((c1o1 - q) * f_BNE + q * ((f_BNE + f_TSW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*( VeloX+VeloY-VeloZ)) / (q + c1o1) ;
+         (D.f[DIR_MMP])[ktsw]=((c1o1 - q) * f_BNE + q * ((f_BNE + f_TSW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*( VeloX+VeloY-VeloZ)) / (q + c1o1) ;
       }
 
       q = q_dirTSW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*(-vx1-vx2+vx3)*/+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3) /** (one + drho)*/-cu_sq); 
-         (D.f[BNE])[kbne]=((c1o1 - q) * f_TSW + q * ((f_TSW + f_BNE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*(-VeloX-VeloY+VeloZ)) / (q + c1o1) ;
+         (D.f[DIR_PPM])[kbne]=((c1o1 - q) * f_TSW + q * ((f_TSW + f_BNE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*(-VeloX-VeloY+VeloZ)) / (q + c1o1) ;
       }
 
       q = q_dirTSE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*( vx1-vx2+vx3)*/+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3) /** (one + drho)*/-cu_sq); 
-         (D.f[BNW])[kbnw]=((c1o1 - q) * f_TSE + q * ((f_TSE + f_BNW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*( VeloX-VeloY+VeloZ)) / (q + c1o1) ;
+         (D.f[DIR_MPM])[kbnw]=((c1o1 - q) * f_TSE + q * ((f_TSE + f_BNW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*( VeloX-VeloY+VeloZ)) / (q + c1o1) ;
       }
 
       q = q_dirBNW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*(-vx1+vx2-vx3)*/+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3) /** (one + drho)*/-cu_sq); 
-         (D.f[TSE])[ktse]=((c1o1 - q) * f_BNW + q * ((f_BNW + f_TSE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*(-VeloX+VeloY-VeloZ)) / (q + c1o1) ;
+         (D.f[DIR_PMP])[ktse]=((c1o1 - q) * f_BNW + q * ((f_BNW + f_TSE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*(-VeloX+VeloY-VeloZ)) / (q + c1o1) ;
       }
 
       q = q_dirBSE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*( vx1-vx2-vx3)*/+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3) /** (one + drho)*/-cu_sq); 
-         (D.f[TNW])[ktnw]=((c1o1 - q) * f_BSE + q * ((f_BSE + f_TNW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*( VeloX-VeloY-VeloZ)) / (q + c1o1) ;
+         (D.f[DIR_MPP])[ktnw]=((c1o1 - q) * f_BSE + q * ((f_BSE + f_TNW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*( VeloX-VeloY-VeloZ)) / (q + c1o1) ;
       }
 
       q = q_dirTNW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*(-vx1+vx2+vx3)*/+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3) /** (one + drho)*/-cu_sq); 
-         (D.f[BSE])[kbse]=((c1o1 - q) * f_TNW + q * ((f_TNW + f_BSE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*(-VeloX+VeloY+VeloZ)) / (q + c1o1) ;
+         (D.f[DIR_PMM])[kbse]=((c1o1 - q) * f_TNW + q * ((f_TNW + f_BSE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*(-VeloX+VeloY+VeloZ)) / (q + c1o1) ;
       }
    }
 }
@@ -1618,7 +1617,7 @@ extern "C" __global__ void QVelDeviceIncompHighNu27(int inx,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QVelDeviceCompHighNu27(
+__global__ void QVelDeviceCompHighNu27(
 													real* vx,
 													real* vy,
 													real* vz,
@@ -1636,63 +1635,63 @@ extern "C" __global__ void QVelDeviceCompHighNu27(
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[E   ] = &DD[E   *size_Mat];
-      D.f[W   ] = &DD[W   *size_Mat];
-      D.f[N   ] = &DD[N   *size_Mat];
-      D.f[S   ] = &DD[S   *size_Mat];
-      D.f[T   ] = &DD[T   *size_Mat];
-      D.f[B   ] = &DD[B   *size_Mat];
-      D.f[NE  ] = &DD[NE  *size_Mat];
-      D.f[SW  ] = &DD[SW  *size_Mat];
-      D.f[SE  ] = &DD[SE  *size_Mat];
-      D.f[NW  ] = &DD[NW  *size_Mat];
-      D.f[TE  ] = &DD[TE  *size_Mat];
-      D.f[BW  ] = &DD[BW  *size_Mat];
-      D.f[BE  ] = &DD[BE  *size_Mat];
-      D.f[TW  ] = &DD[TW  *size_Mat];
-      D.f[TN  ] = &DD[TN  *size_Mat];
-      D.f[BS  ] = &DD[BS  *size_Mat];
-      D.f[BN  ] = &DD[BN  *size_Mat];
-      D.f[TS  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[TNE *size_Mat];
-      D.f[TSW ] = &DD[TSW *size_Mat];
-      D.f[TSE ] = &DD[TSE *size_Mat];
-      D.f[TNW ] = &DD[TNW *size_Mat];
-      D.f[BNE ] = &DD[BNE *size_Mat];
-      D.f[BSW ] = &DD[BSW *size_Mat];
-      D.f[BSE ] = &DD[BSE *size_Mat];
-      D.f[BNW ] = &DD[BNW *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
    } 
    else
    {
-      D.f[W   ] = &DD[E   *size_Mat];
-      D.f[E   ] = &DD[W   *size_Mat];
-      D.f[S   ] = &DD[N   *size_Mat];
-      D.f[N   ] = &DD[S   *size_Mat];
-      D.f[B   ] = &DD[T   *size_Mat];
-      D.f[T   ] = &DD[B   *size_Mat];
-      D.f[SW  ] = &DD[NE  *size_Mat];
-      D.f[NE  ] = &DD[SW  *size_Mat];
-      D.f[NW  ] = &DD[SE  *size_Mat];
-      D.f[SE  ] = &DD[NW  *size_Mat];
-      D.f[BW  ] = &DD[TE  *size_Mat];
-      D.f[TE  ] = &DD[BW  *size_Mat];
-      D.f[TW  ] = &DD[BE  *size_Mat];
-      D.f[BE  ] = &DD[TW  *size_Mat];
-      D.f[BS  ] = &DD[TN  *size_Mat];
-      D.f[TN  ] = &DD[BS  *size_Mat];
-      D.f[TS  ] = &DD[BN  *size_Mat];
-      D.f[BN  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[BSW *size_Mat];
-      D.f[TSW ] = &DD[BNE *size_Mat];
-      D.f[TSE ] = &DD[BNW *size_Mat];
-      D.f[TNW ] = &DD[BSE *size_Mat];
-      D.f[BNE ] = &DD[TSW *size_Mat];
-      D.f[BSW ] = &DD[TNE *size_Mat];
-      D.f[BSE ] = &DD[TNW *size_Mat];
-      D.f[BNW ] = &DD[TSE *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -1717,32 +1716,32 @@ extern "C" __global__ void QVelDeviceCompHighNu27(
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[E   * numberOfBCnodes];
-      q_dirW   = &QQ[W   * numberOfBCnodes];
-      q_dirN   = &QQ[N   * numberOfBCnodes];
-      q_dirS   = &QQ[S   * numberOfBCnodes];
-      q_dirT   = &QQ[T   * numberOfBCnodes];
-      q_dirB   = &QQ[B   * numberOfBCnodes];
-      q_dirNE  = &QQ[NE  * numberOfBCnodes];
-      q_dirSW  = &QQ[SW  * numberOfBCnodes];
-      q_dirSE  = &QQ[SE  * numberOfBCnodes];
-      q_dirNW  = &QQ[NW  * numberOfBCnodes];
-      q_dirTE  = &QQ[TE  * numberOfBCnodes];
-      q_dirBW  = &QQ[BW  * numberOfBCnodes];
-      q_dirBE  = &QQ[BE  * numberOfBCnodes];
-      q_dirTW  = &QQ[TW  * numberOfBCnodes];
-      q_dirTN  = &QQ[TN  * numberOfBCnodes];
-      q_dirBS  = &QQ[BS  * numberOfBCnodes];
-      q_dirBN  = &QQ[BN  * numberOfBCnodes];
-      q_dirTS  = &QQ[TS  * numberOfBCnodes];
-      q_dirTNE = &QQ[TNE * numberOfBCnodes];
-      q_dirTSW = &QQ[TSW * numberOfBCnodes];
-      q_dirTSE = &QQ[TSE * numberOfBCnodes];
-      q_dirTNW = &QQ[TNW * numberOfBCnodes];
-      q_dirBNE = &QQ[BNE * numberOfBCnodes];
-      q_dirBSW = &QQ[BSW * numberOfBCnodes];
-      q_dirBSE = &QQ[BSE * numberOfBCnodes];
-      q_dirBNW = &QQ[BNW * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
+      q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
+      q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
+      q_dirTNW = &QQ[DIR_MPP * numberOfBCnodes];
+      q_dirBNE = &QQ[DIR_PPM * numberOfBCnodes];
+      q_dirBSW = &QQ[DIR_MMM * numberOfBCnodes];
+      q_dirBSE = &QQ[DIR_PMM * numberOfBCnodes];
+      q_dirBNW = &QQ[DIR_MPM * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -1777,63 +1776,63 @@ extern "C" __global__ void QVelDeviceCompHighNu27(
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
          f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_E   = (D.f[E   ])[ke   ];
-      f_W   = (D.f[W   ])[kw   ];
-      f_N   = (D.f[N   ])[kn   ];
-      f_S   = (D.f[S   ])[ks   ];
-      f_T   = (D.f[T   ])[kt   ];
-      f_B   = (D.f[B   ])[kb   ];
-      f_NE  = (D.f[NE  ])[kne  ];
-      f_SW  = (D.f[SW  ])[ksw  ];
-      f_SE  = (D.f[SE  ])[kse  ];
-      f_NW  = (D.f[NW  ])[knw  ];
-      f_TE  = (D.f[TE  ])[kte  ];
-      f_BW  = (D.f[BW  ])[kbw  ];
-      f_BE  = (D.f[BE  ])[kbe  ];
-      f_TW  = (D.f[TW  ])[ktw  ];
-      f_TN  = (D.f[TN  ])[ktn  ];
-      f_BS  = (D.f[BS  ])[kbs  ];
-      f_BN  = (D.f[BN  ])[kbn  ];
-      f_TS  = (D.f[TS  ])[kts  ];
-      f_TNE = (D.f[TNE ])[ktne ];
-      f_TSW = (D.f[TSW ])[ktsw ];
-      f_TSE = (D.f[TSE ])[ktse ];
-      f_TNW = (D.f[TNW ])[ktnw ];
-      f_BNE = (D.f[BNE ])[kbne ];
-      f_BSW = (D.f[BSW ])[kbsw ];
-      f_BSE = (D.f[BSE ])[kbse ];
-      f_BNW = (D.f[BNW ])[kbnw ];
-      //f_W    = (D.f[E   ])[ke   ];
-      //f_E    = (D.f[W   ])[kw   ];
-      //f_S    = (D.f[N   ])[kn   ];
-      //f_N    = (D.f[S   ])[ks   ];
-      //f_B    = (D.f[T   ])[kt   ];
-      //f_T    = (D.f[B   ])[kb   ];
-      //f_SW   = (D.f[NE  ])[kne  ];
-      //f_NE   = (D.f[SW  ])[ksw  ];
-      //f_NW   = (D.f[SE  ])[kse  ];
-      //f_SE   = (D.f[NW  ])[knw  ];
-      //f_BW   = (D.f[TE  ])[kte  ];
-      //f_TE   = (D.f[BW  ])[kbw  ];
-      //f_TW   = (D.f[BE  ])[kbe  ];
-      //f_BE   = (D.f[TW  ])[ktw  ];
-      //f_BS   = (D.f[TN  ])[ktn  ];
-      //f_TN   = (D.f[BS  ])[kbs  ];
-      //f_TS   = (D.f[BN  ])[kbn  ];
-      //f_BN   = (D.f[TS  ])[kts  ];
-      //f_BSW  = (D.f[TNE ])[ktne ];
-      //f_BNE  = (D.f[TSW ])[ktsw ];
-      //f_BNW  = (D.f[TSE ])[ktse ];
-      //f_BSE  = (D.f[TNW ])[ktnw ];
-      //f_TSW  = (D.f[BNE ])[kbne ];
-      //f_TNE  = (D.f[BSW ])[kbsw ];
-      //f_TNW  = (D.f[BSE ])[kbse ];
-      //f_TSE  = (D.f[BNW ])[kbnw ];
+      f_E   = (D.f[DIR_P00   ])[ke   ];
+      f_W   = (D.f[DIR_M00   ])[kw   ];
+      f_N   = (D.f[DIR_0P0   ])[kn   ];
+      f_S   = (D.f[DIR_0M0   ])[ks   ];
+      f_T   = (D.f[DIR_00P   ])[kt   ];
+      f_B   = (D.f[DIR_00M   ])[kb   ];
+      f_NE  = (D.f[DIR_PP0  ])[kne  ];
+      f_SW  = (D.f[DIR_MM0  ])[ksw  ];
+      f_SE  = (D.f[DIR_PM0  ])[kse  ];
+      f_NW  = (D.f[DIR_MP0  ])[knw  ];
+      f_TE  = (D.f[DIR_P0P  ])[kte  ];
+      f_BW  = (D.f[DIR_M0M  ])[kbw  ];
+      f_BE  = (D.f[DIR_P0M  ])[kbe  ];
+      f_TW  = (D.f[DIR_M0P  ])[ktw  ];
+      f_TN  = (D.f[DIR_0PP  ])[ktn  ];
+      f_BS  = (D.f[DIR_0MM  ])[kbs  ];
+      f_BN  = (D.f[DIR_0PM  ])[kbn  ];
+      f_TS  = (D.f[DIR_0MP  ])[kts  ];
+      f_TNE = (D.f[DIR_PPP ])[ktne ];
+      f_TSW = (D.f[DIR_MMP ])[ktsw ];
+      f_TSE = (D.f[DIR_PMP ])[ktse ];
+      f_TNW = (D.f[DIR_MPP ])[ktnw ];
+      f_BNE = (D.f[DIR_PPM ])[kbne ];
+      f_BSW = (D.f[DIR_MMM ])[kbsw ];
+      f_BSE = (D.f[DIR_PMM ])[kbse ];
+      f_BNW = (D.f[DIR_MPM ])[kbnw ];
+      //f_W    = (D.f[DIR_P00   ])[ke   ];
+      //f_E    = (D.f[DIR_M00   ])[kw   ];
+      //f_S    = (D.f[DIR_0P0   ])[kn   ];
+      //f_N    = (D.f[DIR_0M0   ])[ks   ];
+      //f_B    = (D.f[DIR_00P   ])[kt   ];
+      //f_T    = (D.f[DIR_00M   ])[kb   ];
+      //f_SW   = (D.f[DIR_PP0  ])[kne  ];
+      //f_NE   = (D.f[DIR_MM0  ])[ksw  ];
+      //f_NW   = (D.f[DIR_PM0  ])[kse  ];
+      //f_SE   = (D.f[DIR_MP0  ])[knw  ];
+      //f_BW   = (D.f[DIR_P0P  ])[kte  ];
+      //f_TE   = (D.f[DIR_M0M  ])[kbw  ];
+      //f_TW   = (D.f[DIR_P0M  ])[kbe  ];
+      //f_BE   = (D.f[DIR_M0P  ])[ktw  ];
+      //f_BS   = (D.f[DIR_0PP  ])[ktn  ];
+      //f_TN   = (D.f[DIR_0MM  ])[kbs  ];
+      //f_TS   = (D.f[DIR_0PM  ])[kbn  ];
+      //f_BN   = (D.f[DIR_0MP  ])[kts  ];
+      //f_BSW  = (D.f[DIR_PPP ])[ktne ];
+      //f_BNE  = (D.f[DIR_MMP ])[ktsw ];
+      //f_BNW  = (D.f[DIR_PMP ])[ktse ];
+      //f_BSE  = (D.f[DIR_MPP ])[ktnw ];
+      //f_TSW  = (D.f[DIR_PPM ])[kbne ];
+      //f_TNE  = (D.f[DIR_MMM ])[kbsw ];
+      //f_TNW  = (D.f[DIR_PMM ])[kbse ];
+      //f_TSE  = (D.f[DIR_MPM ])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, drho, feq, q;
       drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
                 f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + 
-                f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[REST])[kzero]); 
+                f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[DIR_000])[kzero]); 
 
       vx1    =  (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
                 ((f_BE - f_TW)   + (f_TE - f_BW))   + ((f_SE - f_NW)   + (f_NE - f_SW)) +
@@ -1853,67 +1852,67 @@ extern "C" __global__ void QVelDeviceCompHighNu27(
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D.f[E   ] = &DD[E   *size_Mat];
-         D.f[W   ] = &DD[W   *size_Mat];
-         D.f[N   ] = &DD[N   *size_Mat];
-         D.f[S   ] = &DD[S   *size_Mat];
-         D.f[T   ] = &DD[T   *size_Mat];
-         D.f[B   ] = &DD[B   *size_Mat];
-         D.f[NE  ] = &DD[NE  *size_Mat];
-         D.f[SW  ] = &DD[SW  *size_Mat];
-         D.f[SE  ] = &DD[SE  *size_Mat];
-         D.f[NW  ] = &DD[NW  *size_Mat];
-         D.f[TE  ] = &DD[TE  *size_Mat];
-         D.f[BW  ] = &DD[BW  *size_Mat];
-         D.f[BE  ] = &DD[BE  *size_Mat];
-         D.f[TW  ] = &DD[TW  *size_Mat];
-         D.f[TN  ] = &DD[TN  *size_Mat];
-         D.f[BS  ] = &DD[BS  *size_Mat];
-         D.f[BN  ] = &DD[BN  *size_Mat];
-         D.f[TS  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[TNE *size_Mat];
-         D.f[TSW ] = &DD[TSW *size_Mat];
-         D.f[TSE ] = &DD[TSE *size_Mat];
-         D.f[TNW ] = &DD[TNW *size_Mat];
-         D.f[BNE ] = &DD[BNE *size_Mat];
-         D.f[BSW ] = &DD[BSW *size_Mat];
-         D.f[BSE ] = &DD[BSE *size_Mat];
-         D.f[BNW ] = &DD[BNW *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
       } 
       else
       {
-         D.f[W   ] = &DD[E   *size_Mat];
-         D.f[E   ] = &DD[W   *size_Mat];
-         D.f[S   ] = &DD[N   *size_Mat];
-         D.f[N   ] = &DD[S   *size_Mat];
-         D.f[B   ] = &DD[T   *size_Mat];
-         D.f[T   ] = &DD[B   *size_Mat];
-         D.f[SW  ] = &DD[NE  *size_Mat];
-         D.f[NE  ] = &DD[SW  *size_Mat];
-         D.f[NW  ] = &DD[SE  *size_Mat];
-         D.f[SE  ] = &DD[NW  *size_Mat];
-         D.f[BW  ] = &DD[TE  *size_Mat];
-         D.f[TE  ] = &DD[BW  *size_Mat];
-         D.f[TW  ] = &DD[BE  *size_Mat];
-         D.f[BE  ] = &DD[TW  *size_Mat];
-         D.f[BS  ] = &DD[TN  *size_Mat];
-         D.f[TN  ] = &DD[BS  *size_Mat];
-         D.f[TS  ] = &DD[BN  *size_Mat];
-         D.f[BN  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[BSW *size_Mat];
-         D.f[TSW ] = &DD[BNE *size_Mat];
-         D.f[TSE ] = &DD[BNW *size_Mat];
-         D.f[TNW ] = &DD[BSE *size_Mat];
-         D.f[BNE ] = &DD[TSW *size_Mat];
-         D.f[BSW ] = &DD[TNE *size_Mat];
-         D.f[BSE ] = &DD[TNW *size_Mat];
-         D.f[BNW ] = &DD[TSE *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
-      //(D.f[REST])[k]=c1o10;
+      //(D.f[DIR_000])[k]=c1o10;
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
 	  //ToDo anders Klammern
@@ -1922,234 +1921,234 @@ extern "C" __global__ void QVelDeviceCompHighNu27(
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c2o27* (drho/*+three*( vx1        )*/+c9o2*( vx1        )*( vx1        ) * (c1o1 + drho)-cu_sq); 
-         (D.f[W])[kw]=((c1o1 - q) * f_E + q * ((f_E + f_W) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*( VeloX     )) / (q + c1o1) ;
-         //(D.f[W])[kw]=(one-q)/(one+q)*(f_E-f_W+(f_E+f_W-two*feq*om1)/(one-om1))*c1o2+(q*(f_E+f_W)-six*c2over27*( VeloX     ))/(one+q) - c2over27 * drho;
-         //(D.f[W])[kw]=zero;
+         (D.f[DIR_M00])[kw]=((c1o1 - q) * f_E + q * ((f_E + f_W) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*( VeloX     )) / (q + c1o1) ;
+         //(D.f[DIR_M00])[kw]=(one-q)/(one+q)*(f_E-f_W+(f_E+f_W-two*feq*om1)/(one-om1))*c1o2+(q*(f_E+f_W)-six*c2over27*( VeloX     ))/(one+q) - c2over27 * drho;
+         //(D.f[DIR_M00])[kw]=zero;
       }
 
       q = q_dirW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c2o27* (drho/*+three*(-vx1        )*/+c9o2*(-vx1        )*(-vx1        ) * (c1o1 + drho)-cu_sq); 
-         (D.f[E])[ke]=((c1o1 - q) * f_W + q * ((f_W + f_E) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*(-VeloX     )) / (q + c1o1) ;
-         //(D.f[E])[ke]=(one-q)/(one+q)*(f_W-f_E+(f_W+f_E-two*feq*om1)/(one-om1))*c1o2+(q*(f_W+f_E)-six*c2over27*(-VeloX     ))/(one+q) - c2over27 * drho;
-         //(D.f[E])[ke]=zero;
+         (D.f[DIR_P00])[ke]=((c1o1 - q) * f_W + q * ((f_W + f_E) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*(-VeloX     )) / (q + c1o1) ;
+         //(D.f[DIR_P00])[ke]=(one-q)/(one+q)*(f_W-f_E+(f_W+f_E-two*feq*om1)/(one-om1))*c1o2+(q*(f_W+f_E)-six*c2over27*(-VeloX     ))/(one+q) - c2over27 * drho;
+         //(D.f[DIR_P00])[ke]=zero;
       }
 
       q = q_dirN[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c2o27* (drho/*+three*(    vx2     )*/+c9o2*(     vx2    )*(     vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[S])[ks]=((c1o1 - q) * f_N + q * ((f_N + f_S) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*( VeloY     )) / (q + c1o1) ;
-         //(D.f[S])[ks]=(one-q)/(one+q)*(f_N-f_S+(f_N+f_S-two*feq*om1)/(one-om1))*c1o2+(q*(f_N+f_S)-six*c2over27*( VeloY     ))/(one+q) - c2over27 * drho;
-         //(D.f[S])[ks]=zero;
+         (D.f[DIR_0M0])[ks]=((c1o1 - q) * f_N + q * ((f_N + f_S) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*( VeloY     )) / (q + c1o1) ;
+         //(D.f[DIR_0M0])[ks]=(one-q)/(one+q)*(f_N-f_S+(f_N+f_S-two*feq*om1)/(one-om1))*c1o2+(q*(f_N+f_S)-six*c2over27*( VeloY     ))/(one+q) - c2over27 * drho;
+         //(D.f[DIR_0M0])[ks]=zero;
       }
 
       q = q_dirS[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c2o27* (drho/*+three*(   -vx2     )*/+c9o2*(    -vx2    )*(    -vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[N])[kn]=((c1o1 - q) * f_S + q * ((f_S + f_N) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*(-VeloY     )) / (q + c1o1) ;
-         //(D.f[N])[kn]=(one-q)/(one+q)*(f_S-f_N+(f_S+f_N-two*feq*om1)/(one-om1))*c1o2+(q*(f_S+f_N)-six*c2over27*(-VeloY     ))/(one+q) - c2over27 * drho;
-         //(D.f[N])[kn]=zero;
+         (D.f[DIR_0P0])[kn]=((c1o1 - q) * f_S + q * ((f_S + f_N) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*(-VeloY     )) / (q + c1o1) ;
+         //(D.f[DIR_0P0])[kn]=(one-q)/(one+q)*(f_S-f_N+(f_S+f_N-two*feq*om1)/(one-om1))*c1o2+(q*(f_S+f_N)-six*c2over27*(-VeloY     ))/(one+q) - c2over27 * drho;
+         //(D.f[DIR_0P0])[kn]=zero;
       }
 
       q = q_dirT[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c2o27* (drho/*+three*(         vx3)*/+c9o2*(         vx3)*(         vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[B])[kb]=((c1o1 - q) * f_T + q * ((f_T + f_B) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*( VeloZ     )) / (q + c1o1) ;
-         //(D.f[B])[kb]=(one-q)/(one+q)*(f_T-f_B+(f_T+f_B-two*feq*om1)/(one-om1))*c1o2+(q*(f_T+f_B)-six*c2over27*( VeloZ     ))/(one+q) - c2over27 * drho;
-         //(D.f[B])[kb]=one;
+         (D.f[DIR_00M])[kb]=((c1o1 - q) * f_T + q * ((f_T + f_B) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*( VeloZ     )) / (q + c1o1) ;
+         //(D.f[DIR_00M])[kb]=(one-q)/(one+q)*(f_T-f_B+(f_T+f_B-two*feq*om1)/(one-om1))*c1o2+(q*(f_T+f_B)-six*c2over27*( VeloZ     ))/(one+q) - c2over27 * drho;
+         //(D.f[DIR_00M])[kb]=one;
       }
 
       q = q_dirB[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c2o27* (drho/*+three*(        -vx3)*/+c9o2*(        -vx3)*(        -vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[T])[kt]=((c1o1 - q) * f_B + q * ((f_B + f_T) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*(-VeloZ     )) / (q + c1o1) ;
-         //(D.f[T])[kt]=(one-q)/(one+q)*(f_B-f_T+(f_B+f_T-two*feq*om1)/(one-om1))*c1o2+(q*(f_B+f_T)-six*c2over27*(-VeloZ     ))/(one+q) - c2over27 * drho;
-         //(D.f[T])[kt]=zero;
+         (D.f[DIR_00P])[kt]=((c1o1 - q) * f_B + q * ((f_B + f_T) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c2o27*(-VeloZ     )) / (q + c1o1) ;
+         //(D.f[DIR_00P])[kt]=(one-q)/(one+q)*(f_B-f_T+(f_B+f_T-two*feq*om1)/(one-om1))*c1o2+(q*(f_B+f_T)-six*c2over27*(-VeloZ     ))/(one+q) - c2over27 * drho;
+         //(D.f[DIR_00P])[kt]=zero;
       }
 
       q = q_dirNE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*( vx1+vx2    )*/+c9o2*( vx1+vx2    )*( vx1+vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[SW])[ksw]=((c1o1 - q) * f_NE + q * ((f_NE + f_SW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloX+VeloY)) / (q + c1o1) ;
-         //(D.f[SW])[ksw]=(one-q)/(one+q)*(f_NE-f_SW+(f_NE+f_SW-two*feq*om1)/(one-om1))*c1o2+(q*(f_NE+f_SW)-six*c1over54*(VeloX+VeloY))/(one+q) - c1over54 * drho;
-         //(D.f[SW])[ksw]=zero;
+         (D.f[DIR_MM0])[ksw]=((c1o1 - q) * f_NE + q * ((f_NE + f_SW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloX+VeloY)) / (q + c1o1) ;
+         //(D.f[DIR_MM0])[ksw]=(one-q)/(one+q)*(f_NE-f_SW+(f_NE+f_SW-two*feq*om1)/(one-om1))*c1o2+(q*(f_NE+f_SW)-six*c1over54*(VeloX+VeloY))/(one+q) - c1over54 * drho;
+         //(D.f[DIR_MM0])[ksw]=zero;
       }
 
       q = q_dirSW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(-vx1-vx2    )*/+c9o2*(-vx1-vx2    )*(-vx1-vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[NE])[kne]=((c1o1 - q) * f_SW + q * ((f_SW + f_NE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloX-VeloY)) / (q + c1o1) ;
-         //(D.f[NE])[kne]=(one-q)/(one+q)*(f_SW-f_NE+(f_SW+f_NE-two*feq*om1)/(one-om1))*c1o2+(q*(f_SW+f_NE)-six*c1over54*(-VeloX-VeloY))/(one+q) - c1over54 * drho;
-         //(D.f[NE])[kne]=zero;
+         (D.f[DIR_PP0])[kne]=((c1o1 - q) * f_SW + q * ((f_SW + f_NE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloX-VeloY)) / (q + c1o1) ;
+         //(D.f[DIR_PP0])[kne]=(one-q)/(one+q)*(f_SW-f_NE+(f_SW+f_NE-two*feq*om1)/(one-om1))*c1o2+(q*(f_SW+f_NE)-six*c1over54*(-VeloX-VeloY))/(one+q) - c1over54 * drho;
+         //(D.f[DIR_PP0])[kne]=zero;
       }
 
       q = q_dirSE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*( vx1-vx2    )*/+c9o2*( vx1-vx2    )*( vx1-vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[NW])[knw]=((c1o1 - q) * f_SE + q * ((f_SE + f_NW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloX-VeloY)) / (q + c1o1) ;
-         //(D.f[NW])[knw]=(one-q)/(one+q)*(f_SE-f_NW+(f_SE+f_NW-two*feq*om1)/(one-om1))*c1o2+(q*(f_SE+f_NW)-six*c1over54*( VeloX-VeloY))/(one+q) - c1over54 * drho;
-         //(D.f[NW])[knw]=zero;
+         (D.f[DIR_MP0])[knw]=((c1o1 - q) * f_SE + q * ((f_SE + f_NW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloX-VeloY)) / (q + c1o1) ;
+         //(D.f[DIR_MP0])[knw]=(one-q)/(one+q)*(f_SE-f_NW+(f_SE+f_NW-two*feq*om1)/(one-om1))*c1o2+(q*(f_SE+f_NW)-six*c1over54*( VeloX-VeloY))/(one+q) - c1over54 * drho;
+         //(D.f[DIR_MP0])[knw]=zero;
       }
 
       q = q_dirNW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(-vx1+vx2    )*/+c9o2*(-vx1+vx2    )*(-vx1+vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[SE])[kse]=((c1o1 - q) * f_NW + q * ((f_NW + f_SE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloX+VeloY)) / (q + c1o1) ;
-         //(D.f[SE])[kse]=(one-q)/(one+q)*(f_NW-f_SE+(f_NW+f_SE-two*feq*om1)/(one-om1))*c1o2+(q*(f_NW+f_SE)-six*c1over54*(-VeloX+VeloY))/(one+q) - c1over54 * drho;
-         //(D.f[SE])[kse]=zero;
+         (D.f[DIR_PM0])[kse]=((c1o1 - q) * f_NW + q * ((f_NW + f_SE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloX+VeloY)) / (q + c1o1) ;
+         //(D.f[DIR_PM0])[kse]=(one-q)/(one+q)*(f_NW-f_SE+(f_NW+f_SE-two*feq*om1)/(one-om1))*c1o2+(q*(f_NW+f_SE)-six*c1over54*(-VeloX+VeloY))/(one+q) - c1over54 * drho;
+         //(D.f[DIR_PM0])[kse]=zero;
       }
 
       q = q_dirTE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*( vx1    +vx3)*/+c9o2*( vx1    +vx3)*( vx1    +vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BW])[kbw]=((c1o1 - q) * f_TE + q * ((f_TE + f_BW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloX+VeloZ)) / (q + c1o1) ;
-         //(D.f[BW])[kbw]=(one-q)/(one+q)*(f_TE-f_BW+(f_TE+f_BW-two*feq*om1)/(one-om1))*c1o2+(q*(f_TE+f_BW)-six*c1over54*( VeloX+VeloZ))/(one+q) - c1over54 * drho;
-         //(D.f[BW])[kbw]=zero;
+         (D.f[DIR_M0M])[kbw]=((c1o1 - q) * f_TE + q * ((f_TE + f_BW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloX+VeloZ)) / (q + c1o1) ;
+         //(D.f[DIR_M0M])[kbw]=(one-q)/(one+q)*(f_TE-f_BW+(f_TE+f_BW-two*feq*om1)/(one-om1))*c1o2+(q*(f_TE+f_BW)-six*c1over54*( VeloX+VeloZ))/(one+q) - c1over54 * drho;
+         //(D.f[DIR_M0M])[kbw]=zero;
       }
 
       q = q_dirBW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(-vx1    -vx3)*/+c9o2*(-vx1    -vx3)*(-vx1    -vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TE])[kte]=((c1o1 - q) * f_BW + q * ((f_BW + f_TE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloX-VeloZ)) / (q + c1o1) ;
-         //(D.f[TE])[kte]=(one-q)/(one+q)*(f_BW-f_TE+(f_BW+f_TE-two*feq*om1)/(one-om1))*c1o2+(q*(f_BW+f_TE)-six*c1over54*(-VeloX-VeloZ))/(one+q) - c1over54 * drho;
-         //(D.f[TE])[kte]=zero;
+         (D.f[DIR_P0P])[kte]=((c1o1 - q) * f_BW + q * ((f_BW + f_TE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloX-VeloZ)) / (q + c1o1) ;
+         //(D.f[DIR_P0P])[kte]=(one-q)/(one+q)*(f_BW-f_TE+(f_BW+f_TE-two*feq*om1)/(one-om1))*c1o2+(q*(f_BW+f_TE)-six*c1over54*(-VeloX-VeloZ))/(one+q) - c1over54 * drho;
+         //(D.f[DIR_P0P])[kte]=zero;
       }
 
       q = q_dirBE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*( vx1    -vx3)*/+c9o2*( vx1    -vx3)*( vx1    -vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TW])[ktw]=((c1o1 - q) * f_BE + q * ((f_BE + f_TW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloX-VeloZ)) / (q + c1o1) ;
-         //(D.f[TW])[ktw]=(one-q)/(one+q)*(f_BE-f_TW+(f_BE+f_TW-two*feq*om1)/(one-om1))*c1o2+(q*(f_BE+f_TW)-six*c1over54*( VeloX-VeloZ))/(one+q) - c1over54 * drho;
-         //(D.f[TW])[ktw]=zero;
+         (D.f[DIR_M0P])[ktw]=((c1o1 - q) * f_BE + q * ((f_BE + f_TW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloX-VeloZ)) / (q + c1o1) ;
+         //(D.f[DIR_M0P])[ktw]=(one-q)/(one+q)*(f_BE-f_TW+(f_BE+f_TW-two*feq*om1)/(one-om1))*c1o2+(q*(f_BE+f_TW)-six*c1over54*( VeloX-VeloZ))/(one+q) - c1over54 * drho;
+         //(D.f[DIR_M0P])[ktw]=zero;
       }
 
       q = q_dirTW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(-vx1    +vx3)*/+c9o2*(-vx1    +vx3)*(-vx1    +vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BE])[kbe]=((c1o1 - q) * f_TW + q * ((f_TW + f_BE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloX+VeloZ)) / (q + c1o1) ;
-         //(D.f[BE])[kbe]=(one-q)/(one+q)*(f_TW-f_BE+(f_TW+f_BE-two*feq*om1)/(one-om1))*c1o2+(q*(f_TW+f_BE)-six*c1over54*(-VeloX+VeloZ))/(one+q) - c1over54 * drho;
-         //(D.f[BE])[kbe]=zero;
+         (D.f[DIR_P0M])[kbe]=((c1o1 - q) * f_TW + q * ((f_TW + f_BE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloX+VeloZ)) / (q + c1o1) ;
+         //(D.f[DIR_P0M])[kbe]=(one-q)/(one+q)*(f_TW-f_BE+(f_TW+f_BE-two*feq*om1)/(one-om1))*c1o2+(q*(f_TW+f_BE)-six*c1over54*(-VeloX+VeloZ))/(one+q) - c1over54 * drho;
+         //(D.f[DIR_P0M])[kbe]=zero;
       }
 
       q = q_dirTN[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(     vx2+vx3)*/+c9o2*(     vx2+vx3)*(     vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BS])[kbs]=((c1o1 - q) * f_TN + q * ((f_TN + f_BS) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloY+VeloZ)) / (q + c1o1) ;
-         //(D.f[BS])[kbs]=(one-q)/(one+q)*(f_TN-f_BS+(f_TN+f_BS-two*feq*om1)/(one-om1))*c1o2+(q*(f_TN+f_BS)-six*c1over54*( VeloY+VeloZ))/(one+q) - c1over54 * drho;
-         //(D.f[BS])[kbs]=zero;
+         (D.f[DIR_0MM])[kbs]=((c1o1 - q) * f_TN + q * ((f_TN + f_BS) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloY+VeloZ)) / (q + c1o1) ;
+         //(D.f[DIR_0MM])[kbs]=(one-q)/(one+q)*(f_TN-f_BS+(f_TN+f_BS-two*feq*om1)/(one-om1))*c1o2+(q*(f_TN+f_BS)-six*c1over54*( VeloY+VeloZ))/(one+q) - c1over54 * drho;
+         //(D.f[DIR_0MM])[kbs]=zero;
       }
 
       q = q_dirBS[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(    -vx2-vx3)*/+c9o2*(    -vx2-vx3)*(    -vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TN])[ktn]=((c1o1 - q) * f_BS + q * ((f_BS + f_TN) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloY-VeloZ)) / (q + c1o1) ;
-         //(D.f[TN])[ktn]=(one-q)/(one+q)*(f_BS-f_TN+(f_BS+f_TN-two*feq*om1)/(one-om1))*c1o2+(q*(f_BS+f_TN)-six*c1over54*( -VeloY-VeloZ))/(one+q) - c1over54 * drho;
-         //(D.f[TN])[ktn]=zero;
+         (D.f[DIR_0PP])[ktn]=((c1o1 - q) * f_BS + q * ((f_BS + f_TN) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloY-VeloZ)) / (q + c1o1) ;
+         //(D.f[DIR_0PP])[ktn]=(one-q)/(one+q)*(f_BS-f_TN+(f_BS+f_TN-two*feq*om1)/(one-om1))*c1o2+(q*(f_BS+f_TN)-six*c1over54*( -VeloY-VeloZ))/(one+q) - c1over54 * drho;
+         //(D.f[DIR_0PP])[ktn]=zero;
       }
 
       q = q_dirBN[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(     vx2-vx3)*/+c9o2*(     vx2-vx3)*(     vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TS])[kts]=((c1o1 - q) * f_BN + q * ((f_BN + f_TS) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloY-VeloZ)) / (q + c1o1) ;
-         //(D.f[TS])[kts]=(one-q)/(one+q)*(f_BN-f_TS+(f_BN+f_TS-two*feq*om1)/(one-om1))*c1o2+(q*(f_BN+f_TS)-six*c1over54*( VeloY-VeloZ))/(one+q) - c1over54 * drho;
-         //(D.f[TS])[kts]=zero;
+         (D.f[DIR_0MP])[kts]=((c1o1 - q) * f_BN + q * ((f_BN + f_TS) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*( VeloY-VeloZ)) / (q + c1o1) ;
+         //(D.f[DIR_0MP])[kts]=(one-q)/(one+q)*(f_BN-f_TS+(f_BN+f_TS-two*feq*om1)/(one-om1))*c1o2+(q*(f_BN+f_TS)-six*c1over54*( VeloY-VeloZ))/(one+q) - c1over54 * drho;
+         //(D.f[DIR_0MP])[kts]=zero;
       }
 
       q = q_dirTS[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(    -vx2+vx3)*/+c9o2*(    -vx2+vx3)*(    -vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BN])[kbn]=((c1o1 - q) * f_TS + q * ((f_TS + f_BN) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloY+VeloZ)) / (q + c1o1) ;
-         //(D.f[BN])[kbn]=(one-q)/(one+q)*(f_TS-f_BN+(f_TS+f_BN-two*feq*om1)/(one-om1))*c1o2+(q*(f_TS+f_BN)-six*c1over54*( -VeloY+VeloZ))/(one+q) - c1over54 * drho;
-         //(D.f[BN])[kbn]=zero;
+         (D.f[DIR_0PM])[kbn]=((c1o1 - q) * f_TS + q * ((f_TS + f_BN) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o54*(-VeloY+VeloZ)) / (q + c1o1) ;
+         //(D.f[DIR_0PM])[kbn]=(one-q)/(one+q)*(f_TS-f_BN+(f_TS+f_BN-two*feq*om1)/(one-om1))*c1o2+(q*(f_TS+f_BN)-six*c1over54*( -VeloY+VeloZ))/(one+q) - c1over54 * drho;
+         //(D.f[DIR_0PM])[kbn]=zero;
       }
 
       q = q_dirTNE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*( vx1+vx2+vx3)*/+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BSW])[kbsw]=((c1o1 - q) * f_TNE + q * ((f_TNE + f_BSW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*( VeloX+VeloY+VeloZ)) / (q + c1o1) ;
-         //(D.f[BSW])[kbsw]=(one-q)/(one+q)*(f_TNE-f_BSW+(f_TNE+f_BSW-two*feq*om1)/(one-om1))*c1o2+(q*(f_TNE+f_BSW)-six*c1over216*( VeloX+VeloY+VeloZ))/(one+q) - c1over216 * drho;
-         //(D.f[BSW])[kbsw]=zero;
+         (D.f[DIR_MMM])[kbsw]=((c1o1 - q) * f_TNE + q * ((f_TNE + f_BSW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*( VeloX+VeloY+VeloZ)) / (q + c1o1) ;
+         //(D.f[DIR_MMM])[kbsw]=(one-q)/(one+q)*(f_TNE-f_BSW+(f_TNE+f_BSW-two*feq*om1)/(one-om1))*c1o2+(q*(f_TNE+f_BSW)-six*c1over216*( VeloX+VeloY+VeloZ))/(one+q) - c1over216 * drho;
+         //(D.f[DIR_MMM])[kbsw]=zero;
       }
 
       q = q_dirBSW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*(-vx1-vx2-vx3)*/+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TNE])[ktne]=((c1o1 - q) * f_BSW + q * ((f_BSW + f_TNE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*(-VeloX-VeloY-VeloZ)) / (q + c1o1) ;
-         //(D.f[TNE])[ktne]=(one-q)/(one+q)*(f_BSW-f_TNE+(f_BSW+f_TNE-two*feq*om1)/(one-om1))*c1o2+(q*(f_BSW+f_TNE)-six*c1over216*(-VeloX-VeloY-VeloZ))/(one+q) - c1over216 * drho;
-         //(D.f[TNE])[ktne]=zero;
+         (D.f[DIR_PPP])[ktne]=((c1o1 - q) * f_BSW + q * ((f_BSW + f_TNE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*(-VeloX-VeloY-VeloZ)) / (q + c1o1) ;
+         //(D.f[DIR_PPP])[ktne]=(one-q)/(one+q)*(f_BSW-f_TNE+(f_BSW+f_TNE-two*feq*om1)/(one-om1))*c1o2+(q*(f_BSW+f_TNE)-six*c1over216*(-VeloX-VeloY-VeloZ))/(one+q) - c1over216 * drho;
+         //(D.f[DIR_PPP])[ktne]=zero;
       }
 
       q = q_dirBNE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*( vx1+vx2-vx3)*/+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TSW])[ktsw]=((c1o1 - q) * f_BNE + q * ((f_BNE + f_TSW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*( VeloX+VeloY-VeloZ)) / (q + c1o1) ;
-         //(D.f[TSW])[ktsw]=(one-q)/(one+q)*(f_BNE-f_TSW+(f_BNE+f_TSW-two*feq*om1)/(one-om1))*c1o2+(q*(f_BNE+f_TSW)-six*c1over216*( VeloX+VeloY-VeloZ))/(one+q) - c1over216 * drho;
-         //(D.f[TSW])[ktsw]=zero;
+         (D.f[DIR_MMP])[ktsw]=((c1o1 - q) * f_BNE + q * ((f_BNE + f_TSW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*( VeloX+VeloY-VeloZ)) / (q + c1o1) ;
+         //(D.f[DIR_MMP])[ktsw]=(one-q)/(one+q)*(f_BNE-f_TSW+(f_BNE+f_TSW-two*feq*om1)/(one-om1))*c1o2+(q*(f_BNE+f_TSW)-six*c1over216*( VeloX+VeloY-VeloZ))/(one+q) - c1over216 * drho;
+         //(D.f[DIR_MMP])[ktsw]=zero;
       }
 
       q = q_dirTSW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*(-vx1-vx2+vx3)*/+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BNE])[kbne]=((c1o1 - q) * f_TSW + q * ((f_TSW + f_BNE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*(-VeloX-VeloY+VeloZ)) / (q + c1o1) ;
-         //(D.f[BNE])[kbne]=(one-q)/(one+q)*(f_TSW-f_BNE+(f_TSW+f_BNE-two*feq*om1)/(one-om1))*c1o2+(q*(f_TSW+f_BNE)-six*c1over216*(-VeloX-VeloY+VeloZ))/(one+q) - c1over216 * drho;
-         //(D.f[BNE])[kbne]=zero;
+         (D.f[DIR_PPM])[kbne]=((c1o1 - q) * f_TSW + q * ((f_TSW + f_BNE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*(-VeloX-VeloY+VeloZ)) / (q + c1o1) ;
+         //(D.f[DIR_PPM])[kbne]=(one-q)/(one+q)*(f_TSW-f_BNE+(f_TSW+f_BNE-two*feq*om1)/(one-om1))*c1o2+(q*(f_TSW+f_BNE)-six*c1over216*(-VeloX-VeloY+VeloZ))/(one+q) - c1over216 * drho;
+         //(D.f[DIR_PPM])[kbne]=zero;
       }
 
       q = q_dirTSE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*( vx1-vx2+vx3)*/+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BNW])[kbnw]=((c1o1 - q) * f_TSE + q * ((f_TSE + f_BNW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*( VeloX-VeloY+VeloZ)) / (q + c1o1) ;
-         //(D.f[BNW])[kbnw]=(one-q)/(one+q)*(f_TSE-f_BNW+(f_TSE+f_BNW-two*feq*om1)/(one-om1))*c1o2+(q*(f_TSE+f_BNW)-six*c1over216*( VeloX-VeloY+VeloZ))/(one+q) - c1over216 * drho;
-         //(D.f[BNW])[kbnw]=zero;
+         (D.f[DIR_MPM])[kbnw]=((c1o1 - q) * f_TSE + q * ((f_TSE + f_BNW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*( VeloX-VeloY+VeloZ)) / (q + c1o1) ;
+         //(D.f[DIR_MPM])[kbnw]=(one-q)/(one+q)*(f_TSE-f_BNW+(f_TSE+f_BNW-two*feq*om1)/(one-om1))*c1o2+(q*(f_TSE+f_BNW)-six*c1over216*( VeloX-VeloY+VeloZ))/(one+q) - c1over216 * drho;
+         //(D.f[DIR_MPM])[kbnw]=zero;
       }
 
       q = q_dirBNW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*(-vx1+vx2-vx3)*/+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TSE])[ktse]=((c1o1 - q) * f_BNW + q * ((f_BNW + f_TSE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*(-VeloX+VeloY-VeloZ)) / (q + c1o1) ;
-         //(D.f[TSE])[ktse]=(one-q)/(one+q)*(f_BNW-f_TSE+(f_BNW+f_TSE-two*feq*om1)/(one-om1))*c1o2+(q*(f_BNW+f_TSE)-six*c1over216*(-VeloX+VeloY-VeloZ))/(one+q) - c1over216 * drho;
-         //(D.f[TSE])[ktse]=zero;
+         (D.f[DIR_PMP])[ktse]=((c1o1 - q) * f_BNW + q * ((f_BNW + f_TSE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*(-VeloX+VeloY-VeloZ)) / (q + c1o1) ;
+         //(D.f[DIR_PMP])[ktse]=(one-q)/(one+q)*(f_BNW-f_TSE+(f_BNW+f_TSE-two*feq*om1)/(one-om1))*c1o2+(q*(f_BNW+f_TSE)-six*c1over216*(-VeloX+VeloY-VeloZ))/(one+q) - c1over216 * drho;
+         //(D.f[DIR_PMP])[ktse]=zero;
       }
 
       q = q_dirBSE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*( vx1-vx2-vx3)*/+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TNW])[ktnw]=((c1o1 - q) * f_BSE + q * ((f_BSE + f_TNW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*( VeloX-VeloY-VeloZ)) / (q + c1o1) ;
-         //(D.f[TNW])[ktnw]=(one-q)/(one+q)*(f_BSE-f_TNW+(f_BSE+f_TNW-two*feq*om1)/(one-om1))*c1o2+(q*(f_BSE+f_TNW)-six*c1over216*( VeloX-VeloY-VeloZ))/(one+q) - c1over216 * drho;
-         //(D.f[TNW])[ktnw]=zero;
+         (D.f[DIR_MPP])[ktnw]=((c1o1 - q) * f_BSE + q * ((f_BSE + f_TNW) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*( VeloX-VeloY-VeloZ)) / (q + c1o1) ;
+         //(D.f[DIR_MPP])[ktnw]=(one-q)/(one+q)*(f_BSE-f_TNW+(f_BSE+f_TNW-two*feq*om1)/(one-om1))*c1o2+(q*(f_BSE+f_TNW)-six*c1over216*( VeloX-VeloY-VeloZ))/(one+q) - c1over216 * drho;
+         //(D.f[DIR_MPP])[ktnw]=zero;
       }
 
       q = q_dirTNW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*(-vx1+vx2+vx3)*/+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BSE])[kbse]=((c1o1 - q) * f_TNW + q * ((f_TNW + f_BSE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*(-VeloX+VeloY+VeloZ)) / (q + c1o1) ;
-         //(D.f[BSE])[kbse]=(one-q)/(one+q)*(f_TNW-f_BSE+(f_TNW+f_BSE-two*feq*om1)/(one-om1))*c1o2+(q*(f_TNW+f_BSE)-six*c1over216*(-VeloX+VeloY+VeloZ))/(one+q) - c1over216 * drho;
-         //(D.f[BSE])[kbse]=zero;
+         (D.f[DIR_PMM])[kbse]=((c1o1 - q) * f_TNW + q * ((f_TNW + f_BSE) * (c1o1 - om1) + om1 * c2o1 * feq) - c6o1*c1o216*(-VeloX+VeloY+VeloZ)) / (q + c1o1) ;
+         //(D.f[DIR_PMM])[kbse]=(one-q)/(one+q)*(f_TNW-f_BSE+(f_TNW+f_BSE-two*feq*om1)/(one-om1))*c1o2+(q*(f_TNW+f_BSE)-six*c1over216*(-VeloX+VeloY+VeloZ))/(one+q) - c1over216 * drho;
+         //(D.f[DIR_PMM])[kbse]=zero;
       }
    }
 }
@@ -2194,7 +2193,7 @@ extern "C" __global__ void QVelDeviceCompHighNu27(
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QVelDeviceCompZeroPress27(
+__global__ void QVelDeviceCompZeroPress27(
 														real* velocityX,
 														real* velocityY,
 														real* velocityZ,
@@ -2286,39 +2285,39 @@ extern "C" __global__ void QVelDeviceCompZeroPress27(
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set local distributions
       //!
-      real f_W    = (dist.f[E   ])[ke   ];
-      real f_E    = (dist.f[W   ])[kw   ];
-      real f_S    = (dist.f[N   ])[kn   ];
-      real f_N    = (dist.f[S   ])[ks   ];
-      real f_B    = (dist.f[T   ])[kt   ];
-      real f_T    = (dist.f[B   ])[kb   ];
-      real f_SW   = (dist.f[NE  ])[kne  ];
-      real f_NE   = (dist.f[SW  ])[ksw  ];
-      real f_NW   = (dist.f[SE  ])[kse  ];
-      real f_SE   = (dist.f[NW  ])[knw  ];
-      real f_BW   = (dist.f[TE  ])[kte  ];
-      real f_TE   = (dist.f[BW  ])[kbw  ];
-      real f_TW   = (dist.f[BE  ])[kbe  ];
-      real f_BE   = (dist.f[TW  ])[ktw  ];
-      real f_BS   = (dist.f[TN  ])[ktn  ];
-      real f_TN   = (dist.f[BS  ])[kbs  ];
-      real f_TS   = (dist.f[BN  ])[kbn  ];
-      real f_BN   = (dist.f[TS  ])[kts  ];
-      real f_BSW  = (dist.f[TNE ])[ktne ];
-      real f_BNE  = (dist.f[TSW ])[ktsw ];
-      real f_BNW  = (dist.f[TSE ])[ktse ];
-      real f_BSE  = (dist.f[TNW ])[ktnw ];
-      real f_TSW  = (dist.f[BNE ])[kbne ];
-      real f_TNE  = (dist.f[BSW ])[kbsw ];
-      real f_TNW  = (dist.f[BSE ])[kbse ];
-      real f_TSE  = (dist.f[BNW ])[kbnw ];
+      real f_W    = (dist.f[DIR_P00   ])[ke   ];
+      real f_E    = (dist.f[DIR_M00   ])[kw   ];
+      real f_S    = (dist.f[DIR_0P0   ])[kn   ];
+      real f_N    = (dist.f[DIR_0M0   ])[ks   ];
+      real f_B    = (dist.f[DIR_00P   ])[kt   ];
+      real f_T    = (dist.f[DIR_00M   ])[kb   ];
+      real f_SW   = (dist.f[DIR_PP0  ])[kne  ];
+      real f_NE   = (dist.f[DIR_MM0  ])[ksw  ];
+      real f_NW   = (dist.f[DIR_PM0  ])[kse  ];
+      real f_SE   = (dist.f[DIR_MP0  ])[knw  ];
+      real f_BW   = (dist.f[DIR_P0P  ])[kte  ];
+      real f_TE   = (dist.f[DIR_M0M  ])[kbw  ];
+      real f_TW   = (dist.f[DIR_P0M  ])[kbe  ];
+      real f_BE   = (dist.f[DIR_M0P  ])[ktw  ];
+      real f_BS   = (dist.f[DIR_0PP  ])[ktn  ];
+      real f_TN   = (dist.f[DIR_0MM  ])[kbs  ];
+      real f_TS   = (dist.f[DIR_0PM  ])[kbn  ];
+      real f_BN   = (dist.f[DIR_0MP  ])[kts  ];
+      real f_BSW  = (dist.f[DIR_PPP ])[ktne ];
+      real f_BNE  = (dist.f[DIR_MMP ])[ktsw ];
+      real f_BNW  = (dist.f[DIR_PMP ])[ktse ];
+      real f_BSE  = (dist.f[DIR_MPP ])[ktnw ];
+      real f_TSW  = (dist.f[DIR_PPM ])[kbne ];
+      real f_TNE  = (dist.f[DIR_MMM ])[kbsw ];
+      real f_TNW  = (dist.f[DIR_PMM ])[kbse ];
+      real f_TSE  = (dist.f[DIR_MPM ])[kbnw ];
 
       ////////////////////////////////////////////////////////////////////////////////
       //! - Calculate macroscopic quantities
       //!
       real drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
                      f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + 
-                     f_T + f_B + f_N + f_S + f_E + f_W + ((dist.f[REST])[kzero]); 
+                     f_T + f_B + f_N + f_S + f_E + f_W + ((dist.f[DIR_000])[kzero]); 
 
       real vx1    =  (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
                       ((f_BE - f_TW)   + (f_TE - f_BW))   + ((f_SE - f_NW)   + (f_NE - f_SW)) +
@@ -2343,238 +2342,238 @@ extern "C" __global__ void QVelDeviceCompZeroPress27(
       ////////////////////////////////////////////////////////////////////////////////
       //! - Update distributions with subgrid distance (q) between zero and one
       real feq, q, velocityLB, velocityBC;
-      q = (subgridD.q[E])[k];
+      q = (subgridD.q[DIR_P00])[k];
       if (q>=c0o1 && q<=c1o1) // only update distribution for q between zero and one
       {
          velocityLB = vx1;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
          velocityBC = VeloX;
-         (dist.f[W])[kw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_E, f_W, feq, omega, drho, velocityBC, c2o27);
+         (dist.f[DIR_M00])[kw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_E, f_W, feq, omega, drho, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[W])[k];
+      q = (subgridD.q[DIR_M00])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
          velocityBC = -VeloX;
-         (dist.f[E])[ke] = getInterpolatedDistributionForVeloWithPressureBC(q, f_W, f_E, feq, omega, drho, velocityBC, c2o27);
+         (dist.f[DIR_P00])[ke] = getInterpolatedDistributionForVeloWithPressureBC(q, f_W, f_E, feq, omega, drho, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[N])[k];
+      q = (subgridD.q[DIR_0P0])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx2;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
          velocityBC = VeloY;
-         (dist.f[S])[ks] = getInterpolatedDistributionForVeloWithPressureBC(q, f_N, f_S, feq, omega, drho, velocityBC, c2o27);
+         (dist.f[DIR_0M0])[ks] = getInterpolatedDistributionForVeloWithPressureBC(q, f_N, f_S, feq, omega, drho, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[S])[k];
+      q = (subgridD.q[DIR_0M0])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx2;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
          velocityBC = -VeloY;
-         (dist.f[N])[kn] = getInterpolatedDistributionForVeloWithPressureBC(q, f_S, f_N, feq, omega, drho, velocityBC, c2o27);
+         (dist.f[DIR_0P0])[kn] = getInterpolatedDistributionForVeloWithPressureBC(q, f_S, f_N, feq, omega, drho, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[T])[k];
+      q = (subgridD.q[DIR_00P])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
          velocityBC = VeloZ;
-         (dist.f[B])[kb] = getInterpolatedDistributionForVeloWithPressureBC(q, f_T, f_B, feq, omega, drho, velocityBC, c2o27);
+         (dist.f[DIR_00M])[kb] = getInterpolatedDistributionForVeloWithPressureBC(q, f_T, f_B, feq, omega, drho, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[B])[k];
+      q = (subgridD.q[DIR_00M])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
          velocityBC = -VeloZ;
-         (dist.f[T])[kt] = getInterpolatedDistributionForVeloWithPressureBC(q, f_B, f_T, feq, omega, drho, velocityBC, c2o27);
+         (dist.f[DIR_00P])[kt] = getInterpolatedDistributionForVeloWithPressureBC(q, f_B, f_T, feq, omega, drho, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[NE])[k];
+      q = (subgridD.q[DIR_PP0])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx2;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
          velocityBC = VeloX + VeloY;
-         (dist.f[SW])[ksw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_NE, f_SW, feq, omega, drho, velocityBC, c1o54);
+         (dist.f[DIR_MM0])[ksw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_NE, f_SW, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[SW])[k];
+      q = (subgridD.q[DIR_MM0])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx2;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
          velocityBC = -VeloX - VeloY;
-         (dist.f[NE])[kne] = getInterpolatedDistributionForVeloWithPressureBC(q, f_SW, f_NE, feq, omega, drho, velocityBC, c1o54);
+         (dist.f[DIR_PP0])[kne] = getInterpolatedDistributionForVeloWithPressureBC(q, f_SW, f_NE, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[SE])[k];
+      q = (subgridD.q[DIR_PM0])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx2;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
          velocityBC = VeloX - VeloY;
-         (dist.f[NW])[knw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_SE, f_NW, feq, omega, drho, velocityBC, c1o54);
+         (dist.f[DIR_MP0])[knw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_SE, f_NW, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[NW])[k];
+      q = (subgridD.q[DIR_MP0])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx2;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
          velocityBC = -VeloX + VeloY;
-         (dist.f[SE])[kse] = getInterpolatedDistributionForVeloWithPressureBC(q, f_NW, f_SE, feq, omega, drho, velocityBC, c1o54);
+         (dist.f[DIR_PM0])[kse] = getInterpolatedDistributionForVeloWithPressureBC(q, f_NW, f_SE, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[TE])[k];
+      q = (subgridD.q[DIR_P0P])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
          velocityBC = VeloX + VeloZ;
-         (dist.f[BW])[kbw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TE, f_BW, feq, omega, drho, velocityBC, c1o54);
+         (dist.f[DIR_M0M])[kbw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TE, f_BW, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[BW])[k];
+      q = (subgridD.q[DIR_M0M])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
          velocityBC = -VeloX - VeloZ;
-         (dist.f[TE])[kte] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BW, f_TE, feq, omega, drho, velocityBC, c1o54);
+         (dist.f[DIR_P0P])[kte] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BW, f_TE, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[BE])[k];
+      q = (subgridD.q[DIR_P0M])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
          velocityBC = VeloX - VeloZ;
-         (dist.f[TW])[ktw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BE, f_TW, feq, omega, drho, velocityBC, c1o54);
+         (dist.f[DIR_M0P])[ktw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BE, f_TW, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[TW])[k];
+      q = (subgridD.q[DIR_M0P])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
          velocityBC = -VeloX + VeloZ;
-         (dist.f[BE])[kbe] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TW, f_BE, feq, omega, drho, velocityBC, c1o54);
+         (dist.f[DIR_P0M])[kbe] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TW, f_BE, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[TN])[k];
+      q = (subgridD.q[DIR_0PP])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx2 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
          velocityBC = VeloY + VeloZ;
-         (dist.f[BS])[kbs] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TN, f_BS, feq, omega, drho, velocityBC, c1o54);
+         (dist.f[DIR_0MM])[kbs] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TN, f_BS, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[BS])[k];
+      q = (subgridD.q[DIR_0MM])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx2 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
          velocityBC = -VeloY - VeloZ;
-         (dist.f[TN])[ktn] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BS, f_TN, feq, omega, drho, velocityBC, c1o54);
+         (dist.f[DIR_0PP])[ktn] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BS, f_TN, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[BN])[k];
+      q = (subgridD.q[DIR_0PM])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx2 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
          velocityBC = VeloY - VeloZ;
-         (dist.f[TS])[kts] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BN, f_TS, feq, omega, drho, velocityBC, c1o54);
+         (dist.f[DIR_0MP])[kts] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BN, f_TS, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[TS])[k];
+      q = (subgridD.q[DIR_0MP])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx2 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
          velocityBC = -VeloY + VeloZ;
-         (dist.f[BN])[kbn] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TS, f_BN, feq, omega, drho, velocityBC, c1o54);
+         (dist.f[DIR_0PM])[kbn] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TS, f_BN, feq, omega, drho, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[TNE])[k];
+      q = (subgridD.q[DIR_PPP])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx2 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
          velocityBC = VeloX + VeloY + VeloZ;
-         (dist.f[BSW])[kbsw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TNE, f_BSW, feq, omega, drho, velocityBC, c1o216);
+         (dist.f[DIR_MMM])[kbsw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TNE, f_BSW, feq, omega, drho, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[BSW])[k];
+      q = (subgridD.q[DIR_MMM])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx2 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
          velocityBC = -VeloX - VeloY - VeloZ;
-         (dist.f[TNE])[ktne] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BSW, f_TNE, feq, omega, drho, velocityBC, c1o216);
+         (dist.f[DIR_PPP])[ktne] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BSW, f_TNE, feq, omega, drho, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[BNE])[k];
+      q = (subgridD.q[DIR_PPM])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx2 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
          velocityBC = VeloX + VeloY - VeloZ;
-         (dist.f[TSW])[ktsw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BNE, f_TSW, feq, omega, drho, velocityBC, c1o216);
+         (dist.f[DIR_MMP])[ktsw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BNE, f_TSW, feq, omega, drho, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[TSW])[k];
+      q = (subgridD.q[DIR_MMP])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx2 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
          velocityBC = -VeloX - VeloY + VeloZ;
-         (dist.f[BNE])[kbne] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TSW, f_BNE, feq, omega, drho, velocityBC, c1o216);
+         (dist.f[DIR_PPM])[kbne] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TSW, f_BNE, feq, omega, drho, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[TSE])[k];
+      q = (subgridD.q[DIR_PMP])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx2 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
          velocityBC = VeloX - VeloY + VeloZ;
-         (dist.f[BNW])[kbnw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TSE, f_BNW, feq, omega, drho, velocityBC, c1o216);
+         (dist.f[DIR_MPM])[kbnw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TSE, f_BNW, feq, omega, drho, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[BNW])[k];
+      q = (subgridD.q[DIR_MPM])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx2 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
          velocityBC = -VeloX + VeloY - VeloZ;
-         (dist.f[TSE])[ktse] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BNW, f_TSE, feq, omega, drho, velocityBC, c1o216);
+         (dist.f[DIR_PMP])[ktse] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BNW, f_TSE, feq, omega, drho, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[BSE])[k];
+      q = (subgridD.q[DIR_PMM])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx2 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
          velocityBC = VeloX - VeloY - VeloZ;
-         (dist.f[TNW])[ktnw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BSE, f_TNW, feq, omega, drho, velocityBC, c1o216);
+         (dist.f[DIR_MPP])[ktnw] = getInterpolatedDistributionForVeloWithPressureBC(q, f_BSE, f_TNW, feq, omega, drho, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[TNW])[k];
+      q = (subgridD.q[DIR_MPP])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx2 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
          velocityBC = -VeloX + VeloY + VeloZ;
-         (dist.f[BSE])[kbse] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TNW, f_BSE, feq, omega, drho, velocityBC, c1o216);
+         (dist.f[DIR_PMM])[kbse] = getInterpolatedDistributionForVeloWithPressureBC(q, f_TNW, f_BSE, feq, omega, drho, velocityBC, c1o216);
       }
    }
 }
@@ -2620,7 +2619,7 @@ extern "C" __global__ void QVelDeviceCompZeroPress27(
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QVelDeviceCompZeroPress1h27( int inx,
+__global__ void QVelDeviceCompZeroPress1h27( int inx,
 														int iny,
 														real* vx,
 														real* vy,
@@ -2644,63 +2643,63 @@ extern "C" __global__ void QVelDeviceCompZeroPress1h27( int inx,
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[E   ] = &DD[E   *size_Mat];
-      D.f[W   ] = &DD[W   *size_Mat];
-      D.f[N   ] = &DD[N   *size_Mat];
-      D.f[S   ] = &DD[S   *size_Mat];
-      D.f[T   ] = &DD[T   *size_Mat];
-      D.f[B   ] = &DD[B   *size_Mat];
-      D.f[NE  ] = &DD[NE  *size_Mat];
-      D.f[SW  ] = &DD[SW  *size_Mat];
-      D.f[SE  ] = &DD[SE  *size_Mat];
-      D.f[NW  ] = &DD[NW  *size_Mat];
-      D.f[TE  ] = &DD[TE  *size_Mat];
-      D.f[BW  ] = &DD[BW  *size_Mat];
-      D.f[BE  ] = &DD[BE  *size_Mat];
-      D.f[TW  ] = &DD[TW  *size_Mat];
-      D.f[TN  ] = &DD[TN  *size_Mat];
-      D.f[BS  ] = &DD[BS  *size_Mat];
-      D.f[BN  ] = &DD[BN  *size_Mat];
-      D.f[TS  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[TNE *size_Mat];
-      D.f[TSW ] = &DD[TSW *size_Mat];
-      D.f[TSE ] = &DD[TSE *size_Mat];
-      D.f[TNW ] = &DD[TNW *size_Mat];
-      D.f[BNE ] = &DD[BNE *size_Mat];
-      D.f[BSW ] = &DD[BSW *size_Mat];
-      D.f[BSE ] = &DD[BSE *size_Mat];
-      D.f[BNW ] = &DD[BNW *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
    } 
    else
    {
-      D.f[W   ] = &DD[E   *size_Mat];
-      D.f[E   ] = &DD[W   *size_Mat];
-      D.f[S   ] = &DD[N   *size_Mat];
-      D.f[N   ] = &DD[S   *size_Mat];
-      D.f[B   ] = &DD[T   *size_Mat];
-      D.f[T   ] = &DD[B   *size_Mat];
-      D.f[SW  ] = &DD[NE  *size_Mat];
-      D.f[NE  ] = &DD[SW  *size_Mat];
-      D.f[NW  ] = &DD[SE  *size_Mat];
-      D.f[SE  ] = &DD[NW  *size_Mat];
-      D.f[BW  ] = &DD[TE  *size_Mat];
-      D.f[TE  ] = &DD[BW  *size_Mat];
-      D.f[TW  ] = &DD[BE  *size_Mat];
-      D.f[BE  ] = &DD[TW  *size_Mat];
-      D.f[BS  ] = &DD[TN  *size_Mat];
-      D.f[TN  ] = &DD[BS  *size_Mat];
-      D.f[TS  ] = &DD[BN  *size_Mat];
-      D.f[BN  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[BSW *size_Mat];
-      D.f[TSW ] = &DD[BNE *size_Mat];
-      D.f[TSE ] = &DD[BNW *size_Mat];
-      D.f[TNW ] = &DD[BSE *size_Mat];
-      D.f[BNE ] = &DD[TSW *size_Mat];
-      D.f[BSW ] = &DD[TNE *size_Mat];
-      D.f[BSE ] = &DD[TNW *size_Mat];
-      D.f[BNW ] = &DD[TSE *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -2739,32 +2738,32 @@ extern "C" __global__ void QVelDeviceCompZeroPress1h27( int inx,
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[E   * numberOfBCnodes];
-      q_dirW   = &QQ[W   * numberOfBCnodes];
-      q_dirN   = &QQ[N   * numberOfBCnodes];
-      q_dirS   = &QQ[S   * numberOfBCnodes];
-      q_dirT   = &QQ[T   * numberOfBCnodes];
-      q_dirB   = &QQ[B   * numberOfBCnodes];
-      q_dirNE  = &QQ[NE  * numberOfBCnodes];
-      q_dirSW  = &QQ[SW  * numberOfBCnodes];
-      q_dirSE  = &QQ[SE  * numberOfBCnodes];
-      q_dirNW  = &QQ[NW  * numberOfBCnodes];
-      q_dirTE  = &QQ[TE  * numberOfBCnodes];
-      q_dirBW  = &QQ[BW  * numberOfBCnodes];
-      q_dirBE  = &QQ[BE  * numberOfBCnodes];
-      q_dirTW  = &QQ[TW  * numberOfBCnodes];
-      q_dirTN  = &QQ[TN  * numberOfBCnodes];
-      q_dirBS  = &QQ[BS  * numberOfBCnodes];
-      q_dirBN  = &QQ[BN  * numberOfBCnodes];
-      q_dirTS  = &QQ[TS  * numberOfBCnodes];
-      q_dirTNE = &QQ[TNE * numberOfBCnodes];
-      q_dirTSW = &QQ[TSW * numberOfBCnodes];
-      q_dirTSE = &QQ[TSE * numberOfBCnodes];
-      q_dirTNW = &QQ[TNW * numberOfBCnodes];
-      q_dirBNE = &QQ[BNE * numberOfBCnodes];
-      q_dirBSW = &QQ[BSW * numberOfBCnodes];
-      q_dirBSE = &QQ[BSE * numberOfBCnodes];
-      q_dirBNW = &QQ[BNW * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
+      q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
+      q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
+      q_dirTNW = &QQ[DIR_MPP * numberOfBCnodes];
+      q_dirBNE = &QQ[DIR_PPM * numberOfBCnodes];
+      q_dirBSW = &QQ[DIR_MMM * numberOfBCnodes];
+      q_dirBSE = &QQ[DIR_PMM * numberOfBCnodes];
+      q_dirBNW = &QQ[DIR_MPM * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -2798,63 +2797,63 @@ extern "C" __global__ void QVelDeviceCompZeroPress1h27( int inx,
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D.f[E   ] = &DD[E   *size_Mat];
-         D.f[W   ] = &DD[W   *size_Mat];
-         D.f[N   ] = &DD[N   *size_Mat];
-         D.f[S   ] = &DD[S   *size_Mat];
-         D.f[T   ] = &DD[T   *size_Mat];
-         D.f[B   ] = &DD[B   *size_Mat];
-         D.f[NE  ] = &DD[NE  *size_Mat];
-         D.f[SW  ] = &DD[SW  *size_Mat];
-         D.f[SE  ] = &DD[SE  *size_Mat];
-         D.f[NW  ] = &DD[NW  *size_Mat];
-         D.f[TE  ] = &DD[TE  *size_Mat];
-         D.f[BW  ] = &DD[BW  *size_Mat];
-         D.f[BE  ] = &DD[BE  *size_Mat];
-         D.f[TW  ] = &DD[TW  *size_Mat];
-         D.f[TN  ] = &DD[TN  *size_Mat];
-         D.f[BS  ] = &DD[BS  *size_Mat];
-         D.f[BN  ] = &DD[BN  *size_Mat];
-         D.f[TS  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[TNE *size_Mat];
-         D.f[TSW ] = &DD[TSW *size_Mat];
-         D.f[TSE ] = &DD[TSE *size_Mat];
-         D.f[TNW ] = &DD[TNW *size_Mat];
-         D.f[BNE ] = &DD[BNE *size_Mat];
-         D.f[BSW ] = &DD[BSW *size_Mat];
-         D.f[BSE ] = &DD[BSE *size_Mat];
-         D.f[BNW ] = &DD[BNW *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
       } 
       else
       {
-         D.f[W   ] = &DD[E   *size_Mat];
-         D.f[E   ] = &DD[W   *size_Mat];
-         D.f[S   ] = &DD[N   *size_Mat];
-         D.f[N   ] = &DD[S   *size_Mat];
-         D.f[B   ] = &DD[T   *size_Mat];
-         D.f[T   ] = &DD[B   *size_Mat];
-         D.f[SW  ] = &DD[NE  *size_Mat];
-         D.f[NE  ] = &DD[SW  *size_Mat];
-         D.f[NW  ] = &DD[SE  *size_Mat];
-         D.f[SE  ] = &DD[NW  *size_Mat];
-         D.f[BW  ] = &DD[TE  *size_Mat];
-         D.f[TE  ] = &DD[BW  *size_Mat];
-         D.f[TW  ] = &DD[BE  *size_Mat];
-         D.f[BE  ] = &DD[TW  *size_Mat];
-         D.f[BS  ] = &DD[TN  *size_Mat];
-         D.f[TN  ] = &DD[BS  *size_Mat];
-         D.f[TS  ] = &DD[BN  *size_Mat];
-         D.f[BN  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[BSW *size_Mat];
-         D.f[TSW ] = &DD[BNE *size_Mat];
-         D.f[TSE ] = &DD[BNW *size_Mat];
-         D.f[TNW ] = &DD[BSE *size_Mat];
-         D.f[BNE ] = &DD[TSW *size_Mat];
-         D.f[BSW ] = &DD[TNE *size_Mat];
-         D.f[BSE ] = &DD[TNW *size_Mat];
-         D.f[BNW ] = &DD[TSE *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  real vx1, vx2, vx3, drho, feq, q, cu_sq;
@@ -2871,182 +2870,182 @@ extern "C" __global__ void QVelDeviceCompZeroPress1h27( int inx,
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c2o27* (drho+c3o1*( VeloXpur        )+c9o2*( vx1        )*( vx1        ) * (c1o1 + drho)-cu_sq); 
-         (D.f[W])[kw]= feq - c2o27 * drho;
+         (D.f[DIR_M00])[kw]= feq - c2o27 * drho;
       }
 
       q = q_dirW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c2o27* (drho+c3o1*(-VeloXpur        )+c9o2*(-vx1        )*(-vx1        ) * (c1o1 + drho)-cu_sq); 
-         (D.f[E])[ke]= feq - c2o27 * drho;
+         (D.f[DIR_P00])[ke]= feq - c2o27 * drho;
       }
 
       q = q_dirN[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c2o27* (drho+c3o1*(    VeloYpur     )+c9o2*(     vx2    )*(     vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[S])[ks]= feq - c2o27 * drho;
+         (D.f[DIR_0M0])[ks]= feq - c2o27 * drho;
       }
 
       q = q_dirS[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c2o27* (drho+c3o1*(   -VeloYpur     )+c9o2*(    -vx2    )*(    -vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[N])[kn]= feq - c2o27 * drho;
+         (D.f[DIR_0P0])[kn]= feq - c2o27 * drho;
       }
 
       q = q_dirT[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c2o27* (drho+c3o1*(         vx3)+c9o2*(         vx3)*(         vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[B])[kb]= feq - c2o27 * drho;
+         (D.f[DIR_00M])[kb]= feq - c2o27 * drho;
       }
 
       q = q_dirB[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c2o27* (drho+c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[T])[kt]= feq - c2o27 * drho;
+         (D.f[DIR_00P])[kt]= feq - c2o27 * drho;
       }
 
       q = q_dirNE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho+c3o1*( VeloXpur+VeloYpur    )+c9o2*( vx1+vx2    )*( vx1+vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[SW])[ksw]= feq - c1o54 * drho;
+         (D.f[DIR_MM0])[ksw]= feq - c1o54 * drho;
       }
 
       q = q_dirSW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho+c3o1*(-VeloXpur-VeloYpur    )+c9o2*(-vx1-vx2    )*(-vx1-vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[NE])[kne]= feq - c1o54 * drho;
+         (D.f[DIR_PP0])[kne]= feq - c1o54 * drho;
       }
 
       q = q_dirSE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho+c3o1*( VeloXpur-VeloYpur    )+c9o2*( vx1-vx2    )*( vx1-vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[NW])[knw]= feq - c1o54 * drho;
+         (D.f[DIR_MP0])[knw]= feq - c1o54 * drho;
       }
 
       q = q_dirNW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho+c3o1*(-VeloXpur+VeloYpur    )+c9o2*(-vx1+vx2    )*(-vx1+vx2    ) * (c1o1 + drho)-cu_sq); 
-         (D.f[SE])[kse]= feq - c1o54 * drho;
+         (D.f[DIR_PM0])[kse]= feq - c1o54 * drho;
       }
 
       q = q_dirTE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho+c3o1*( VeloXpur    +vx3)+c9o2*( vx1    +vx3)*( vx1    +vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BW])[kbw]= feq - c1o54 * drho;
+         (D.f[DIR_M0M])[kbw]= feq - c1o54 * drho;
       }
 
       q = q_dirBW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho+c3o1*(-VeloXpur    -vx3)+c9o2*(-vx1    -vx3)*(-vx1    -vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TE])[kte]= feq - c1o54 * drho;
+         (D.f[DIR_P0P])[kte]= feq - c1o54 * drho;
       }
 
       q = q_dirBE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho+c3o1*( VeloXpur    -vx3)+c9o2*( vx1    -vx3)*( vx1    -vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TW])[ktw]= feq - c1o54 * drho;
+         (D.f[DIR_M0P])[ktw]= feq - c1o54 * drho;
       }
 
       q = q_dirTW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho+c3o1*(-VeloXpur    +vx3)+c9o2*(-vx1    +vx3)*(-vx1    +vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BE])[kbe]= feq - c1o54 * drho;
+         (D.f[DIR_P0M])[kbe]= feq - c1o54 * drho;
       }
 
       q = q_dirTN[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho+c3o1*(     VeloYpur+vx3)+c9o2*(     vx2+vx3)*(     vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BS])[kbs]= feq - c1o54 * drho;
+         (D.f[DIR_0MM])[kbs]= feq - c1o54 * drho;
       }
 
       q = q_dirBS[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho+c3o1*(    -VeloYpur-vx3)+c9o2*(    -vx2-vx3)*(    -vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TN])[ktn]= feq - c1o54 * drho;
+         (D.f[DIR_0PP])[ktn]= feq - c1o54 * drho;
       }
 
       q = q_dirBN[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho+c3o1*(     VeloYpur-vx3)+c9o2*(     vx2-vx3)*(     vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TS])[kts]= feq - c1o54 * drho;
+         (D.f[DIR_0MP])[kts]= feq - c1o54 * drho;
       }
 
       q = q_dirTS[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho+c3o1*(    -VeloYpur+vx3)+c9o2*(    -vx2+vx3)*(    -vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BN])[kbn]= feq - c1o54 * drho;
+         (D.f[DIR_0PM])[kbn]= feq - c1o54 * drho;
       }
 
       q = q_dirTNE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho+c3o1*( VeloXpur+VeloYpur+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BSW])[kbsw]= feq - c1o216 * drho;
+         (D.f[DIR_MMM])[kbsw]= feq - c1o216 * drho;
       }
 
       q = q_dirBSW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho+c3o1*(-VeloXpur-VeloYpur-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TNE])[ktne]= feq - c1o216 * drho;
+         (D.f[DIR_PPP])[ktne]= feq - c1o216 * drho;
       }
 
       q = q_dirBNE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho+c3o1*( VeloXpur+VeloYpur-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TSW])[ktsw]= feq - c1o216 * drho;
+         (D.f[DIR_MMP])[ktsw]= feq - c1o216 * drho;
       }
 
       q = q_dirTSW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho+c3o1*(-VeloXpur-VeloYpur+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BNE])[kbne]= feq - c1o216 * drho;
+         (D.f[DIR_PPM])[kbne]= feq - c1o216 * drho;
       }
 
       q = q_dirTSE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho+c3o1*( VeloXpur-VeloYpur+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BNW])[kbnw]= feq - c1o216 * drho;
+         (D.f[DIR_MPM])[kbnw]= feq - c1o216 * drho;
       }
 
       q = q_dirBNW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho+c3o1*(-VeloXpur+VeloYpur-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TSE])[ktse]= feq - c1o216 * drho;
+         (D.f[DIR_PMP])[ktse]= feq - c1o216 * drho;
       }
 
       q = q_dirBSE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho+c3o1*( VeloXpur-VeloYpur-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[TNW])[ktnw]= feq - c1o216 * drho;
+         (D.f[DIR_MPP])[ktnw]= feq - c1o216 * drho;
       }
 
       q = q_dirTNW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho+c3o1*(-VeloXpur+VeloYpur+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3) * (c1o1 + drho)-cu_sq); 
-         (D.f[BSE])[kbse]= feq - c1o216 * drho;
+         (D.f[DIR_PMM])[kbse]= feq - c1o216 * drho;
       }
    }
 }
@@ -3091,7 +3090,7 @@ extern "C" __global__ void QVelDeviceCompZeroPress1h27( int inx,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LB_BC_Vel_West_27( int nx, 
+__global__ void LB_BC_Vel_West_27( int nx, 
                                               int ny, 
                                               int nz, 
                                               int itz, 
@@ -3126,63 +3125,63 @@ extern "C" __global__ void LB_BC_Vel_West_27( int nx,
       Distributions27 D;
       if (isEvenTimestep==true)
       {
-         D.f[E   ] = &DD[E   *size_Mat];
-         D.f[W   ] = &DD[W   *size_Mat];
-         D.f[N   ] = &DD[N   *size_Mat];
-         D.f[S   ] = &DD[S   *size_Mat];
-         D.f[T   ] = &DD[T   *size_Mat];
-         D.f[B   ] = &DD[B   *size_Mat];
-         D.f[NE  ] = &DD[NE  *size_Mat];
-         D.f[SW  ] = &DD[SW  *size_Mat];
-         D.f[SE  ] = &DD[SE  *size_Mat];
-         D.f[NW  ] = &DD[NW  *size_Mat];
-         D.f[TE  ] = &DD[TE  *size_Mat];
-         D.f[BW  ] = &DD[BW  *size_Mat];
-         D.f[BE  ] = &DD[BE  *size_Mat];
-         D.f[TW  ] = &DD[TW  *size_Mat];
-         D.f[TN  ] = &DD[TN  *size_Mat];
-         D.f[BS  ] = &DD[BS  *size_Mat];
-         D.f[BN  ] = &DD[BN  *size_Mat];
-         D.f[TS  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[TNE *size_Mat];
-         D.f[TSW ] = &DD[TSW *size_Mat];
-         D.f[TSE ] = &DD[TSE *size_Mat];
-         D.f[TNW ] = &DD[TNW *size_Mat];
-         D.f[BNE ] = &DD[BNE *size_Mat];
-         D.f[BSW ] = &DD[BSW *size_Mat];
-         D.f[BSE ] = &DD[BSE *size_Mat];
-         D.f[BNW ] = &DD[BNW *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
       } 
       else
       {
-         D.f[W   ] = &DD[E   *size_Mat];
-         D.f[E   ] = &DD[W   *size_Mat];
-         D.f[S   ] = &DD[N   *size_Mat];
-         D.f[N   ] = &DD[S   *size_Mat];
-         D.f[B   ] = &DD[T   *size_Mat];
-         D.f[T   ] = &DD[B   *size_Mat];
-         D.f[SW  ] = &DD[NE  *size_Mat];
-         D.f[NE  ] = &DD[SW  *size_Mat];
-         D.f[NW  ] = &DD[SE  *size_Mat];
-         D.f[SE  ] = &DD[NW  *size_Mat];
-         D.f[BW  ] = &DD[TE  *size_Mat];
-         D.f[TE  ] = &DD[BW  *size_Mat];
-         D.f[TW  ] = &DD[BE  *size_Mat];
-         D.f[BE  ] = &DD[TW  *size_Mat];
-         D.f[BS  ] = &DD[TN  *size_Mat];
-         D.f[TN  ] = &DD[BS  *size_Mat];
-         D.f[TS  ] = &DD[BN  *size_Mat];
-         D.f[BN  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[BSW *size_Mat];
-         D.f[TSW ] = &DD[BNE *size_Mat];
-         D.f[TSE ] = &DD[BNW *size_Mat];
-         D.f[TNW ] = &DD[BSE *size_Mat];
-         D.f[BNE ] = &DD[TSW *size_Mat];
-         D.f[BSW ] = &DD[TNE *size_Mat];
-         D.f[BSE ] = &DD[TNW *size_Mat];
-         D.f[BNW ] = &DD[TSE *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
       }
 
       ////////////////////////////////////////////////////////////////////////////////
@@ -3301,33 +3300,33 @@ extern "C" __global__ void LB_BC_Vel_West_27( int nx,
       real        f1_E,f1_W,f1_N,f1_S,f1_T,f1_B,f1_NE,f1_SW,f1_SE,f1_NW,f1_TE,f1_BW,f1_BE,f1_TW,f1_TN,f1_BS,f1_BN,f1_TS,f1_ZERO,
          f1_TNE,f1_TSW,f1_TSE,f1_TNW,f1_BNE,f1_BSW,f1_BSE,f1_BNW;
 
-      f1_W    = (D.f[E   ])[k1e   ];
-      f1_E    = (D.f[W   ])[k1w   ];
-      f1_S    = (D.f[N   ])[k1n   ];
-      f1_N    = (D.f[S   ])[k1s   ];
-      f1_B    = (D.f[T   ])[k1t   ];
-      f1_T    = (D.f[B   ])[k1b   ];
-      f1_SW   = (D.f[NE  ])[k1ne  ];
-      f1_NE   = (D.f[SW  ])[k1sw  ];
-      f1_NW   = (D.f[SE  ])[k1se  ];
-      f1_SE   = (D.f[NW  ])[k1nw  ];
-      f1_BW   = (D.f[TE  ])[k1te  ];
-      f1_TE   = (D.f[BW  ])[k1bw  ];
-      f1_TW   = (D.f[BE  ])[k1be  ];
-      f1_BE   = (D.f[TW  ])[k1tw  ];
-      f1_BS   = (D.f[TN  ])[k1tn  ];
-      f1_TN   = (D.f[BS  ])[k1bs  ];
-      f1_TS   = (D.f[BN  ])[k1bn  ];
-      f1_BN   = (D.f[TS  ])[k1ts  ];
-      f1_ZERO = (D.f[REST])[k1zero];
-      f1_BSW  = (D.f[TNE ])[k1tne ];
-      f1_BNE  = (D.f[TSW ])[k1tsw ];
-      f1_BNW  = (D.f[TSE ])[k1tse ];
-      f1_BSE  = (D.f[TNW ])[k1tnw ];
-      f1_TSW  = (D.f[BNE ])[k1bne ];
-      f1_TNE  = (D.f[BSW ])[k1bsw ];
-      f1_TNW  = (D.f[BSE ])[k1bse ];
-      f1_TSE  = (D.f[BNW ])[k1bnw ];
+      f1_W    = (D.f[DIR_P00   ])[k1e   ];
+      f1_E    = (D.f[DIR_M00   ])[k1w   ];
+      f1_S    = (D.f[DIR_0P0   ])[k1n   ];
+      f1_N    = (D.f[DIR_0M0   ])[k1s   ];
+      f1_B    = (D.f[DIR_00P   ])[k1t   ];
+      f1_T    = (D.f[DIR_00M   ])[k1b   ];
+      f1_SW   = (D.f[DIR_PP0  ])[k1ne  ];
+      f1_NE   = (D.f[DIR_MM0  ])[k1sw  ];
+      f1_NW   = (D.f[DIR_PM0  ])[k1se  ];
+      f1_SE   = (D.f[DIR_MP0  ])[k1nw  ];
+      f1_BW   = (D.f[DIR_P0P  ])[k1te  ];
+      f1_TE   = (D.f[DIR_M0M  ])[k1bw  ];
+      f1_TW   = (D.f[DIR_P0M  ])[k1be  ];
+      f1_BE   = (D.f[DIR_M0P  ])[k1tw  ];
+      f1_BS   = (D.f[DIR_0PP  ])[k1tn  ];
+      f1_TN   = (D.f[DIR_0MM  ])[k1bs  ];
+      f1_TS   = (D.f[DIR_0PM  ])[k1bn  ];
+      f1_BN   = (D.f[DIR_0MP  ])[k1ts  ];
+      f1_ZERO = (D.f[DIR_000])[k1zero];
+      f1_BSW  = (D.f[DIR_PPP ])[k1tne ];
+      f1_BNE  = (D.f[DIR_MMP ])[k1tsw ];
+      f1_BNW  = (D.f[DIR_PMP ])[k1tse ];
+      f1_BSE  = (D.f[DIR_MPP ])[k1tnw ];
+      f1_TSW  = (D.f[DIR_PPM ])[k1bne ];
+      f1_TNE  = (D.f[DIR_MMM ])[k1bsw ];
+      f1_TNW  = (D.f[DIR_PMM ])[k1bse ];
+      f1_TSE  = (D.f[DIR_MPM ])[k1bnw ];
 
       real drho1    =  f1_ZERO+f1_E+f1_W+f1_N+f1_S+f1_T+f1_B+f1_NE+f1_SW+f1_SE+f1_NW+f1_TE+f1_BW+f1_BE+f1_TW+f1_TN+f1_BS+f1_BN+f1_TS+
          f1_TNE+f1_TSW+f1_TSE+f1_TNW+f1_BNE+f1_BSW+f1_BSE+f1_BNW;
@@ -3343,33 +3342,33 @@ extern "C" __global__ void LB_BC_Vel_West_27( int nx,
 
       real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
-      (D.f[REST])[kzero] =   c8o27* (drho-cu_sq);
-      (D.f[E   ])[ke   ] =   c2o27* (drho+c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cu_sq);
-      (D.f[W   ])[kw   ] =   c2o27* (drho+c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cu_sq);
-      (D.f[N   ])[kn   ] =   c2o27* (drho+c3o1*(    vx2     )+c9o2*(     vx2    )*(     vx2    )-cu_sq);
-      (D.f[S   ])[ks   ] =   c2o27* (drho+c3o1*(   -vx2     )+c9o2*(    -vx2    )*(    -vx2    )-cu_sq);
-      (D.f[T   ])[kt   ] =   c2o27* (drho+c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cu_sq);
-      (D.f[B   ])[kb   ] =   c2o27* (drho+c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cu_sq);
-      (D.f[NE  ])[kne  ] =   c1o54* (drho+c3o1*( vx1+vx2    )+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq);
-      (D.f[SW  ])[ksw  ] =   c1o54* (drho+c3o1*(-vx1-vx2    )+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq);
-      (D.f[SE  ])[kse  ] =   c1o54* (drho+c3o1*( vx1-vx2    )+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq);
-      (D.f[NW  ])[knw  ] =   c1o54* (drho+c3o1*(-vx1+vx2    )+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq);
-      (D.f[TE  ])[kte  ] =   c1o54* (drho+c3o1*( vx1    +vx3)+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq);
-      (D.f[BW  ])[kbw  ] =   c1o54* (drho+c3o1*(-vx1    -vx3)+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq);
-      (D.f[BE  ])[kbe  ] =   c1o54* (drho+c3o1*( vx1    -vx3)+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq);
-      (D.f[TW  ])[ktw  ] =   c1o54* (drho+c3o1*(-vx1    +vx3)+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq);
-      (D.f[TN  ])[ktn  ] =   c1o54* (drho+c3o1*(     vx2+vx3)+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq);
-      (D.f[BS  ])[kbs  ] =   c1o54* (drho+c3o1*(    -vx2-vx3)+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq);
-      (D.f[BN  ])[kbn  ] =   c1o54* (drho+c3o1*(     vx2-vx3)+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq);
-      (D.f[TS  ])[kts  ] =   c1o54* (drho+c3o1*(    -vx2+vx3)+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq);
-      (D.f[TNE ])[ktne ] =   c1o216*(drho+c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq);
-      (D.f[BSW ])[kbsw ] =   c1o216*(drho+c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq);
-      (D.f[BNE ])[kbne ] =   c1o216*(drho+c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq);
-      (D.f[TSW ])[ktsw ] =   c1o216*(drho+c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq);
-      (D.f[TSE ])[ktse ] =   c1o216*(drho+c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq);
-      (D.f[BNW ])[kbnw ] =   c1o216*(drho+c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq);
-      (D.f[BSE ])[kbse ] =   c1o216*(drho+c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq);
-      (D.f[TNW ])[ktnw ] =   c1o216*(drho+c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq);
+      (D.f[DIR_000])[kzero] =   c8o27* (drho-cu_sq);
+      (D.f[DIR_P00   ])[ke   ] =   c2o27* (drho+c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cu_sq);
+      (D.f[DIR_M00   ])[kw   ] =   c2o27* (drho+c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cu_sq);
+      (D.f[DIR_0P0   ])[kn   ] =   c2o27* (drho+c3o1*(    vx2     )+c9o2*(     vx2    )*(     vx2    )-cu_sq);
+      (D.f[DIR_0M0   ])[ks   ] =   c2o27* (drho+c3o1*(   -vx2     )+c9o2*(    -vx2    )*(    -vx2    )-cu_sq);
+      (D.f[DIR_00P   ])[kt   ] =   c2o27* (drho+c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cu_sq);
+      (D.f[DIR_00M   ])[kb   ] =   c2o27* (drho+c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cu_sq);
+      (D.f[DIR_PP0  ])[kne  ] =   c1o54* (drho+c3o1*( vx1+vx2    )+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq);
+      (D.f[DIR_MM0  ])[ksw  ] =   c1o54* (drho+c3o1*(-vx1-vx2    )+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq);
+      (D.f[DIR_PM0  ])[kse  ] =   c1o54* (drho+c3o1*( vx1-vx2    )+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq);
+      (D.f[DIR_MP0  ])[knw  ] =   c1o54* (drho+c3o1*(-vx1+vx2    )+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq);
+      (D.f[DIR_P0P  ])[kte  ] =   c1o54* (drho+c3o1*( vx1    +vx3)+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq);
+      (D.f[DIR_M0M  ])[kbw  ] =   c1o54* (drho+c3o1*(-vx1    -vx3)+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq);
+      (D.f[DIR_P0M  ])[kbe  ] =   c1o54* (drho+c3o1*( vx1    -vx3)+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq);
+      (D.f[DIR_M0P  ])[ktw  ] =   c1o54* (drho+c3o1*(-vx1    +vx3)+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq);
+      (D.f[DIR_0PP  ])[ktn  ] =   c1o54* (drho+c3o1*(     vx2+vx3)+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq);
+      (D.f[DIR_0MM  ])[kbs  ] =   c1o54* (drho+c3o1*(    -vx2-vx3)+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq);
+      (D.f[DIR_0PM  ])[kbn  ] =   c1o54* (drho+c3o1*(     vx2-vx3)+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq);
+      (D.f[DIR_0MP  ])[kts  ] =   c1o54* (drho+c3o1*(    -vx2+vx3)+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq);
+      (D.f[DIR_PPP ])[ktne ] =   c1o216*(drho+c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq);
+      (D.f[DIR_MMM ])[kbsw ] =   c1o216*(drho+c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq);
+      (D.f[DIR_PPM ])[kbne ] =   c1o216*(drho+c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq);
+      (D.f[DIR_MMP ])[ktsw ] =   c1o216*(drho+c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq);
+      (D.f[DIR_PMP ])[ktse ] =   c1o216*(drho+c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq);
+      (D.f[DIR_MPM ])[kbnw ] =   c1o216*(drho+c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq);
+      (D.f[DIR_PMM ])[kbse ] =   c1o216*(drho+c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq);
+      (D.f[DIR_MPP ])[ktnw ] =   c1o216*(drho+c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq);
    }
    __syncthreads();
 }          
@@ -3414,7 +3413,7 @@ extern "C" __global__ void LB_BC_Vel_West_27( int nx,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QVelDevPlainBB27(
+__global__ void QVelDevPlainBB27(
    real* velocityX,
    real* velocityY,
    real* velocityZ,
@@ -3501,32 +3500,32 @@ extern "C" __global__ void QVelDevPlainBB27(
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set local distributions
       //!
-      real f_W    = (dist.f[E   ])[ke   ];
-      real f_E    = (dist.f[W   ])[kw   ];
-      real f_S    = (dist.f[N   ])[kn   ];
-      real f_N    = (dist.f[S   ])[ks   ];
-      real f_B    = (dist.f[T   ])[kt   ];
-      real f_T    = (dist.f[B   ])[kb   ];
-      real f_SW   = (dist.f[NE  ])[kne  ];
-      real f_NE   = (dist.f[SW  ])[ksw  ];
-      real f_NW   = (dist.f[SE  ])[kse  ];
-      real f_SE   = (dist.f[NW  ])[knw  ];
-      real f_BW   = (dist.f[TE  ])[kte  ];
-      real f_TE   = (dist.f[BW  ])[kbw  ];
-      real f_TW   = (dist.f[BE  ])[kbe  ];
-      real f_BE   = (dist.f[TW  ])[ktw  ];
-      real f_BS   = (dist.f[TN  ])[ktn  ];
-      real f_TN   = (dist.f[BS  ])[kbs  ];
-      real f_TS   = (dist.f[BN  ])[kbn  ];
-      real f_BN   = (dist.f[TS  ])[kts  ];
-      real f_BSW  = (dist.f[TNE ])[ktne ];
-      real f_BNE  = (dist.f[TSW ])[ktsw ];
-      real f_BNW  = (dist.f[TSE ])[ktse ];
-      real f_BSE  = (dist.f[TNW ])[ktnw ];
-      real f_TSW  = (dist.f[BNE ])[kbne ];
-      real f_TNE  = (dist.f[BSW ])[kbsw ];
-      real f_TNW  = (dist.f[BSE ])[kbse ];
-      real f_TSE  = (dist.f[BNW ])[kbnw ];
+      real f_W    = (dist.f[DIR_P00   ])[ke   ];
+      real f_E    = (dist.f[DIR_M00   ])[kw   ];
+      real f_S    = (dist.f[DIR_0P0   ])[kn   ];
+      real f_N    = (dist.f[DIR_0M0   ])[ks   ];
+      real f_B    = (dist.f[DIR_00P   ])[kt   ];
+      real f_T    = (dist.f[DIR_00M   ])[kb   ];
+      real f_SW   = (dist.f[DIR_PP0  ])[kne  ];
+      real f_NE   = (dist.f[DIR_MM0  ])[ksw  ];
+      real f_NW   = (dist.f[DIR_PM0  ])[kse  ];
+      real f_SE   = (dist.f[DIR_MP0  ])[knw  ];
+      real f_BW   = (dist.f[DIR_P0P  ])[kte  ];
+      real f_TE   = (dist.f[DIR_M0M  ])[kbw  ];
+      real f_TW   = (dist.f[DIR_P0M  ])[kbe  ];
+      real f_BE   = (dist.f[DIR_M0P  ])[ktw  ];
+      real f_BS   = (dist.f[DIR_0PP  ])[ktn  ];
+      real f_TN   = (dist.f[DIR_0MM  ])[kbs  ];
+      real f_TS   = (dist.f[DIR_0PM  ])[kbn  ];
+      real f_BN   = (dist.f[DIR_0MP  ])[kts  ];
+      real f_BSW  = (dist.f[DIR_PPP ])[ktne ];
+      real f_BNE  = (dist.f[DIR_MMP ])[ktsw ];
+      real f_BNW  = (dist.f[DIR_PMP ])[ktse ];
+      real f_BSE  = (dist.f[DIR_MPP ])[ktnw ];
+      real f_TSW  = (dist.f[DIR_PPM ])[kbne ];
+      real f_TNE  = (dist.f[DIR_MMM ])[kbsw ];
+      real f_TNW  = (dist.f[DIR_PMM ])[kbse ];
+      real f_TSE  = (dist.f[DIR_MPM ])[kbnw ];
 
       ////////////////////////////////////////////////////////////////////////////////
       //! - change the pointer to write the results in the correct array
@@ -3536,32 +3535,32 @@ extern "C" __global__ void QVelDevPlainBB27(
       ////////////////////////////////////////////////////////////////////////////////
       //! - rewrite distributions if there is a sub-grid distance (q) in same direction
       real q;
-      q = (subgridD.q[E  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[W  ])[kw  ]=f_E   + c4o9  * (-VeloX);
-      q = (subgridD.q[W  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[E  ])[ke  ]=f_W   + c4o9  * ( VeloX);
-      q = (subgridD.q[N  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[S  ])[ks  ]=f_N   + c4o9  * (-VeloY);
-      q = (subgridD.q[S  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[N  ])[kn  ]=f_S   + c4o9  * ( VeloY);
-      q = (subgridD.q[T  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[B  ])[kb  ]=f_T   + c4o9  * (-VeloZ);
-      q = (subgridD.q[B  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[T  ])[kt  ]=f_B   + c4o9  * ( VeloZ);
-      q = (subgridD.q[NE ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[SW ])[ksw ]=f_NE  + c1o9  * (-VeloX - VeloY);
-      q = (subgridD.q[SW ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[NE ])[kne ]=f_SW  + c1o9  * ( VeloX + VeloY);
-      q = (subgridD.q[SE ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[NW ])[knw ]=f_SE  + c1o9  * (-VeloX + VeloY);
-      q = (subgridD.q[NW ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[SE ])[kse ]=f_NW  + c1o9  * ( VeloX - VeloY);
-      q = (subgridD.q[TE ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[BW ])[kbw ]=f_TE  + c1o9  * (-VeloX - VeloZ);
-      q = (subgridD.q[BW ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[TE ])[kte ]=f_BW  + c1o9  * ( VeloX + VeloZ);
-      q = (subgridD.q[BE ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[TW ])[ktw ]=f_BE  + c1o9  * (-VeloX + VeloZ);
-      q = (subgridD.q[TW ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[BE ])[kbe ]=f_TW  + c1o9  * ( VeloX - VeloZ);
-      q = (subgridD.q[TN ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[BS ])[kbs ]=f_TN  + c1o9  * (-VeloY - VeloZ);
-      q = (subgridD.q[BS ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[TN ])[ktn ]=f_BS  + c1o9  * ( VeloY + VeloZ);
-      q = (subgridD.q[BN ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[TS ])[kts ]=f_BN  + c1o9  * (-VeloY + VeloZ);
-      q = (subgridD.q[TS ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[BN ])[kbn ]=f_TS  + c1o9  * ( VeloY - VeloZ);
-      q = (subgridD.q[TNE])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[BSW])[kbsw]=f_TNE + c1o36 * (-VeloX - VeloY - VeloZ);
-      q = (subgridD.q[BSW])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[TNE])[ktne]=f_BSW + c1o36 * ( VeloX + VeloY + VeloZ);
-      q = (subgridD.q[BNE])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[TSW])[ktsw]=f_BNE + c1o36 * (-VeloX - VeloY + VeloZ);
-      q = (subgridD.q[TSW])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[BNE])[kbne]=f_TSW + c1o36 * ( VeloX + VeloY - VeloZ);
-      q = (subgridD.q[TSE])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[BNW])[kbnw]=f_TSE + c1o36 * (-VeloX + VeloY - VeloZ);
-      q = (subgridD.q[BNW])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[TSE])[ktse]=f_BNW + c1o36 * ( VeloX - VeloY + VeloZ);
-      q = (subgridD.q[BSE])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[TNW])[ktnw]=f_BSE + c1o36 * (-VeloX + VeloY + VeloZ);
-      q = (subgridD.q[TNW])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[BSE])[kbse]=f_TNW + c1o36 * ( VeloX - VeloY - VeloZ);
+      q = (subgridD.q[DIR_P00  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_M00  ])[kw  ]=f_E   + c4o9  * (-VeloX);
+      q = (subgridD.q[DIR_M00  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_P00  ])[ke  ]=f_W   + c4o9  * ( VeloX);
+      q = (subgridD.q[DIR_0P0  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0M0  ])[ks  ]=f_N   + c4o9  * (-VeloY);
+      q = (subgridD.q[DIR_0M0  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0P0  ])[kn  ]=f_S   + c4o9  * ( VeloY);
+      q = (subgridD.q[DIR_00P  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_00M  ])[kb  ]=f_T   + c4o9  * (-VeloZ);
+      q = (subgridD.q[DIR_00M  ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_00P  ])[kt  ]=f_B   + c4o9  * ( VeloZ);
+      q = (subgridD.q[DIR_PP0 ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MM0 ])[ksw ]=f_NE  + c1o9  * (-VeloX - VeloY);
+      q = (subgridD.q[DIR_MM0 ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PP0 ])[kne ]=f_SW  + c1o9  * ( VeloX + VeloY);
+      q = (subgridD.q[DIR_PM0 ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MP0 ])[knw ]=f_SE  + c1o9  * (-VeloX + VeloY);
+      q = (subgridD.q[DIR_MP0 ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PM0 ])[kse ]=f_NW  + c1o9  * ( VeloX - VeloY);
+      q = (subgridD.q[DIR_P0P ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_M0M ])[kbw ]=f_TE  + c1o9  * (-VeloX - VeloZ);
+      q = (subgridD.q[DIR_M0M ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_P0P ])[kte ]=f_BW  + c1o9  * ( VeloX + VeloZ);
+      q = (subgridD.q[DIR_P0M ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_M0P ])[ktw ]=f_BE  + c1o9  * (-VeloX + VeloZ);
+      q = (subgridD.q[DIR_M0P ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_P0M ])[kbe ]=f_TW  + c1o9  * ( VeloX - VeloZ);
+      q = (subgridD.q[DIR_0PP ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0MM ])[kbs ]=f_TN  + c1o9  * (-VeloY - VeloZ);
+      q = (subgridD.q[DIR_0MM ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0PP ])[ktn ]=f_BS  + c1o9  * ( VeloY + VeloZ);
+      q = (subgridD.q[DIR_0PM ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0MP ])[kts ]=f_BN  + c1o9  * (-VeloY + VeloZ);
+      q = (subgridD.q[DIR_0MP ])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_0PM ])[kbn ]=f_TS  + c1o9  * ( VeloY - VeloZ);
+      q = (subgridD.q[DIR_PPP])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MMM])[kbsw]=f_TNE + c1o36 * (-VeloX - VeloY - VeloZ);
+      q = (subgridD.q[DIR_MMM])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PPP])[ktne]=f_BSW + c1o36 * ( VeloX + VeloY + VeloZ);
+      q = (subgridD.q[DIR_PPM])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MMP])[ktsw]=f_BNE + c1o36 * (-VeloX - VeloY + VeloZ);
+      q = (subgridD.q[DIR_MMP])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PPM])[kbne]=f_TSW + c1o36 * ( VeloX + VeloY - VeloZ);
+      q = (subgridD.q[DIR_PMP])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MPM])[kbnw]=f_TSE + c1o36 * (-VeloX + VeloY - VeloZ);
+      q = (subgridD.q[DIR_MPM])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PMP])[ktse]=f_BNW + c1o36 * ( VeloX - VeloY + VeloZ);
+      q = (subgridD.q[DIR_PMM])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_MPP])[ktnw]=f_BSE + c1o36 * (-VeloX + VeloY + VeloZ);
+      q = (subgridD.q[DIR_MPP])[k];   if (q>=c0o1 && q<=c1o1)    (dist.f[DIR_PMM])[kbse]=f_TNW + c1o36 * ( VeloX - VeloY - VeloZ);
    }
 }
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -3605,7 +3604,7 @@ extern "C" __global__ void QVelDevPlainBB27(
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QVelDevCouette27(real* vx,
+__global__ void QVelDevCouette27(real* vx,
 											real* vy,
 	 										real* vz,
 											real* DD,
@@ -3622,63 +3621,63 @@ extern "C" __global__ void QVelDevCouette27(real* vx,
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[E   ] = &DD[E   *size_Mat];
-      D.f[W   ] = &DD[W   *size_Mat];
-      D.f[N   ] = &DD[N   *size_Mat];
-      D.f[S   ] = &DD[S   *size_Mat];
-      D.f[T   ] = &DD[T   *size_Mat];
-      D.f[B   ] = &DD[B   *size_Mat];
-      D.f[NE  ] = &DD[NE  *size_Mat];
-      D.f[SW  ] = &DD[SW  *size_Mat];
-      D.f[SE  ] = &DD[SE  *size_Mat];
-      D.f[NW  ] = &DD[NW  *size_Mat];
-      D.f[TE  ] = &DD[TE  *size_Mat];
-      D.f[BW  ] = &DD[BW  *size_Mat];
-      D.f[BE  ] = &DD[BE  *size_Mat];
-      D.f[TW  ] = &DD[TW  *size_Mat];
-      D.f[TN  ] = &DD[TN  *size_Mat];
-      D.f[BS  ] = &DD[BS  *size_Mat];
-      D.f[BN  ] = &DD[BN  *size_Mat];
-      D.f[TS  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[TNE *size_Mat];
-      D.f[TSW ] = &DD[TSW *size_Mat];
-      D.f[TSE ] = &DD[TSE *size_Mat];
-      D.f[TNW ] = &DD[TNW *size_Mat];
-      D.f[BNE ] = &DD[BNE *size_Mat];
-      D.f[BSW ] = &DD[BSW *size_Mat];
-      D.f[BSE ] = &DD[BSE *size_Mat];
-      D.f[BNW ] = &DD[BNW *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
    } 
    else
    {
-      D.f[W   ] = &DD[E   *size_Mat];
-      D.f[E   ] = &DD[W   *size_Mat];
-      D.f[S   ] = &DD[N   *size_Mat];
-      D.f[N   ] = &DD[S   *size_Mat];
-      D.f[B   ] = &DD[T   *size_Mat];
-      D.f[T   ] = &DD[B   *size_Mat];
-      D.f[SW  ] = &DD[NE  *size_Mat];
-      D.f[NE  ] = &DD[SW  *size_Mat];
-      D.f[NW  ] = &DD[SE  *size_Mat];
-      D.f[SE  ] = &DD[NW  *size_Mat];
-      D.f[BW  ] = &DD[TE  *size_Mat];
-      D.f[TE  ] = &DD[BW  *size_Mat];
-      D.f[TW  ] = &DD[BE  *size_Mat];
-      D.f[BE  ] = &DD[TW  *size_Mat];
-      D.f[BS  ] = &DD[TN  *size_Mat];
-      D.f[TN  ] = &DD[BS  *size_Mat];
-      D.f[TS  ] = &DD[BN  *size_Mat];
-      D.f[BN  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[BSW *size_Mat];
-      D.f[TSW ] = &DD[BNE *size_Mat];
-      D.f[TSE ] = &DD[BNW *size_Mat];
-      D.f[TNW ] = &DD[BSE *size_Mat];
-      D.f[BNE ] = &DD[TSW *size_Mat];
-      D.f[BSW ] = &DD[TNE *size_Mat];
-      D.f[BSE ] = &DD[TNW *size_Mat];
-      D.f[BNW ] = &DD[TSE *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -3703,32 +3702,32 @@ extern "C" __global__ void QVelDevCouette27(real* vx,
 			 *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
 			 *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
 			 *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[E   * numberOfBCnodes];
-      q_dirW   = &QQ[W   * numberOfBCnodes];
-      q_dirN   = &QQ[N   * numberOfBCnodes];
-      q_dirS   = &QQ[S   * numberOfBCnodes];
-      q_dirT   = &QQ[T   * numberOfBCnodes];
-      q_dirB   = &QQ[B   * numberOfBCnodes];
-      q_dirNE  = &QQ[NE  * numberOfBCnodes];
-      q_dirSW  = &QQ[SW  * numberOfBCnodes];
-      q_dirSE  = &QQ[SE  * numberOfBCnodes];
-      q_dirNW  = &QQ[NW  * numberOfBCnodes];
-      q_dirTE  = &QQ[TE  * numberOfBCnodes];
-      q_dirBW  = &QQ[BW  * numberOfBCnodes];
-      q_dirBE  = &QQ[BE  * numberOfBCnodes];
-      q_dirTW  = &QQ[TW  * numberOfBCnodes];
-      q_dirTN  = &QQ[TN  * numberOfBCnodes];
-      q_dirBS  = &QQ[BS  * numberOfBCnodes];
-      q_dirBN  = &QQ[BN  * numberOfBCnodes];
-      q_dirTS  = &QQ[TS  * numberOfBCnodes];
-      q_dirTNE = &QQ[TNE * numberOfBCnodes];
-      q_dirTSW = &QQ[TSW * numberOfBCnodes];
-      q_dirTSE = &QQ[TSE * numberOfBCnodes];
-      q_dirTNW = &QQ[TNW * numberOfBCnodes];
-      q_dirBNE = &QQ[BNE * numberOfBCnodes];
-      q_dirBSW = &QQ[BSW * numberOfBCnodes];
-      q_dirBSE = &QQ[BSE * numberOfBCnodes];
-      q_dirBNW = &QQ[BNW * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
+      q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
+      q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
+      q_dirTNW = &QQ[DIR_MPP * numberOfBCnodes];
+      q_dirBNE = &QQ[DIR_PPM * numberOfBCnodes];
+      q_dirBSW = &QQ[DIR_MMM * numberOfBCnodes];
+      q_dirBSE = &QQ[DIR_PMM * numberOfBCnodes];
+      q_dirBNW = &QQ[DIR_MPM * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -3762,94 +3761,94 @@ extern "C" __global__ void QVelDevCouette27(real* vx,
       ////////////////////////////////////////////////////////////////////////////////
      
       ////////////////////////////////////////////////////////////////////////////////
-      real f_W    = (D.f[E   ])[ke   ];
-      real f_E    = (D.f[W   ])[kw   ];
-      real f_S    = (D.f[N   ])[kn   ];
-      real f_N    = (D.f[S   ])[ks   ];
-      real f_B    = (D.f[T   ])[kt   ];
-      real f_T    = (D.f[B   ])[kb   ];
-      real f_SW   = (D.f[NE  ])[kne  ];
-      real f_NE   = (D.f[SW  ])[ksw  ];
-      real f_NW   = (D.f[SE  ])[kse  ];
-      real f_SE   = (D.f[NW  ])[knw  ];
-      real f_BW   = (D.f[TE  ])[kte  ];
-      real f_TE   = (D.f[BW  ])[kbw  ];
-      real f_TW   = (D.f[BE  ])[kbe  ];
-      real f_BE   = (D.f[TW  ])[ktw  ];
-      real f_BS   = (D.f[TN  ])[ktn  ];
-      real f_TN   = (D.f[BS  ])[kbs  ];
-      real f_TS   = (D.f[BN  ])[kbn  ];
-      real f_BN   = (D.f[TS  ])[kts  ];
-      real f_BSW  = (D.f[TNE ])[ktne ];
-      real f_BNE  = (D.f[TSW ])[ktsw ];
-      real f_BNW  = (D.f[TSE ])[ktse ];
-      real f_BSE  = (D.f[TNW ])[ktnw ];
-      real f_TSW  = (D.f[BNE ])[kbne ];
-      real f_TNE  = (D.f[BSW ])[kbsw ];
-      real f_TNW  = (D.f[BSE ])[kbse ];
-      real f_TSE  = (D.f[BNW ])[kbnw ];
+      real f_W    = (D.f[DIR_P00   ])[ke   ];
+      real f_E    = (D.f[DIR_M00   ])[kw   ];
+      real f_S    = (D.f[DIR_0P0   ])[kn   ];
+      real f_N    = (D.f[DIR_0M0   ])[ks   ];
+      real f_B    = (D.f[DIR_00P   ])[kt   ];
+      real f_T    = (D.f[DIR_00M   ])[kb   ];
+      real f_SW   = (D.f[DIR_PP0  ])[kne  ];
+      real f_NE   = (D.f[DIR_MM0  ])[ksw  ];
+      real f_NW   = (D.f[DIR_PM0  ])[kse  ];
+      real f_SE   = (D.f[DIR_MP0  ])[knw  ];
+      real f_BW   = (D.f[DIR_P0P  ])[kte  ];
+      real f_TE   = (D.f[DIR_M0M  ])[kbw  ];
+      real f_TW   = (D.f[DIR_P0M  ])[kbe  ];
+      real f_BE   = (D.f[DIR_M0P  ])[ktw  ];
+      real f_BS   = (D.f[DIR_0PP  ])[ktn  ];
+      real f_TN   = (D.f[DIR_0MM  ])[kbs  ];
+      real f_TS   = (D.f[DIR_0PM  ])[kbn  ];
+      real f_BN   = (D.f[DIR_0MP  ])[kts  ];
+      real f_BSW  = (D.f[DIR_PPP ])[ktne ];
+      real f_BNE  = (D.f[DIR_MMP ])[ktsw ];
+      real f_BNW  = (D.f[DIR_PMP ])[ktse ];
+      real f_BSE  = (D.f[DIR_MPP ])[ktnw ];
+      real f_TSW  = (D.f[DIR_PPM ])[kbne ];
+      real f_TNE  = (D.f[DIR_MMM ])[kbsw ];
+      real f_TNW  = (D.f[DIR_PMM ])[kbse ];
+      real f_TSE  = (D.f[DIR_MPM ])[kbnw ];
 	  ////////////////////////////////////////////////////////////////////////////////
 
 	  ////////////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D.f[E   ] = &DD[E   *size_Mat];
-         D.f[W   ] = &DD[W   *size_Mat];
-         D.f[N   ] = &DD[N   *size_Mat];
-         D.f[S   ] = &DD[S   *size_Mat];
-         D.f[T   ] = &DD[T   *size_Mat];
-         D.f[B   ] = &DD[B   *size_Mat];
-         D.f[NE  ] = &DD[NE  *size_Mat];
-         D.f[SW  ] = &DD[SW  *size_Mat];
-         D.f[SE  ] = &DD[SE  *size_Mat];
-         D.f[NW  ] = &DD[NW  *size_Mat];
-         D.f[TE  ] = &DD[TE  *size_Mat];
-         D.f[BW  ] = &DD[BW  *size_Mat];
-         D.f[BE  ] = &DD[BE  *size_Mat];
-         D.f[TW  ] = &DD[TW  *size_Mat];
-         D.f[TN  ] = &DD[TN  *size_Mat];
-         D.f[BS  ] = &DD[BS  *size_Mat];
-         D.f[BN  ] = &DD[BN  *size_Mat];
-         D.f[TS  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[TNE *size_Mat];
-         D.f[TSW ] = &DD[TSW *size_Mat];
-         D.f[TSE ] = &DD[TSE *size_Mat];
-         D.f[TNW ] = &DD[TNW *size_Mat];
-         D.f[BNE ] = &DD[BNE *size_Mat];
-         D.f[BSW ] = &DD[BSW *size_Mat];
-         D.f[BSE ] = &DD[BSE *size_Mat];
-         D.f[BNW ] = &DD[BNW *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
       } 
       else
       {
-         D.f[W   ] = &DD[E   *size_Mat];
-         D.f[E   ] = &DD[W   *size_Mat];
-         D.f[S   ] = &DD[N   *size_Mat];
-         D.f[N   ] = &DD[S   *size_Mat];
-         D.f[B   ] = &DD[T   *size_Mat];
-         D.f[T   ] = &DD[B   *size_Mat];
-         D.f[SW  ] = &DD[NE  *size_Mat];
-         D.f[NE  ] = &DD[SW  *size_Mat];
-         D.f[NW  ] = &DD[SE  *size_Mat];
-         D.f[SE  ] = &DD[NW  *size_Mat];
-         D.f[BW  ] = &DD[TE  *size_Mat];
-         D.f[TE  ] = &DD[BW  *size_Mat];
-         D.f[TW  ] = &DD[BE  *size_Mat];
-         D.f[BE  ] = &DD[TW  *size_Mat];
-         D.f[BS  ] = &DD[TN  *size_Mat];
-         D.f[TN  ] = &DD[BS  *size_Mat];
-         D.f[TS  ] = &DD[BN  *size_Mat];
-         D.f[BN  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[BSW *size_Mat];
-         D.f[TSW ] = &DD[BNE *size_Mat];
-         D.f[TSE ] = &DD[BNW *size_Mat];
-         D.f[TNW ] = &DD[BSE *size_Mat];
-         D.f[BNE ] = &DD[TSW *size_Mat];
-         D.f[BSW ] = &DD[TNE *size_Mat];
-         D.f[BSE ] = &DD[TNW *size_Mat];
-         D.f[BNW ] = &DD[TSE *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
       }
 	  //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  ///////               FlowDirection Y !!!!!!!!!!                                                           ///////////////////////////////////
@@ -3864,63 +3863,63 @@ extern "C" __global__ void QVelDevCouette27(real* vx,
 	  real ms=-c6o1;
 	  //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  //2nd order moment
-	  real kxxMyyFromfcNEQ = c0o1;//-c3o2 * (f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE-(vx1*vx1-vx2*vx2));		//all E+W minus all N+S (no combinations of xy left)
+	  real kxxMyyFromfcNEQ = c0o1;//-c3o2 * (f_BW+f_W+f_TW-f_BS-f_S-f_TS-f_BN-f_N-f_TN+f_BE+f_E+f_TE-(vx1*vx1-vx2*vx2));		//all E/W populations (f_W, f_E, f_BW, f_TW, f_BE, f_TE) minus all N/S populations (f_S, f_N, f_BS, f_TS, f_BN, f_TN); no combinations of xy left
 
 	  //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 	  //set distributions
       real q;
-      q = q_dirE[k];	if (q>=c0o1 && q<=c1o1)	(D.f[W  ])[kw  ]=f_E   + ms*c2o27  * VeloX;	
-      q = q_dirW[k];	if (q>=c0o1 && q<=c1o1)	(D.f[E  ])[ke  ]=f_W   - ms*c2o27  * VeloX;	
-      q = q_dirN[k];	if (q>=c0o1 && q<=c1o1)	(D.f[S  ])[ks  ]=f_N   + ms*c2o27  * VeloY;	
-      q = q_dirS[k];	if (q>=c0o1 && q<=c1o1)	(D.f[N  ])[kn  ]=f_S   - ms*c2o27  * VeloY;	
-	  q = q_dirT[k];	if (q>=c0o1 && q<=c1o1)	(D.f[B  ])[kb  ]=f_T   + ms*c2o27  * VeloZ - c3o2*c2o27*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on;
-      q = q_dirB[k];	if (q>=c0o1 && q<=c1o1)	(D.f[T  ])[kt  ]=f_B   - ms*c2o27  * VeloZ;
-      q = q_dirNE[k];	if (q>=c0o1 && q<=c1o1)	(D.f[SW ])[ksw ]=f_NE  + ms*c1o54  * VeloX + ms*c1o54  * VeloY;
-	  q = q_dirSW[k];	if (q>=c0o1 && q<=c1o1)	(D.f[NE ])[kne ]=f_SW  - ms*c1o54  * VeloX - ms*c1o54  * VeloY;
-	  q = q_dirSE[k];	if (q>=c0o1 && q<=c1o1)	(D.f[NW ])[knw ]=f_SE  + ms*c1o54  * VeloX - ms*c1o54  * VeloY;
-	  q = q_dirNW[k];	if (q>=c0o1 && q<=c1o1)	(D.f[SE ])[kse ]=f_NW  - ms*c1o54  * VeloX + ms*c1o54  * VeloY;
-	  q = q_dirTE[k];	if (q>=c0o1 && q<=c1o1)	(D.f[BW ])[kbw ]=f_TE  + ms*c1o54  * VeloX + ms*c1o54  * VeloZ - c3o2*c1o54*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on-c1o12*kxxMyyFromfcNEQ;
-	  q = q_dirBW[k];	if (q>=c0o1 && q<=c1o1)	(D.f[TE ])[kte ]=f_BW  - ms*c1o54  * VeloX - ms*c1o54  * VeloZ;
-	  q = q_dirBE[k];	if (q>=c0o1 && q<=c1o1)	(D.f[TW ])[ktw ]=f_BE  + ms*c1o54  * VeloX - ms*c1o54  * VeloZ;
-	  q = q_dirTW[k];	if (q>=c0o1 && q<=c1o1)	(D.f[BE ])[kbe ]=f_TW  - ms*c1o54  * VeloX + ms*c1o54  * VeloZ - c3o2*c1o54*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on-c1o12*kxxMyyFromfcNEQ;
-	  q = q_dirTN[k];	if (q>=c0o1 && q<=c1o1)	(D.f[BS ])[kbs ]=f_TN  + ms*c1o54  * VeloY + ms*c1o54  * VeloZ + c3o1*c1o54*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on+c1o12*kxxMyyFromfcNEQ;
-	  q = q_dirBS[k];	if (q>=c0o1 && q<=c1o1)	(D.f[TN ])[ktn ]=f_BS  - ms*c1o54  * VeloY - ms*c1o54  * VeloZ;
-	  q = q_dirBN[k];	if (q>=c0o1 && q<=c1o1)	(D.f[TS ])[kts ]=f_BN  + ms*c1o54  * VeloY - ms*c1o54  * VeloZ;
-	  q = q_dirTS[k];	if (q>=c0o1 && q<=c1o1)	(D.f[BN ])[kbn ]=f_TS  - ms*c1o54  * VeloY + ms*c1o54  * VeloZ + c3o1*c1o54*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on+c1o12*kxxMyyFromfcNEQ;
-      q = q_dirTNE[k];	if (q>=c0o1 && q<=c1o1)	(D.f[BSW])[kbsw]=f_TNE + ms*c1o216 * VeloX + ms*c1o216 * VeloY + ms*c1o216 * VeloZ + c3o1*c1o216*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on;
-      q = q_dirBSW[k];	if (q>=c0o1 && q<=c1o1)	(D.f[TNE])[ktne]=f_BSW - ms*c1o216 * VeloX - ms*c1o216 * VeloY - ms*c1o216 * VeloZ;
-      q = q_dirBNE[k];	if (q>=c0o1 && q<=c1o1)	(D.f[TSW])[ktsw]=f_BNE + ms*c1o216 * VeloX + ms*c1o216 * VeloY - ms*c1o216 * VeloZ;
-      q = q_dirTSW[k];	if (q>=c0o1 && q<=c1o1)	(D.f[BNE])[kbne]=f_TSW - ms*c1o216 * VeloX - ms*c1o216 * VeloY + ms*c1o216 * VeloZ + c3o1*c1o216*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on;
-      q = q_dirTSE[k];	if (q>=c0o1 && q<=c1o1)	(D.f[BNW])[kbnw]=f_TSE + ms*c1o216 * VeloX - ms*c1o216 * VeloY + ms*c1o216 * VeloZ + c3o1*c1o216*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on;
-      q = q_dirBNW[k];	if (q>=c0o1 && q<=c1o1)	(D.f[TSE])[ktse]=f_BNW - ms*c1o216 * VeloX + ms*c1o216 * VeloY - ms*c1o216 * VeloZ;
-      q = q_dirBSE[k];	if (q>=c0o1 && q<=c1o1)	(D.f[TNW])[ktnw]=f_BSE + ms*c1o216 * VeloX - ms*c1o216 * VeloY - ms*c1o216 * VeloZ;
-      q = q_dirTNW[k];	if (q>=c0o1 && q<=c1o1)	(D.f[BSE])[kbse]=f_TNW - ms*c1o216 * VeloX + ms*c1o216 * VeloY + ms*c1o216 * VeloZ + c3o1*c1o216*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on;
-      //q = q_dirE[k];	if (q>=zero && q<=one)	(D.f[W  ])[kw  ]=f_E   + ms*c2over27  * VeloX;	
-   //   q = q_dirW[k];	if (q>=zero && q<=one)	(D.f[E  ])[ke  ]=f_W   - ms*c2over27  * VeloX;	
-   //   q = q_dirN[k];	if (q>=zero && q<=one)	(D.f[S  ])[ks  ]=f_N   + ms*c2over27  * VeloY;	
-   //   q = q_dirS[k];	if (q>=zero && q<=one)	(D.f[N  ])[kn  ]=f_S   - ms*c2over27  * VeloY;	
-	  //q = q_dirT[k];	if (q>=zero && q<=one)	(D.f[B  ])[kb  ]=f_T   + ms*c2over27  * VeloZ - c1o9*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on;
-   //   q = q_dirB[k];	if (q>=zero && q<=one)	(D.f[T  ])[kt  ]=f_B   - ms*c2over27  * VeloZ;
-   //   q = q_dirNE[k];	if (q>=zero && q<=one)	(D.f[SW ])[ksw ]=f_NE  + ms*c1over54  * VeloX + ms*c1over54  * VeloY;
-	  //q = q_dirSW[k];	if (q>=zero && q<=one)	(D.f[NE ])[kne ]=f_SW  - ms*c1over54  * VeloX - ms*c1over54  * VeloY;
-	  //q = q_dirSE[k];	if (q>=zero && q<=one)	(D.f[NW ])[knw ]=f_SE  + ms*c1over54  * VeloX - ms*c1over54  * VeloY;
-	  //q = q_dirNW[k];	if (q>=zero && q<=one)	(D.f[SE ])[kse ]=f_NW  - ms*c1over54  * VeloX + ms*c1over54  * VeloY;
-	  //q = q_dirTE[k];	if (q>=zero && q<=one)	(D.f[BW ])[kbw ]=f_TE  + ms*c1over54  * VeloX + ms*c1over54  * VeloZ - c1o36*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on;
-	  //q = q_dirBW[k];	if (q>=zero && q<=one)	(D.f[TE ])[kte ]=f_BW  - ms*c1over54  * VeloX - ms*c1over54  * VeloZ;
-	  //q = q_dirBE[k];	if (q>=zero && q<=one)	(D.f[TW ])[ktw ]=f_BE  + ms*c1over54  * VeloX - ms*c1over54  * VeloZ;
-	  //q = q_dirTW[k];	if (q>=zero && q<=one)	(D.f[BE ])[kbe ]=f_TW  - ms*c1over54  * VeloX + ms*c1over54  * VeloZ - c1o36*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on;
-	  //q = q_dirTN[k];	if (q>=zero && q<=one)	(D.f[BS ])[kbs ]=f_TN  + ms*c1over54  * VeloY + ms*c1over54  * VeloZ + c1o2*c1o9*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on;
-	  //q = q_dirBS[k];	if (q>=zero && q<=one)	(D.f[TN ])[ktn ]=f_BS  - ms*c1over54  * VeloY - ms*c1over54  * VeloZ;
-	  //q = q_dirBN[k];	if (q>=zero && q<=one)	(D.f[TS ])[kts ]=f_BN  + ms*c1over54  * VeloY - ms*c1over54  * VeloZ;
-	  //q = q_dirTS[k];	if (q>=zero && q<=one)	(D.f[BN ])[kbn ]=f_TS  - ms*c1over54  * VeloY + ms*c1over54  * VeloZ + c1o2*c1o9*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on;
-   //   q = q_dirTNE[k];	if (q>=zero && q<=one)	(D.f[BSW])[kbsw]=f_TNE + ms*c1over216 * VeloX + ms*c1over216 * VeloY + ms*c1over216 * VeloZ + c1o2*c1o36*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on;
-   //   q = q_dirBSW[k];	if (q>=zero && q<=one)	(D.f[TNE])[ktne]=f_BSW - ms*c1over216 * VeloX - ms*c1over216 * VeloY - ms*c1over216 * VeloZ;
-   //   q = q_dirBNE[k];	if (q>=zero && q<=one)	(D.f[TSW])[ktsw]=f_BNE + ms*c1over216 * VeloX + ms*c1over216 * VeloY - ms*c1over216 * VeloZ;
-   //   q = q_dirTSW[k];	if (q>=zero && q<=one)	(D.f[BNE])[kbne]=f_TSW - ms*c1over216 * VeloX - ms*c1over216 * VeloY + ms*c1over216 * VeloZ + c1o2*c1o36*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on;
-   //   q = q_dirTSE[k];	if (q>=zero && q<=one)	(D.f[BNW])[kbnw]=f_TSE + ms*c1over216 * VeloX - ms*c1over216 * VeloY + ms*c1over216 * VeloZ + c1o2*c1o36*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on;
-   //   q = q_dirBNW[k];	if (q>=zero && q<=one)	(D.f[TSE])[ktse]=f_BNW - ms*c1over216 * VeloX + ms*c1over216 * VeloY - ms*c1over216 * VeloZ;
-   //   q = q_dirBSE[k];	if (q>=zero && q<=one)	(D.f[TNW])[ktnw]=f_BSE + ms*c1over216 * VeloX - ms*c1over216 * VeloY - ms*c1over216 * VeloZ;
-   //   q = q_dirTNW[k];	if (q>=zero && q<=one)	(D.f[BSE])[kbse]=f_TNW - ms*c1over216 * VeloX + ms*c1over216 * VeloY + ms*c1over216 * VeloZ + c1o2*c1o36*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on;
+      q = q_dirE[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_M00  ])[kw  ]=f_E   + ms*c2o27  * VeloX;	
+      q = q_dirW[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_P00  ])[ke  ]=f_W   - ms*c2o27  * VeloX;	
+      q = q_dirN[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_0M0  ])[ks  ]=f_N   + ms*c2o27  * VeloY;	
+      q = q_dirS[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_0P0  ])[kn  ]=f_S   - ms*c2o27  * VeloY;	
+	  q = q_dirT[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_00M  ])[kb  ]=f_T   + ms*c2o27  * VeloZ - c3o2*c2o27*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on;
+      q = q_dirB[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_00P  ])[kt  ]=f_B   - ms*c2o27  * VeloZ;
+      q = q_dirNE[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_MM0 ])[ksw ]=f_NE  + ms*c1o54  * VeloX + ms*c1o54  * VeloY;
+	  q = q_dirSW[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_PP0 ])[kne ]=f_SW  - ms*c1o54  * VeloX - ms*c1o54  * VeloY;
+	  q = q_dirSE[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_MP0 ])[knw ]=f_SE  + ms*c1o54  * VeloX - ms*c1o54  * VeloY;
+	  q = q_dirNW[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_PM0 ])[kse ]=f_NW  - ms*c1o54  * VeloX + ms*c1o54  * VeloY;
+	  q = q_dirTE[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_M0M ])[kbw ]=f_TE  + ms*c1o54  * VeloX + ms*c1o54  * VeloZ - c3o2*c1o54*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on-c1o12*kxxMyyFromfcNEQ;
+	  q = q_dirBW[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_P0P ])[kte ]=f_BW  - ms*c1o54  * VeloX - ms*c1o54  * VeloZ;
+	  q = q_dirBE[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_M0P ])[ktw ]=f_BE  + ms*c1o54  * VeloX - ms*c1o54  * VeloZ;
+	  q = q_dirTW[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_P0M ])[kbe ]=f_TW  - ms*c1o54  * VeloX + ms*c1o54  * VeloZ - c3o2*c1o54*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on-c1o12*kxxMyyFromfcNEQ;
+	  q = q_dirTN[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_0MM ])[kbs ]=f_TN  + ms*c1o54  * VeloY + ms*c1o54  * VeloZ + c3o1*c1o54*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on+c1o12*kxxMyyFromfcNEQ;
+	  q = q_dirBS[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_0PP ])[ktn ]=f_BS  - ms*c1o54  * VeloY - ms*c1o54  * VeloZ;
+	  q = q_dirBN[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_0MP ])[kts ]=f_BN  + ms*c1o54  * VeloY - ms*c1o54  * VeloZ;
+	  q = q_dirTS[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_0PM ])[kbn ]=f_TS  - ms*c1o54  * VeloY + ms*c1o54  * VeloZ + c3o1*c1o54*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on+c1o12*kxxMyyFromfcNEQ;
+      q = q_dirTNE[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_MMM])[kbsw]=f_TNE + ms*c1o216 * VeloX + ms*c1o216 * VeloY + ms*c1o216 * VeloZ + c3o1*c1o216*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on;
+      q = q_dirBSW[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_PPP])[ktne]=f_BSW - ms*c1o216 * VeloX - ms*c1o216 * VeloY - ms*c1o216 * VeloZ;
+      q = q_dirBNE[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_MMP])[ktsw]=f_BNE + ms*c1o216 * VeloX + ms*c1o216 * VeloY - ms*c1o216 * VeloZ;
+      q = q_dirTSW[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_PPM])[kbne]=f_TSW - ms*c1o216 * VeloX - ms*c1o216 * VeloY + ms*c1o216 * VeloZ + c3o1*c1o216*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on;
+      q = q_dirTSE[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_MPM])[kbnw]=f_TSE + ms*c1o216 * VeloX - ms*c1o216 * VeloY + ms*c1o216 * VeloZ + c3o1*c1o216*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on;
+      q = q_dirBNW[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_PMP])[ktse]=f_BNW - ms*c1o216 * VeloX + ms*c1o216 * VeloY - ms*c1o216 * VeloZ;
+      q = q_dirBSE[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_MPP])[ktnw]=f_BSE + ms*c1o216 * VeloX - ms*c1o216 * VeloY - ms*c1o216 * VeloZ;
+      q = q_dirTNW[k];	if (q>=c0o1 && q<=c1o1)	(D.f[DIR_PMM])[kbse]=f_TNW - ms*c1o216 * VeloX + ms*c1o216 * VeloY + ms*c1o216 * VeloZ + c3o1*c1o216*((c2o1*VeloY-vx2)*(c2o1*VeloY-vx2)-vx2*vx2)*on;
+      //q = q_dirE[k];	if (q>=zero && q<=one)	(D.f[DIR_M00  ])[kw  ]=f_E   + ms*c2over27  * VeloX;	
+   //   q = q_dirW[k];	if (q>=zero && q<=one)	(D.f[DIR_P00  ])[ke  ]=f_W   - ms*c2over27  * VeloX;	
+   //   q = q_dirN[k];	if (q>=zero && q<=one)	(D.f[DIR_0M0  ])[ks  ]=f_N   + ms*c2over27  * VeloY;	
+   //   q = q_dirS[k];	if (q>=zero && q<=one)	(D.f[DIR_0P0  ])[kn  ]=f_S   - ms*c2over27  * VeloY;	
+	  //q = q_dirT[k];	if (q>=zero && q<=one)	(D.f[DIR_00M  ])[kb  ]=f_T   + ms*c2over27  * VeloZ - c1o9*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on;
+   //   q = q_dirB[k];	if (q>=zero && q<=one)	(D.f[DIR_00P  ])[kt  ]=f_B   - ms*c2over27  * VeloZ;
+   //   q = q_dirNE[k];	if (q>=zero && q<=one)	(D.f[DIR_MM0 ])[ksw ]=f_NE  + ms*c1over54  * VeloX + ms*c1over54  * VeloY;
+	  //q = q_dirSW[k];	if (q>=zero && q<=one)	(D.f[DIR_PP0 ])[kne ]=f_SW  - ms*c1over54  * VeloX - ms*c1over54  * VeloY;
+	  //q = q_dirSE[k];	if (q>=zero && q<=one)	(D.f[DIR_MP0 ])[knw ]=f_SE  + ms*c1over54  * VeloX - ms*c1over54  * VeloY;
+	  //q = q_dirNW[k];	if (q>=zero && q<=one)	(D.f[DIR_PM0 ])[kse ]=f_NW  - ms*c1over54  * VeloX + ms*c1over54  * VeloY;
+	  //q = q_dirTE[k];	if (q>=zero && q<=one)	(D.f[DIR_M0M ])[kbw ]=f_TE  + ms*c1over54  * VeloX + ms*c1over54  * VeloZ - c1o36*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on;
+	  //q = q_dirBW[k];	if (q>=zero && q<=one)	(D.f[DIR_P0P ])[kte ]=f_BW  - ms*c1over54  * VeloX - ms*c1over54  * VeloZ;
+	  //q = q_dirBE[k];	if (q>=zero && q<=one)	(D.f[DIR_M0P ])[ktw ]=f_BE  + ms*c1over54  * VeloX - ms*c1over54  * VeloZ;
+	  //q = q_dirTW[k];	if (q>=zero && q<=one)	(D.f[DIR_P0M ])[kbe ]=f_TW  - ms*c1over54  * VeloX + ms*c1over54  * VeloZ - c1o36*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on;
+	  //q = q_dirTN[k];	if (q>=zero && q<=one)	(D.f[DIR_0MM ])[kbs ]=f_TN  + ms*c1over54  * VeloY + ms*c1over54  * VeloZ + c1o2*c1o9*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on;
+	  //q = q_dirBS[k];	if (q>=zero && q<=one)	(D.f[DIR_0PP ])[ktn ]=f_BS  - ms*c1over54  * VeloY - ms*c1over54  * VeloZ;
+	  //q = q_dirBN[k];	if (q>=zero && q<=one)	(D.f[DIR_0MP ])[kts ]=f_BN  + ms*c1over54  * VeloY - ms*c1over54  * VeloZ;
+	  //q = q_dirTS[k];	if (q>=zero && q<=one)	(D.f[DIR_0PM ])[kbn ]=f_TS  - ms*c1over54  * VeloY + ms*c1over54  * VeloZ + c1o2*c1o9*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on;
+   //   q = q_dirTNE[k];	if (q>=zero && q<=one)	(D.f[DIR_MMM])[kbsw]=f_TNE + ms*c1over216 * VeloX + ms*c1over216 * VeloY + ms*c1over216 * VeloZ + c1o2*c1o36*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on;
+   //   q = q_dirBSW[k];	if (q>=zero && q<=one)	(D.f[DIR_PPP])[ktne]=f_BSW - ms*c1over216 * VeloX - ms*c1over216 * VeloY - ms*c1over216 * VeloZ;
+   //   q = q_dirBNE[k];	if (q>=zero && q<=one)	(D.f[DIR_MMP])[ktsw]=f_BNE + ms*c1over216 * VeloX + ms*c1over216 * VeloY - ms*c1over216 * VeloZ;
+   //   q = q_dirTSW[k];	if (q>=zero && q<=one)	(D.f[DIR_PPM])[kbne]=f_TSW - ms*c1over216 * VeloX - ms*c1over216 * VeloY + ms*c1over216 * VeloZ + c1o2*c1o36*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on;
+   //   q = q_dirTSE[k];	if (q>=zero && q<=one)	(D.f[DIR_MPM])[kbnw]=f_TSE + ms*c1over216 * VeloX - ms*c1over216 * VeloY + ms*c1over216 * VeloZ + c1o2*c1o36*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on;
+   //   q = q_dirBNW[k];	if (q>=zero && q<=one)	(D.f[DIR_PMP])[ktse]=f_BNW - ms*c1over216 * VeloX + ms*c1over216 * VeloY - ms*c1over216 * VeloZ;
+   //   q = q_dirBSE[k];	if (q>=zero && q<=one)	(D.f[DIR_MPP])[ktnw]=f_BSE + ms*c1over216 * VeloX - ms*c1over216 * VeloY - ms*c1over216 * VeloZ;
+   //   q = q_dirTNW[k];	if (q>=zero && q<=one)	(D.f[DIR_PMM])[kbse]=f_TNW - ms*c1over216 * VeloX + ms*c1over216 * VeloY + ms*c1over216 * VeloZ + c1o2*c1o36*((two*VeloY-vx2)*(two*VeloY-vx2)-vx2*vx2)*on;
 	  //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    }
 }
@@ -3965,7 +3964,7 @@ extern "C" __global__ void QVelDevCouette27(real* vx,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QVelDev1h27( int inx,
+__global__ void QVelDev1h27( int inx,
 										int iny,
 										real* vx,
 										real* vy,
@@ -3989,63 +3988,63 @@ extern "C" __global__ void QVelDev1h27( int inx,
 	Distributions27 D;
 	if (isEvenTimestep==true)
 	{
-		D.f[E   ] = &DD[E   *size_Mat];
-		D.f[W   ] = &DD[W   *size_Mat];
-		D.f[N   ] = &DD[N   *size_Mat];
-		D.f[S   ] = &DD[S   *size_Mat];
-		D.f[T   ] = &DD[T   *size_Mat];
-		D.f[B   ] = &DD[B   *size_Mat];
-		D.f[NE  ] = &DD[NE  *size_Mat];
-		D.f[SW  ] = &DD[SW  *size_Mat];
-		D.f[SE  ] = &DD[SE  *size_Mat];
-		D.f[NW  ] = &DD[NW  *size_Mat];
-		D.f[TE  ] = &DD[TE  *size_Mat];
-		D.f[BW  ] = &DD[BW  *size_Mat];
-		D.f[BE  ] = &DD[BE  *size_Mat];
-		D.f[TW  ] = &DD[TW  *size_Mat];
-		D.f[TN  ] = &DD[TN  *size_Mat];
-		D.f[BS  ] = &DD[BS  *size_Mat];
-		D.f[BN  ] = &DD[BN  *size_Mat];
-		D.f[TS  ] = &DD[TS  *size_Mat];
-		D.f[REST] = &DD[REST*size_Mat];
-		D.f[TNE ] = &DD[TNE *size_Mat];
-		D.f[TSW ] = &DD[TSW *size_Mat];
-		D.f[TSE ] = &DD[TSE *size_Mat];
-		D.f[TNW ] = &DD[TNW *size_Mat];
-		D.f[BNE ] = &DD[BNE *size_Mat];
-		D.f[BSW ] = &DD[BSW *size_Mat];
-		D.f[BSE ] = &DD[BSE *size_Mat];
-		D.f[BNW ] = &DD[BNW *size_Mat];
+		D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+		D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+		D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+		D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+		D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+		D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+		D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+		D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+		D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+		D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+		D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+		D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+		D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+		D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+		D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+		D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+		D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+		D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+		D.f[DIR_000] = &DD[DIR_000*size_Mat];
+		D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+		D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+		D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+		D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+		D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+		D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+		D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+		D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
 	} 
 	else
 	{
-		D.f[W   ] = &DD[E   *size_Mat];
-		D.f[E   ] = &DD[W   *size_Mat];
-		D.f[S   ] = &DD[N   *size_Mat];
-		D.f[N   ] = &DD[S   *size_Mat];
-		D.f[B   ] = &DD[T   *size_Mat];
-		D.f[T   ] = &DD[B   *size_Mat];
-		D.f[SW  ] = &DD[NE  *size_Mat];
-		D.f[NE  ] = &DD[SW  *size_Mat];
-		D.f[NW  ] = &DD[SE  *size_Mat];
-		D.f[SE  ] = &DD[NW  *size_Mat];
-		D.f[BW  ] = &DD[TE  *size_Mat];
-		D.f[TE  ] = &DD[BW  *size_Mat];
-		D.f[TW  ] = &DD[BE  *size_Mat];
-		D.f[BE  ] = &DD[TW  *size_Mat];
-		D.f[BS  ] = &DD[TN  *size_Mat];
-		D.f[TN  ] = &DD[BS  *size_Mat];
-		D.f[TS  ] = &DD[BN  *size_Mat];
-		D.f[BN  ] = &DD[TS  *size_Mat];
-		D.f[REST] = &DD[REST*size_Mat];
-		D.f[TNE ] = &DD[BSW *size_Mat];
-		D.f[TSW ] = &DD[BNE *size_Mat];
-		D.f[TSE ] = &DD[BNW *size_Mat];
-		D.f[TNW ] = &DD[BSE *size_Mat];
-		D.f[BNE ] = &DD[TSW *size_Mat];
-		D.f[BSW ] = &DD[TNE *size_Mat];
-		D.f[BSE ] = &DD[TNW *size_Mat];
-		D.f[BNW ] = &DD[TSE *size_Mat];
+		D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+		D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+		D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+		D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+		D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+		D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+		D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+		D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+		D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+		D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+		D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+		D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+		D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+		D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+		D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+		D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+		D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+		D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+		D.f[DIR_000] = &DD[DIR_000*size_Mat];
+		D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+		D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+		D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+		D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+		D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+		D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+		D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+		D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
 	}
 	////////////////////////////////////////////////////////////////////////////////
 	const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -4080,32 +4079,32 @@ extern "C" __global__ void QVelDev1h27( int inx,
 			*q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
 			*q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
 			*q_dirBSE, *q_dirBNW; 
-		q_dirE   = &QQ[E   * numberOfBCnodes];
-		q_dirW   = &QQ[W   * numberOfBCnodes];
-		q_dirN   = &QQ[N   * numberOfBCnodes];
-		q_dirS   = &QQ[S   * numberOfBCnodes];
-		q_dirT   = &QQ[T   * numberOfBCnodes];
-		q_dirB   = &QQ[B   * numberOfBCnodes];
-		q_dirNE  = &QQ[NE  * numberOfBCnodes];
-		q_dirSW  = &QQ[SW  * numberOfBCnodes];
-		q_dirSE  = &QQ[SE  * numberOfBCnodes];
-		q_dirNW  = &QQ[NW  * numberOfBCnodes];
-		q_dirTE  = &QQ[TE  * numberOfBCnodes];
-		q_dirBW  = &QQ[BW  * numberOfBCnodes];
-		q_dirBE  = &QQ[BE  * numberOfBCnodes];
-		q_dirTW  = &QQ[TW  * numberOfBCnodes];
-		q_dirTN  = &QQ[TN  * numberOfBCnodes];
-		q_dirBS  = &QQ[BS  * numberOfBCnodes];
-		q_dirBN  = &QQ[BN  * numberOfBCnodes];
-		q_dirTS  = &QQ[TS  * numberOfBCnodes];
-		q_dirTNE = &QQ[TNE * numberOfBCnodes];
-		q_dirTSW = &QQ[TSW * numberOfBCnodes];
-		q_dirTSE = &QQ[TSE * numberOfBCnodes];
-		q_dirTNW = &QQ[TNW * numberOfBCnodes];
-		q_dirBNE = &QQ[BNE * numberOfBCnodes];
-		q_dirBSW = &QQ[BSW * numberOfBCnodes];
-		q_dirBSE = &QQ[BSE * numberOfBCnodes];
-		q_dirBNW = &QQ[BNW * numberOfBCnodes];
+		q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
+		q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
+		q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
+		q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
+		q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
+		q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
+		q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
+		q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
+		q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
+		q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
+		q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
+		q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
+		q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
+		q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
+		q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
+		q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
+		q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
+		q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+		q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
+		q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
+		q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
+		q_dirTNW = &QQ[DIR_MPP * numberOfBCnodes];
+		q_dirBNE = &QQ[DIR_PPM * numberOfBCnodes];
+		q_dirBSW = &QQ[DIR_MMM * numberOfBCnodes];
+		q_dirBSE = &QQ[DIR_PMM * numberOfBCnodes];
+		q_dirBNW = &QQ[DIR_MPM * numberOfBCnodes];
 		////////////////////////////////////////////////////////////////////////////////
 		//index
 		unsigned int KQK  = k_Q[k];
@@ -4168,37 +4167,37 @@ extern "C" __global__ void QVelDev1h27( int inx,
 		//real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
 		//	f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-		//f_W    = (D.f[E   ])[ke   ];
-		//f_E    = (D.f[W   ])[kw   ];
-		//f_S    = (D.f[N   ])[kn   ];
-		//f_N    = (D.f[S   ])[ks   ];
-		//f_B    = (D.f[T   ])[kt   ];
-		//f_T    = (D.f[B   ])[kb   ];
-		//f_SW   = (D.f[NE  ])[kne  ];
-		//f_NE   = (D.f[SW  ])[ksw  ];
-		//f_NW   = (D.f[SE  ])[kse  ];
-		//f_SE   = (D.f[NW  ])[knw  ];
-		//f_BW   = (D.f[TE  ])[kte  ];
-		//f_TE   = (D.f[BW  ])[kbw  ];
-		//f_TW   = (D.f[BE  ])[kbe  ];
-		//f_BE   = (D.f[TW  ])[ktw  ];
-		//f_BS   = (D.f[TN  ])[ktn  ];
-		//f_TN   = (D.f[BS  ])[kbs  ];
-		//f_TS   = (D.f[BN  ])[kbn  ];
-		//f_BN   = (D.f[TS  ])[kts  ];
-		//f_BSW  = (D.f[TNE ])[ktne ];
-		//f_BNE  = (D.f[TSW ])[ktsw ];
-		//f_BNW  = (D.f[TSE ])[ktse ];
-		//f_BSE  = (D.f[TNW ])[ktnw ];
-		//f_TSW  = (D.f[BNE ])[kbne ];
-		//f_TNE  = (D.f[BSW ])[kbsw ];
-		//f_TNW  = (D.f[BSE ])[kbse ];
-		//f_TSE  = (D.f[BNW ])[kbnw ];
+		//f_W    = (D.f[DIR_P00   ])[ke   ];
+		//f_E    = (D.f[DIR_M00   ])[kw   ];
+		//f_S    = (D.f[DIR_0P0   ])[kn   ];
+		//f_N    = (D.f[DIR_0M0   ])[ks   ];
+		//f_B    = (D.f[DIR_00P   ])[kt   ];
+		//f_T    = (D.f[DIR_00M   ])[kb   ];
+		//f_SW   = (D.f[DIR_PP0  ])[kne  ];
+		//f_NE   = (D.f[DIR_MM0  ])[ksw  ];
+		//f_NW   = (D.f[DIR_PM0  ])[kse  ];
+		//f_SE   = (D.f[DIR_MP0  ])[knw  ];
+		//f_BW   = (D.f[DIR_P0P  ])[kte  ];
+		//f_TE   = (D.f[DIR_M0M  ])[kbw  ];
+		//f_TW   = (D.f[DIR_P0M  ])[kbe  ];
+		//f_BE   = (D.f[DIR_M0P  ])[ktw  ];
+		//f_BS   = (D.f[DIR_0PP  ])[ktn  ];
+		//f_TN   = (D.f[DIR_0MM  ])[kbs  ];
+		//f_TS   = (D.f[DIR_0PM  ])[kbn  ];
+		//f_BN   = (D.f[DIR_0MP  ])[kts  ];
+		//f_BSW  = (D.f[DIR_PPP ])[ktne ];
+		//f_BNE  = (D.f[DIR_MMP ])[ktsw ];
+		//f_BNW  = (D.f[DIR_PMP ])[ktse ];
+		//f_BSE  = (D.f[DIR_MPP ])[ktnw ];
+		//f_TSW  = (D.f[DIR_PPM ])[kbne ];
+		//f_TNE  = (D.f[DIR_MMM ])[kbsw ];
+		//f_TNW  = (D.f[DIR_PMM ])[kbse ];
+		//f_TSE  = (D.f[DIR_MPM ])[kbnw ];
 		////////////////////////////////////////////////////////////////////////////////
 		real /*vx1, vx2,*/ vx3, drho, feq, q, cu_sq;
 		//drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
 		//	f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + 
-		//	f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[REST])[kzero]); 
+		//	f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[DIR_000])[kzero]); 
 
 		//vx1    =  ((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
 		//	((f_BE - f_TW)   + (f_TE - f_BW))   + ((f_SE - f_NW)   + (f_NE - f_SW)) +
@@ -4218,67 +4217,67 @@ extern "C" __global__ void QVelDev1h27( int inx,
 		//////////////////////////////////////////////////////////////////////////
 		if (isEvenTimestep==false)
 		{
-			D.f[E   ] = &DD[E   *size_Mat];
-			D.f[W   ] = &DD[W   *size_Mat];
-			D.f[N   ] = &DD[N   *size_Mat];
-			D.f[S   ] = &DD[S   *size_Mat];
-			D.f[T   ] = &DD[T   *size_Mat];
-			D.f[B   ] = &DD[B   *size_Mat];
-			D.f[NE  ] = &DD[NE  *size_Mat];
-			D.f[SW  ] = &DD[SW  *size_Mat];
-			D.f[SE  ] = &DD[SE  *size_Mat];
-			D.f[NW  ] = &DD[NW  *size_Mat];
-			D.f[TE  ] = &DD[TE  *size_Mat];
-			D.f[BW  ] = &DD[BW  *size_Mat];
-			D.f[BE  ] = &DD[BE  *size_Mat];
-			D.f[TW  ] = &DD[TW  *size_Mat];
-			D.f[TN  ] = &DD[TN  *size_Mat];
-			D.f[BS  ] = &DD[BS  *size_Mat];
-			D.f[BN  ] = &DD[BN  *size_Mat];
-			D.f[TS  ] = &DD[TS  *size_Mat];
-			D.f[REST] = &DD[REST*size_Mat];
-			D.f[TNE ] = &DD[TNE *size_Mat];
-			D.f[TSW ] = &DD[TSW *size_Mat];
-			D.f[TSE ] = &DD[TSE *size_Mat];
-			D.f[TNW ] = &DD[TNW *size_Mat];
-			D.f[BNE ] = &DD[BNE *size_Mat];
-			D.f[BSW ] = &DD[BSW *size_Mat];
-			D.f[BSE ] = &DD[BSE *size_Mat];
-			D.f[BNW ] = &DD[BNW *size_Mat];
+			D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+			D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+			D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+			D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+			D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+			D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+			D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+			D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+			D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+			D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+			D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+			D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+			D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+			D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+			D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+			D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+			D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+			D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+			D.f[DIR_000] = &DD[DIR_000*size_Mat];
+			D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+			D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+			D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+			D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+			D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+			D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+			D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+			D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
 		} 
 		else
 		{
-			D.f[W   ] = &DD[E   *size_Mat];
-			D.f[E   ] = &DD[W   *size_Mat];
-			D.f[S   ] = &DD[N   *size_Mat];
-			D.f[N   ] = &DD[S   *size_Mat];
-			D.f[B   ] = &DD[T   *size_Mat];
-			D.f[T   ] = &DD[B   *size_Mat];
-			D.f[SW  ] = &DD[NE  *size_Mat];
-			D.f[NE  ] = &DD[SW  *size_Mat];
-			D.f[NW  ] = &DD[SE  *size_Mat];
-			D.f[SE  ] = &DD[NW  *size_Mat];
-			D.f[BW  ] = &DD[TE  *size_Mat];
-			D.f[TE  ] = &DD[BW  *size_Mat];
-			D.f[TW  ] = &DD[BE  *size_Mat];
-			D.f[BE  ] = &DD[TW  *size_Mat];
-			D.f[BS  ] = &DD[TN  *size_Mat];
-			D.f[TN  ] = &DD[BS  *size_Mat];
-			D.f[TS  ] = &DD[BN  *size_Mat];
-			D.f[BN  ] = &DD[TS  *size_Mat];
-			D.f[REST] = &DD[REST*size_Mat];
-			D.f[TNE ] = &DD[BSW *size_Mat];
-			D.f[TSW ] = &DD[BNE *size_Mat];
-			D.f[TSE ] = &DD[BNW *size_Mat];
-			D.f[TNW ] = &DD[BSE *size_Mat];
-			D.f[BNE ] = &DD[TSW *size_Mat];
-			D.f[BSW ] = &DD[TNE *size_Mat];
-			D.f[BSE ] = &DD[TNW *size_Mat];
-			D.f[BNW ] = &DD[TSE *size_Mat];
+			D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+			D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+			D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+			D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+			D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+			D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+			D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+			D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+			D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+			D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+			D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+			D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+			D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+			D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+			D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+			D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+			D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+			D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+			D.f[DIR_000] = &DD[DIR_000*size_Mat];
+			D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+			D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+			D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+			D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+			D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+			D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+			D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+			D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
 		}
 		////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 		//Test
-		//(D.f[REST])[k]=c1o10;
+		//(D.f[DIR_000])[k]=c1o10;
 		////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
 		//ToDo anders Klammern
@@ -4287,208 +4286,208 @@ extern "C" __global__ void QVelDev1h27( int inx,
 		//if (q>=zero && q<=one)
 		//{
 		//	feq=c2over27* (drho/*+three*( vx1        )*/+c9over2*( vx1        )*( vx1        )-cu_sq); 
-		//	(D.f[W])[kw]=(one-q)/(one+q)*(f_E-f_W+(f_E+f_W-two*feq*om1)/(one-om1))*c1o2+(q*(f_E+f_W)-six*c2over27*( VeloX     ))/(one+q);
-		//	//(D.f[W])[kw]=zero;
+		//	(D.f[DIR_M00])[kw]=(one-q)/(one+q)*(f_E-f_W+(f_E+f_W-two*feq*om1)/(one-om1))*c1o2+(q*(f_E+f_W)-six*c2over27*( VeloX     ))/(one+q);
+		//	//(D.f[DIR_M00])[kw]=zero;
 		//}
 
 		//q = q_dirW[k];
 		//if (q>=zero && q<=one)
 		//{
 		//	feq=c2over27* (drho/*+three*(-vx1        )*/+c9over2*(-vx1        )*(-vx1        )-cu_sq); 
-		//	(D.f[E])[ke]=(one-q)/(one+q)*(f_W-f_E+(f_W+f_E-two*feq*om1)/(one-om1))*c1o2+(q*(f_W+f_E)-six*c2over27*(-VeloX     ))/(one+q);
-		//	//(D.f[E])[ke]=zero;
+		//	(D.f[DIR_P00])[ke]=(one-q)/(one+q)*(f_W-f_E+(f_W+f_E-two*feq*om1)/(one-om1))*c1o2+(q*(f_W+f_E)-six*c2over27*(-VeloX     ))/(one+q);
+		//	//(D.f[DIR_P00])[ke]=zero;
 		//}
 
 		//q = q_dirN[k];
 		//if (q>=zero && q<=one)
 		//{
 		//	feq=c2over27* (drho/*+three*(    vx2     )*/+c9over2*(     vx2    )*(     vx2    )-cu_sq); 
-		//	(D.f[S])[ks]=(one-q)/(one+q)*(f_N-f_S+(f_N+f_S-two*feq*om1)/(one-om1))*c1o2+(q*(f_N+f_S)-six*c2over27*( VeloY     ))/(one+q);
-		//	//(D.f[S])[ks]=zero;
+		//	(D.f[DIR_0M0])[ks]=(one-q)/(one+q)*(f_N-f_S+(f_N+f_S-two*feq*om1)/(one-om1))*c1o2+(q*(f_N+f_S)-six*c2over27*( VeloY     ))/(one+q);
+		//	//(D.f[DIR_0M0])[ks]=zero;
 		//}
 
 		//q = q_dirS[k];
 		//if (q>=zero && q<=one)
 		//{
 		//	feq=c2over27* (drho/*+three*(   -vx2     )*/+c9over2*(    -vx2    )*(    -vx2    )-cu_sq); 
-		//	(D.f[N])[kn]=(one-q)/(one+q)*(f_S-f_N+(f_S+f_N-two*feq*om1)/(one-om1))*c1o2+(q*(f_S+f_N)-six*c2over27*(-VeloY     ))/(one+q);
-		//	//(D.f[N])[kn]=zero;
+		//	(D.f[DIR_0P0])[kn]=(one-q)/(one+q)*(f_S-f_N+(f_S+f_N-two*feq*om1)/(one-om1))*c1o2+(q*(f_S+f_N)-six*c2over27*(-VeloY     ))/(one+q);
+		//	//(D.f[DIR_0P0])[kn]=zero;
 		//}
 
 		//q = q_dirT[k];
 		//if (q>=zero && q<=one)
 		//{
 		//	feq=c2over27* (drho/*+three*(         vx3)*/+c9over2*(         vx3)*(         vx3)-cu_sq); 
-		//	(D.f[B])[kb]=(one-q)/(one+q)*(f_T-f_B+(f_T+f_B-two*feq*om1)/(one-om1))*c1o2+(q*(f_T+f_B)-six*c2over27*( VeloZ     ))/(one+q);
-		//	//(D.f[B])[kb]=one;
+		//	(D.f[DIR_00M])[kb]=(one-q)/(one+q)*(f_T-f_B+(f_T+f_B-two*feq*om1)/(one-om1))*c1o2+(q*(f_T+f_B)-six*c2over27*( VeloZ     ))/(one+q);
+		//	//(D.f[DIR_00M])[kb]=one;
 		//}
 
 		//q = q_dirB[k];
 		//if (q>=zero && q<=one)
 		//{
 		//	feq=c2over27* (drho/*+three*(        -vx3)*/+c9over2*(        -vx3)*(        -vx3)-cu_sq); 
-		//	(D.f[T])[kt]=(one-q)/(one+q)*(f_B-f_T+(f_B+f_T-two*feq*om1)/(one-om1))*c1o2+(q*(f_B+f_T)-six*c2over27*(-VeloZ     ))/(one+q);
-		//	//(D.f[T])[kt]=zero;
+		//	(D.f[DIR_00P])[kt]=(one-q)/(one+q)*(f_B-f_T+(f_B+f_T-two*feq*om1)/(one-om1))*c1o2+(q*(f_B+f_T)-six*c2over27*(-VeloZ     ))/(one+q);
+		//	//(D.f[DIR_00P])[kt]=zero;
 		//}
 
 		//q = q_dirNE[k];
 		//if (q>=zero && q<=one)
 		//{
 		//	feq=c1over54* (drho/*+three*( vx1+vx2    )*/+c9over2*( vx1+vx2    )*( vx1+vx2    )-cu_sq); 
-		//	(D.f[SW])[ksw]=(one-q)/(one+q)*(f_NE-f_SW+(f_NE+f_SW-two*feq*om1)/(one-om1))*c1o2+(q*(f_NE+f_SW)-six*c1over54*(VeloX+VeloY))/(one+q);
-		//	//(D.f[SW])[ksw]=zero;
+		//	(D.f[DIR_MM0])[ksw]=(one-q)/(one+q)*(f_NE-f_SW+(f_NE+f_SW-two*feq*om1)/(one-om1))*c1o2+(q*(f_NE+f_SW)-six*c1over54*(VeloX+VeloY))/(one+q);
+		//	//(D.f[DIR_MM0])[ksw]=zero;
 		//}
 
 		//q = q_dirSW[k];
 		//if (q>=zero && q<=one)
 		//{
 		//	feq=c1over54* (drho/*+three*(-vx1-vx2    )*/+c9over2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq); 
-		//	(D.f[NE])[kne]=(one-q)/(one+q)*(f_SW-f_NE+(f_SW+f_NE-two*feq*om1)/(one-om1))*c1o2+(q*(f_SW+f_NE)-six*c1over54*(-VeloX-VeloY))/(one+q);
-		//	//(D.f[NE])[kne]=zero;
+		//	(D.f[DIR_PP0])[kne]=(one-q)/(one+q)*(f_SW-f_NE+(f_SW+f_NE-two*feq*om1)/(one-om1))*c1o2+(q*(f_SW+f_NE)-six*c1over54*(-VeloX-VeloY))/(one+q);
+		//	//(D.f[DIR_PP0])[kne]=zero;
 		//}
 
 		//q = q_dirSE[k];
 		//if (q>=zero && q<=one)
 		//{
 		//	feq=c1over54* (drho/*+three*( vx1-vx2    )*/+c9over2*( vx1-vx2    )*( vx1-vx2    )-cu_sq); 
-		//	(D.f[NW])[knw]=(one-q)/(one+q)*(f_SE-f_NW+(f_SE+f_NW-two*feq*om1)/(one-om1))*c1o2+(q*(f_SE+f_NW)-six*c1over54*( VeloX-VeloY))/(one+q);
-		//	//(D.f[NW])[knw]=zero;
+		//	(D.f[DIR_MP0])[knw]=(one-q)/(one+q)*(f_SE-f_NW+(f_SE+f_NW-two*feq*om1)/(one-om1))*c1o2+(q*(f_SE+f_NW)-six*c1over54*( VeloX-VeloY))/(one+q);
+		//	//(D.f[DIR_MP0])[knw]=zero;
 		//}
 
 		//q = q_dirNW[k];
 		//if (q>=zero && q<=one)
 		//{
 		//	feq=c1over54* (drho/*+three*(-vx1+vx2    )*/+c9over2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq); 
-		//	(D.f[SE])[kse]=(one-q)/(one+q)*(f_NW-f_SE+(f_NW+f_SE-two*feq*om1)/(one-om1))*c1o2+(q*(f_NW+f_SE)-six*c1over54*(-VeloX+VeloY))/(one+q);
-		//	//(D.f[SE])[kse]=zero;
+		//	(D.f[DIR_PM0])[kse]=(one-q)/(one+q)*(f_NW-f_SE+(f_NW+f_SE-two*feq*om1)/(one-om1))*c1o2+(q*(f_NW+f_SE)-six*c1over54*(-VeloX+VeloY))/(one+q);
+		//	//(D.f[DIR_PM0])[kse]=zero;
 		//}
 
 		//q = q_dirTE[k];
 		//if (q>=zero && q<=one)
 		//{
 		//	feq=c1over54* (drho/*+three*( vx1    +vx3)*/+c9over2*( vx1    +vx3)*( vx1    +vx3)-cu_sq); 
-		//	(D.f[BW])[kbw]=(one-q)/(one+q)*(f_TE-f_BW+(f_TE+f_BW-two*feq*om1)/(one-om1))*c1o2+(q*(f_TE+f_BW)-six*c1over54*( VeloX+VeloZ))/(one+q);
-		//	//(D.f[BW])[kbw]=zero;
+		//	(D.f[DIR_M0M])[kbw]=(one-q)/(one+q)*(f_TE-f_BW+(f_TE+f_BW-two*feq*om1)/(one-om1))*c1o2+(q*(f_TE+f_BW)-six*c1over54*( VeloX+VeloZ))/(one+q);
+		//	//(D.f[DIR_M0M])[kbw]=zero;
 		//}
 
 		//q = q_dirBW[k];
 		//if (q>=zero && q<=one)
 		//{
 		//	feq=c1over54* (drho/*+three*(-vx1    -vx3)*/+c9over2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq); 
-		//	(D.f[TE])[kte]=(one-q)/(one+q)*(f_BW-f_TE+(f_BW+f_TE-two*feq*om1)/(one-om1))*c1o2+(q*(f_BW+f_TE)-six*c1over54*(-VeloX-VeloZ))/(one+q);
-		//	//(D.f[TE])[kte]=zero;
+		//	(D.f[DIR_P0P])[kte]=(one-q)/(one+q)*(f_BW-f_TE+(f_BW+f_TE-two*feq*om1)/(one-om1))*c1o2+(q*(f_BW+f_TE)-six*c1over54*(-VeloX-VeloZ))/(one+q);
+		//	//(D.f[DIR_P0P])[kte]=zero;
 		//}
 
 		//q = q_dirBE[k];
 		//if (q>=zero && q<=one)
 		//{
 		//	feq=c1over54* (drho/*+three*( vx1    -vx3)*/+c9over2*( vx1    -vx3)*( vx1    -vx3)-cu_sq); 
-		//	(D.f[TW])[ktw]=(one-q)/(one+q)*(f_BE-f_TW+(f_BE+f_TW-two*feq*om1)/(one-om1))*c1o2+(q*(f_BE+f_TW)-six*c1over54*( VeloX-VeloZ))/(one+q);
-		//	//(D.f[TW])[ktw]=zero;
+		//	(D.f[DIR_M0P])[ktw]=(one-q)/(one+q)*(f_BE-f_TW+(f_BE+f_TW-two*feq*om1)/(one-om1))*c1o2+(q*(f_BE+f_TW)-six*c1over54*( VeloX-VeloZ))/(one+q);
+		//	//(D.f[DIR_M0P])[ktw]=zero;
 		//}
 
 		//q = q_dirTW[k];
 		//if (q>=zero && q<=one)
 		//{
 		//	feq=c1over54* (drho/*+three*(-vx1    +vx3)*/+c9over2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq); 
-		//	(D.f[BE])[kbe]=(one-q)/(one+q)*(f_TW-f_BE+(f_TW+f_BE-two*feq*om1)/(one-om1))*c1o2+(q*(f_TW+f_BE)-six*c1over54*(-VeloX+VeloZ))/(one+q);
-		//	//(D.f[BE])[kbe]=zero;
+		//	(D.f[DIR_P0M])[kbe]=(one-q)/(one+q)*(f_TW-f_BE+(f_TW+f_BE-two*feq*om1)/(one-om1))*c1o2+(q*(f_TW+f_BE)-six*c1over54*(-VeloX+VeloZ))/(one+q);
+		//	//(D.f[DIR_P0M])[kbe]=zero;
 		//}
 
 		//q = q_dirTN[k];
 		//if (q>=zero && q<=one)
 		//{
 		//	feq=c1over54* (drho/*+three*(     vx2+vx3)*/+c9over2*(     vx2+vx3)*(     vx2+vx3)-cu_sq); 
-		//	(D.f[BS])[kbs]=(one-q)/(one+q)*(f_TN-f_BS+(f_TN+f_BS-two*feq*om1)/(one-om1))*c1o2+(q*(f_TN+f_BS)-six*c1over54*( VeloY+VeloZ))/(one+q);
-		//	//(D.f[BS])[kbs]=zero;
+		//	(D.f[DIR_0MM])[kbs]=(one-q)/(one+q)*(f_TN-f_BS+(f_TN+f_BS-two*feq*om1)/(one-om1))*c1o2+(q*(f_TN+f_BS)-six*c1over54*( VeloY+VeloZ))/(one+q);
+		//	//(D.f[DIR_0MM])[kbs]=zero;
 		//}
 
 		//q = q_dirBS[k];
 		//if (q>=zero && q<=one)
 		//{
 		//	feq=c1over54* (drho/*+three*(    -vx2-vx3)*/+c9over2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq); 
-		//	(D.f[TN])[ktn]=(one-q)/(one+q)*(f_BS-f_TN+(f_BS+f_TN-two*feq*om1)/(one-om1))*c1o2+(q*(f_BS+f_TN)-six*c1over54*( -VeloY-VeloZ))/(one+q);
-		//	//(D.f[TN])[ktn]=zero;
+		//	(D.f[DIR_0PP])[ktn]=(one-q)/(one+q)*(f_BS-f_TN+(f_BS+f_TN-two*feq*om1)/(one-om1))*c1o2+(q*(f_BS+f_TN)-six*c1over54*( -VeloY-VeloZ))/(one+q);
+		//	//(D.f[DIR_0PP])[ktn]=zero;
 		//}
 
 		//q = q_dirBN[k];
 		//if (q>=zero && q<=one)
 		//{
 		//	feq=c1over54* (drho/*+three*(     vx2-vx3)*/+c9over2*(     vx2-vx3)*(     vx2-vx3)-cu_sq); 
-		//	(D.f[TS])[kts]=(one-q)/(one+q)*(f_BN-f_TS+(f_BN+f_TS-two*feq*om1)/(one-om1))*c1o2+(q*(f_BN+f_TS)-six*c1over54*( VeloY-VeloZ))/(one+q);
-		//	//(D.f[TS])[kts]=zero;
+		//	(D.f[DIR_0MP])[kts]=(one-q)/(one+q)*(f_BN-f_TS+(f_BN+f_TS-two*feq*om1)/(one-om1))*c1o2+(q*(f_BN+f_TS)-six*c1over54*( VeloY-VeloZ))/(one+q);
+		//	//(D.f[DIR_0MP])[kts]=zero;
 		//}
 
 		//q = q_dirTS[k];
 		//if (q>=zero && q<=one)
 		//{
 		//	feq=c1over54* (drho/*+three*(    -vx2+vx3)*/+c9over2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq); 
-		//	(D.f[BN])[kbn]=(one-q)/(one+q)*(f_TS-f_BN+(f_TS+f_BN-two*feq*om1)/(one-om1))*c1o2+(q*(f_TS+f_BN)-six*c1over54*( -VeloY+VeloZ))/(one+q);
-		//	//(D.f[BN])[kbn]=zero;
+		//	(D.f[DIR_0PM])[kbn]=(one-q)/(one+q)*(f_TS-f_BN+(f_TS+f_BN-two*feq*om1)/(one-om1))*c1o2+(q*(f_TS+f_BN)-six*c1over54*( -VeloY+VeloZ))/(one+q);
+		//	//(D.f[DIR_0PM])[kbn]=zero;
 		//}
 
 		//q = q_dirTNE[k];
 		//if (q>=zero && q<=one)
 		//{
 		//	feq=c1over216*(drho/*+three*( vx1+vx2+vx3)*/+c9over2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq); 
-		//	(D.f[BSW])[kbsw]=(one-q)/(one+q)*(f_TNE-f_BSW+(f_TNE+f_BSW-two*feq*om1)/(one-om1))*c1o2+(q*(f_TNE+f_BSW)-six*c1over216*( VeloX+VeloY+VeloZ))/(one+q);
-		//	//(D.f[BSW])[kbsw]=zero;
+		//	(D.f[DIR_MMM])[kbsw]=(one-q)/(one+q)*(f_TNE-f_BSW+(f_TNE+f_BSW-two*feq*om1)/(one-om1))*c1o2+(q*(f_TNE+f_BSW)-six*c1over216*( VeloX+VeloY+VeloZ))/(one+q);
+		//	//(D.f[DIR_MMM])[kbsw]=zero;
 		//}
 
 		//q = q_dirBSW[k];
 		//if (q>=zero && q<=one)
 		//{
 		//	feq=c1over216*(drho/*+three*(-vx1-vx2-vx3)*/+c9over2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq); 
-		//	(D.f[TNE])[ktne]=(one-q)/(one+q)*(f_BSW-f_TNE+(f_BSW+f_TNE-two*feq*om1)/(one-om1))*c1o2+(q*(f_BSW+f_TNE)-six*c1over216*(-VeloX-VeloY-VeloZ))/(one+q);
-		//	//(D.f[TNE])[ktne]=zero;
+		//	(D.f[DIR_PPP])[ktne]=(one-q)/(one+q)*(f_BSW-f_TNE+(f_BSW+f_TNE-two*feq*om1)/(one-om1))*c1o2+(q*(f_BSW+f_TNE)-six*c1over216*(-VeloX-VeloY-VeloZ))/(one+q);
+		//	//(D.f[DIR_PPP])[ktne]=zero;
 		//}
 
 		//q = q_dirBNE[k];
 		//if (q>=zero && q<=one)
 		//{
 		//	feq=c1over216*(drho/*+three*( vx1+vx2-vx3)*/+c9over2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq); 
-		//	(D.f[TSW])[ktsw]=(one-q)/(one+q)*(f_BNE-f_TSW+(f_BNE+f_TSW-two*feq*om1)/(one-om1))*c1o2+(q*(f_BNE+f_TSW)-six*c1over216*( VeloX+VeloY-VeloZ))/(one+q);
-		//	//(D.f[TSW])[ktsw]=zero;
+		//	(D.f[DIR_MMP])[ktsw]=(one-q)/(one+q)*(f_BNE-f_TSW+(f_BNE+f_TSW-two*feq*om1)/(one-om1))*c1o2+(q*(f_BNE+f_TSW)-six*c1over216*( VeloX+VeloY-VeloZ))/(one+q);
+		//	//(D.f[DIR_MMP])[ktsw]=zero;
 		//}
 
 		//q = q_dirTSW[k];
 		//if (q>=zero && q<=one)
 		//{
 		//	feq=c1over216*(drho/*+three*(-vx1-vx2+vx3)*/+c9over2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq); 
-		//	(D.f[BNE])[kbne]=(one-q)/(one+q)*(f_TSW-f_BNE+(f_TSW+f_BNE-two*feq*om1)/(one-om1))*c1o2+(q*(f_TSW+f_BNE)-six*c1over216*(-VeloX-VeloY+VeloZ))/(one+q);
-		//	//(D.f[BNE])[kbne]=zero;
+		//	(D.f[DIR_PPM])[kbne]=(one-q)/(one+q)*(f_TSW-f_BNE+(f_TSW+f_BNE-two*feq*om1)/(one-om1))*c1o2+(q*(f_TSW+f_BNE)-six*c1over216*(-VeloX-VeloY+VeloZ))/(one+q);
+		//	//(D.f[DIR_PPM])[kbne]=zero;
 		//}
 
 		//q = q_dirTSE[k];
 		//if (q>=zero && q<=one)
 		//{
 		//	feq=c1over216*(drho/*+three*( vx1-vx2+vx3)*/+c9over2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq); 
-		//	(D.f[BNW])[kbnw]=(one-q)/(one+q)*(f_TSE-f_BNW+(f_TSE+f_BNW-two*feq*om1)/(one-om1))*c1o2+(q*(f_TSE+f_BNW)-six*c1over216*( VeloX-VeloY+VeloZ))/(one+q);
-		//	//(D.f[BNW])[kbnw]=zero;
+		//	(D.f[DIR_MPM])[kbnw]=(one-q)/(one+q)*(f_TSE-f_BNW+(f_TSE+f_BNW-two*feq*om1)/(one-om1))*c1o2+(q*(f_TSE+f_BNW)-six*c1over216*( VeloX-VeloY+VeloZ))/(one+q);
+		//	//(D.f[DIR_MPM])[kbnw]=zero;
 		//}
 
 		//q = q_dirBNW[k];
 		//if (q>=zero && q<=one)
 		//{
 		//	feq=c1over216*(drho/*+three*(-vx1+vx2-vx3)*/+c9over2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq); 
-		//	(D.f[TSE])[ktse]=(one-q)/(one+q)*(f_BNW-f_TSE+(f_BNW+f_TSE-two*feq*om1)/(one-om1))*c1o2+(q*(f_BNW+f_TSE)-six*c1over216*(-VeloX+VeloY-VeloZ))/(one+q);
-		//	//(D.f[TSE])[ktse]=zero;
+		//	(D.f[DIR_PMP])[ktse]=(one-q)/(one+q)*(f_BNW-f_TSE+(f_BNW+f_TSE-two*feq*om1)/(one-om1))*c1o2+(q*(f_BNW+f_TSE)-six*c1over216*(-VeloX+VeloY-VeloZ))/(one+q);
+		//	//(D.f[DIR_PMP])[ktse]=zero;
 		//}
 
 		//q = q_dirBSE[k];
 		//if (q>=zero && q<=one)
 		//{
 		//	feq=c1over216*(drho/*+three*( vx1-vx2-vx3)*/+c9over2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq); 
-		//	(D.f[TNW])[ktnw]=(one-q)/(one+q)*(f_BSE-f_TNW+(f_BSE+f_TNW-two*feq*om1)/(one-om1))*c1o2+(q*(f_BSE+f_TNW)-six*c1over216*( VeloX-VeloY-VeloZ))/(one+q);
-		//	//(D.f[TNW])[ktnw]=zero;
+		//	(D.f[DIR_MPP])[ktnw]=(one-q)/(one+q)*(f_BSE-f_TNW+(f_BSE+f_TNW-two*feq*om1)/(one-om1))*c1o2+(q*(f_BSE+f_TNW)-six*c1over216*( VeloX-VeloY-VeloZ))/(one+q);
+		//	//(D.f[DIR_MPP])[ktnw]=zero;
 		//}
 
 		//q = q_dirTNW[k];
 		//if (q>=zero && q<=one)
 		//{
 		//	feq=c1over216*(drho/*+three*(-vx1+vx2+vx3)*/+c9over2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq); 
-		//	(D.f[BSE])[kbse]=(one-q)/(one+q)*(f_TNW-f_BSE+(f_TNW+f_BSE-two*feq*om1)/(one-om1))*c1o2+(q*(f_TNW+f_BSE)-six*c1over216*(-VeloX+VeloY+VeloZ))/(one+q);
-		//	//(D.f[BSE])[kbse]=zero;
+		//	(D.f[DIR_PMM])[kbse]=(one-q)/(one+q)*(f_TNW-f_BSE+(f_TNW+f_BSE-two*feq*om1)/(one-om1))*c1o2+(q*(f_TNW+f_BSE)-six*c1over216*(-VeloX+VeloY+VeloZ))/(one+q);
+		//	//(D.f[DIR_PMM])[kbse]=zero;
 		//}
 
 		///////// equilibrium BC
@@ -4501,208 +4500,208 @@ extern "C" __global__ void QVelDev1h27( int inx,
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c2o27* (drho+c3o1*( VeloXpur        )+c9o2*( VeloX        )*( VeloX        )-cu_sq); 
-			(D.f[W])[kw]=feq;
-			//(D.f[W])[kw]=zero;
+			(D.f[DIR_M00])[kw]=feq;
+			//(D.f[DIR_M00])[kw]=zero;
 		}
 
 		q = q_dirW[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c2o27* (drho+c3o1*(-VeloXpur        )+c9o2*(-VeloX        )*(-VeloX        )-cu_sq); 
-			(D.f[E])[ke]=feq;
-			//(D.f[E])[ke]=zero;
+			(D.f[DIR_P00])[ke]=feq;
+			//(D.f[DIR_P00])[ke]=zero;
 		}
 
 		q = q_dirN[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c2o27* (drho+c3o1*(    VeloYpur     )+c9o2*(     VeloY    )*(     VeloY    )-cu_sq); 
-			(D.f[S])[ks]=feq;
-			//(D.f[S])[ks]=zero;
+			(D.f[DIR_0M0])[ks]=feq;
+			//(D.f[DIR_0M0])[ks]=zero;
 		}
 
 		q = q_dirS[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c2o27* (drho+c3o1*(   -VeloYpur     )+c9o2*(    -VeloY    )*(    -VeloY    )-cu_sq); 
-			(D.f[N])[kn]=feq;
-			//(D.f[N])[kn]=zero;
+			(D.f[DIR_0P0])[kn]=feq;
+			//(D.f[DIR_0P0])[kn]=zero;
 		}
 
 		q = q_dirT[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c2o27* (drho+c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cu_sq); 
-			(D.f[B])[kb]=feq;
-			//(D.f[B])[kb]=one;
+			(D.f[DIR_00M])[kb]=feq;
+			//(D.f[DIR_00M])[kb]=one;
 		}
 
 		q = q_dirB[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c2o27* (drho+c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cu_sq); 
-			(D.f[T])[kt]=feq;
-			//(D.f[T])[kt]=zero;
+			(D.f[DIR_00P])[kt]=feq;
+			//(D.f[DIR_00P])[kt]=zero;
 		}
 
 		q = q_dirNE[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c1o54* (drho+c3o1*( VeloXpur+VeloYpur    )+c9o2*( VeloX+VeloY    )*( VeloX+VeloY    )-cu_sq); 
-			(D.f[SW])[ksw]=feq;
-			//(D.f[SW])[ksw]=zero;
+			(D.f[DIR_MM0])[ksw]=feq;
+			//(D.f[DIR_MM0])[ksw]=zero;
 		}
 
 		q = q_dirSW[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c1o54* (drho+c3o1*(-VeloXpur-VeloYpur    )+c9o2*(-VeloX-VeloY    )*(-VeloX-VeloY    )-cu_sq); 
-			(D.f[NE])[kne]=feq;
-			//(D.f[NE])[kne]=zero;
+			(D.f[DIR_PP0])[kne]=feq;
+			//(D.f[DIR_PP0])[kne]=zero;
 		}
 
 		q = q_dirSE[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c1o54* (drho+c3o1*( VeloXpur-VeloYpur    )+c9o2*( VeloX-VeloY    )*( VeloX-VeloY    )-cu_sq); 
-			(D.f[NW])[knw]=feq;
-			//(D.f[NW])[knw]=zero;
+			(D.f[DIR_MP0])[knw]=feq;
+			//(D.f[DIR_MP0])[knw]=zero;
 		}
 
 		q = q_dirNW[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c1o54* (drho+c3o1*(-VeloXpur+VeloYpur    )+c9o2*(-VeloX+VeloY    )*(-VeloX+VeloY    )-cu_sq); 
-			(D.f[SE])[kse]=feq;
-			//(D.f[SE])[kse]=zero;
+			(D.f[DIR_PM0])[kse]=feq;
+			//(D.f[DIR_PM0])[kse]=zero;
 		}
 
 		q = q_dirTE[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c1o54* (drho+c3o1*( VeloXpur    +vx3)+c9o2*( VeloX    +vx3)*( VeloX    +vx3)-cu_sq); 
-			(D.f[BW])[kbw]=feq;
-			//(D.f[BW])[kbw]=zero;
+			(D.f[DIR_M0M])[kbw]=feq;
+			//(D.f[DIR_M0M])[kbw]=zero;
 		}
 
 		q = q_dirBW[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c1o54* (drho+c3o1*(-VeloXpur    -vx3)+c9o2*(-VeloX    -vx3)*(-VeloX    -vx3)-cu_sq); 
-			(D.f[TE])[kte]=feq;
-			//(D.f[TE])[kte]=zero;
+			(D.f[DIR_P0P])[kte]=feq;
+			//(D.f[DIR_P0P])[kte]=zero;
 		}
 
 		q = q_dirBE[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c1o54* (drho+c3o1*( VeloXpur    -vx3)+c9o2*( VeloX    -vx3)*( VeloX    -vx3)-cu_sq); 
-			(D.f[TW])[ktw]=feq;
-			//(D.f[TW])[ktw]=zero;
+			(D.f[DIR_M0P])[ktw]=feq;
+			//(D.f[DIR_M0P])[ktw]=zero;
 		}
 
 		q = q_dirTW[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c1o54* (drho+c3o1*(-VeloXpur    +vx3)+c9o2*(-VeloX    +vx3)*(-VeloX    +vx3)-cu_sq); 
-			(D.f[BE])[kbe]=feq;
-			//(D.f[BE])[kbe]=zero;
+			(D.f[DIR_P0M])[kbe]=feq;
+			//(D.f[DIR_P0M])[kbe]=zero;
 		}
 
 		q = q_dirTN[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c1o54* (drho+c3o1*(     VeloYpur+vx3)+c9o2*(     VeloY+vx3)*(     VeloY+vx3)-cu_sq); 
-			(D.f[BS])[kbs]=feq;
-			//(D.f[BS])[kbs]=zero;
+			(D.f[DIR_0MM])[kbs]=feq;
+			//(D.f[DIR_0MM])[kbs]=zero;
 		}
 
 		q = q_dirBS[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c1o54* (drho+c3o1*(    -VeloYpur-vx3)+c9o2*(    -VeloY-vx3)*(    -VeloY-vx3)-cu_sq); 
-			(D.f[TN])[ktn]=feq;
-			//(D.f[TN])[ktn]=zero;
+			(D.f[DIR_0PP])[ktn]=feq;
+			//(D.f[DIR_0PP])[ktn]=zero;
 		}
 
 		q = q_dirBN[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c1o54* (drho+c3o1*(     VeloYpur-vx3)+c9o2*(     VeloY-vx3)*(     VeloY-vx3)-cu_sq); 
-			(D.f[TS])[kts]=feq;
-			//(D.f[TS])[kts]=zero;
+			(D.f[DIR_0MP])[kts]=feq;
+			//(D.f[DIR_0MP])[kts]=zero;
 		}
 
 		q = q_dirTS[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c1o54* (drho+c3o1*(    -VeloYpur+vx3)+c9o2*(    -VeloY+vx3)*(    -VeloY+vx3)-cu_sq); 
-			(D.f[BN])[kbn]=feq;
-			//(D.f[BN])[kbn]=zero;
+			(D.f[DIR_0PM])[kbn]=feq;
+			//(D.f[DIR_0PM])[kbn]=zero;
 		}
 
 		q = q_dirTNE[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c1o216*(drho+c3o1*( VeloXpur+VeloYpur+vx3)+c9o2*( VeloX+VeloY+vx3)*( VeloX+VeloY+vx3)-cu_sq); 
-			(D.f[BSW])[kbsw]=feq;
-			//(D.f[BSW])[kbsw]=zero;
+			(D.f[DIR_MMM])[kbsw]=feq;
+			//(D.f[DIR_MMM])[kbsw]=zero;
 		}
 
 		q = q_dirBSW[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c1o216*(drho+c3o1*(-VeloXpur-VeloYpur-vx3)+c9o2*(-VeloX-VeloY-vx3)*(-VeloX-VeloY-vx3)-cu_sq); 
-			(D.f[TNE])[ktne]=feq;
-			//(D.f[TNE])[ktne]=zero;
+			(D.f[DIR_PPP])[ktne]=feq;
+			//(D.f[DIR_PPP])[ktne]=zero;
 		}
 
 		q = q_dirBNE[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c1o216*(drho+c3o1*( VeloXpur+VeloYpur-vx3)+c9o2*( VeloX+VeloY-vx3)*( VeloX+VeloY-vx3)-cu_sq); 
-			(D.f[TSW])[ktsw]=feq;
-			//(D.f[TSW])[ktsw]=zero;
+			(D.f[DIR_MMP])[ktsw]=feq;
+			//(D.f[DIR_MMP])[ktsw]=zero;
 		}
 
 		q = q_dirTSW[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c1o216*(drho+c3o1*(-VeloXpur-VeloYpur+vx3)+c9o2*(-VeloX-VeloY+vx3)*(-VeloX-VeloY+vx3)-cu_sq); 
-			(D.f[BNE])[kbne]=feq;
-			//(D.f[BNE])[kbne]=zero;
+			(D.f[DIR_PPM])[kbne]=feq;
+			//(D.f[DIR_PPM])[kbne]=zero;
 		}
 
 		q = q_dirTSE[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c1o216*(drho+c3o1*( VeloXpur-VeloYpur+vx3)+c9o2*( VeloX-VeloY+vx3)*( VeloX-VeloY+vx3)-cu_sq); 
-			(D.f[BNW])[kbnw]=feq;
-			//(D.f[BNW])[kbnw]=zero;
+			(D.f[DIR_MPM])[kbnw]=feq;
+			//(D.f[DIR_MPM])[kbnw]=zero;
 		}
 
 		q = q_dirBNW[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c1o216*(drho+c3o1*(-VeloXpur+VeloYpur-vx3)+c9o2*(-VeloX+VeloY-vx3)*(-VeloX+VeloY-vx3)-cu_sq); 
-			(D.f[TSE])[ktse]=feq;
-			//(D.f[TSE])[ktse]=zero;
+			(D.f[DIR_PMP])[ktse]=feq;
+			//(D.f[DIR_PMP])[ktse]=zero;
 		}
 
 		q = q_dirBSE[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c1o216*(drho+c3o1*( VeloXpur-VeloYpur-vx3)+c9o2*( VeloX-VeloY-vx3)*( VeloX-VeloY-vx3)-cu_sq); 
-			(D.f[TNW])[ktnw]=feq;
-			//(D.f[TNW])[ktnw]=zero;
+			(D.f[DIR_MPP])[ktnw]=feq;
+			//(D.f[DIR_MPP])[ktnw]=zero;
 		}
 
 		q = q_dirTNW[k];
 		if (q>=c0o1 && q<=c1o1)
 		{
 			feq=c1o216*(drho+c3o1*(-VeloXpur+VeloYpur+vx3)+c9o2*(-VeloX+VeloY+vx3)*(-VeloX+VeloY+vx3)-cu_sq); 
-			(D.f[BSE])[kbse]=feq;
-			//(D.f[BSE])[kbse]=zero;
+			(D.f[DIR_PMM])[kbse]=feq;
+			//(D.f[DIR_PMM])[kbse]=zero;
 		}
 	
 	}
@@ -4748,7 +4747,7 @@ extern "C" __global__ void QVelDev1h27( int inx,
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QVelDeviceComp27(
+__global__ void QVelDeviceComp27(
 											real* velocityX,
 											real* velocityY,
 											real* velocityZ,
@@ -4838,39 +4837,39 @@ extern "C" __global__ void QVelDeviceComp27(
       ////////////////////////////////////////////////////////////////////////////////
       //! - Set local distributions
       //!
-      real f_W    = (dist.f[E   ])[ke   ];
-      real f_E    = (dist.f[W   ])[kw   ];
-      real f_S    = (dist.f[N   ])[kn   ];
-      real f_N    = (dist.f[S   ])[ks   ];
-      real f_B    = (dist.f[T   ])[kt   ];
-      real f_T    = (dist.f[B   ])[kb   ];
-      real f_SW   = (dist.f[NE  ])[kne  ];
-      real f_NE   = (dist.f[SW  ])[ksw  ];
-      real f_NW   = (dist.f[SE  ])[kse  ];
-      real f_SE   = (dist.f[NW  ])[knw  ];
-      real f_BW   = (dist.f[TE  ])[kte  ];
-      real f_TE   = (dist.f[BW  ])[kbw  ];
-      real f_TW   = (dist.f[BE  ])[kbe  ];
-      real f_BE   = (dist.f[TW  ])[ktw  ];
-      real f_BS   = (dist.f[TN  ])[ktn  ];
-      real f_TN   = (dist.f[BS  ])[kbs  ];
-      real f_TS   = (dist.f[BN  ])[kbn  ];
-      real f_BN   = (dist.f[TS  ])[kts  ];
-      real f_BSW  = (dist.f[TNE ])[ktne ];
-      real f_BNE  = (dist.f[TSW ])[ktsw ];
-      real f_BNW  = (dist.f[TSE ])[ktse ];
-      real f_BSE  = (dist.f[TNW ])[ktnw ];
-      real f_TSW  = (dist.f[BNE ])[kbne ];
-      real f_TNE  = (dist.f[BSW ])[kbsw ];
-      real f_TNW  = (dist.f[BSE ])[kbse ];
-      real f_TSE  = (dist.f[BNW ])[kbnw ];
+      real f_W    = (dist.f[DIR_P00   ])[ke   ];
+      real f_E    = (dist.f[DIR_M00   ])[kw   ];
+      real f_S    = (dist.f[DIR_0P0   ])[kn   ];
+      real f_N    = (dist.f[DIR_0M0   ])[ks   ];
+      real f_B    = (dist.f[DIR_00P   ])[kt   ];
+      real f_T    = (dist.f[DIR_00M   ])[kb   ];
+      real f_SW   = (dist.f[DIR_PP0  ])[kne  ];
+      real f_NE   = (dist.f[DIR_MM0  ])[ksw  ];
+      real f_NW   = (dist.f[DIR_PM0  ])[kse  ];
+      real f_SE   = (dist.f[DIR_MP0  ])[knw  ];
+      real f_BW   = (dist.f[DIR_P0P  ])[kte  ];
+      real f_TE   = (dist.f[DIR_M0M  ])[kbw  ];
+      real f_TW   = (dist.f[DIR_P0M  ])[kbe  ];
+      real f_BE   = (dist.f[DIR_M0P  ])[ktw  ];
+      real f_BS   = (dist.f[DIR_0PP  ])[ktn  ];
+      real f_TN   = (dist.f[DIR_0MM  ])[kbs  ];
+      real f_TS   = (dist.f[DIR_0PM  ])[kbn  ];
+      real f_BN   = (dist.f[DIR_0MP  ])[kts  ];
+      real f_BSW  = (dist.f[DIR_PPP ])[ktne ];
+      real f_BNE  = (dist.f[DIR_MMP ])[ktsw ];
+      real f_BNW  = (dist.f[DIR_PMP ])[ktse ];
+      real f_BSE  = (dist.f[DIR_MPP ])[ktnw ];
+      real f_TSW  = (dist.f[DIR_PPM ])[kbne ];
+      real f_TNE  = (dist.f[DIR_MMM ])[kbsw ];
+      real f_TNW  = (dist.f[DIR_PMM ])[kbse ];
+      real f_TSE  = (dist.f[DIR_MPM ])[kbnw ];
 
       ////////////////////////////////////////////////////////////////////////////////
       //! - Calculate macroscopic quantities
       //!
       real drho = f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
                   f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + 
-                  f_T + f_B + f_N + f_S + f_E + f_W + ((dist.f[REST])[kzero]); 
+                  f_T + f_B + f_N + f_S + f_E + f_W + ((dist.f[DIR_000])[kzero]); 
 
       real vx1  = (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
                    ((f_BE - f_TW)   + (f_TE - f_BW))   + ((f_SE - f_NW)   + (f_NE - f_SW)) +
@@ -4895,238 +4894,238 @@ extern "C" __global__ void QVelDeviceComp27(
       //! - Update distributions with subgrid distance (q) between zero and one
       //!
       real feq, q, velocityLB, velocityBC;
-      q = (subgridD.q[E])[k];
+      q = (subgridD.q[DIR_P00])[k];
       if (q>=c0o1 && q<=c1o1) // only update distribution for q between zero and one
       {
          velocityLB = vx1;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
          velocityBC = VeloX;
-         (dist.f[W])[kw] = getInterpolatedDistributionForVeloBC(q, f_E, f_W, feq, omega, velocityBC, c2o27);
+         (dist.f[DIR_M00])[kw] = getInterpolatedDistributionForVeloBC(q, f_E, f_W, feq, omega, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[W])[k];
+      q = (subgridD.q[DIR_M00])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
          velocityBC = -VeloX;
-         (dist.f[E])[ke] = getInterpolatedDistributionForVeloBC(q, f_W, f_E, feq, omega, velocityBC, c2o27);
+         (dist.f[DIR_P00])[ke] = getInterpolatedDistributionForVeloBC(q, f_W, f_E, feq, omega, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[N])[k];
+      q = (subgridD.q[DIR_0P0])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx2;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
          velocityBC = VeloY;
-         (dist.f[S])[ks] = getInterpolatedDistributionForVeloBC(q, f_N, f_S, feq, omega, velocityBC, c2o27);
+         (dist.f[DIR_0M0])[ks] = getInterpolatedDistributionForVeloBC(q, f_N, f_S, feq, omega, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[S])[k];
+      q = (subgridD.q[DIR_0M0])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx2;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
          velocityBC = -VeloY;
-         (dist.f[N])[kn] = getInterpolatedDistributionForVeloBC(q, f_S, f_N, feq, omega, velocityBC, c2o27);
+         (dist.f[DIR_0P0])[kn] = getInterpolatedDistributionForVeloBC(q, f_S, f_N, feq, omega, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[T])[k];
+      q = (subgridD.q[DIR_00P])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
          velocityBC = VeloZ;
-         (dist.f[B])[kb] = getInterpolatedDistributionForVeloBC(q, f_T, f_B, feq, omega, velocityBC, c2o27);
+         (dist.f[DIR_00M])[kb] = getInterpolatedDistributionForVeloBC(q, f_T, f_B, feq, omega, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[B])[k];
+      q = (subgridD.q[DIR_00M])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c2o27);
          velocityBC = -VeloZ;
-         (dist.f[T])[kt] = getInterpolatedDistributionForVeloBC(q, f_B, f_T, feq, omega, velocityBC, c2o27);
+         (dist.f[DIR_00P])[kt] = getInterpolatedDistributionForVeloBC(q, f_B, f_T, feq, omega, velocityBC, c2o27);
       }
 
-      q = (subgridD.q[NE])[k];
+      q = (subgridD.q[DIR_PP0])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx2;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
          velocityBC = VeloX + VeloY;
-         (dist.f[SW])[ksw] = getInterpolatedDistributionForVeloBC(q, f_NE, f_SW, feq, omega, velocityBC, c1o54);
+         (dist.f[DIR_MM0])[ksw] = getInterpolatedDistributionForVeloBC(q, f_NE, f_SW, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[SW])[k];
+      q = (subgridD.q[DIR_MM0])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx2;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
          velocityBC = -VeloX - VeloY;
-         (dist.f[NE])[kne] = getInterpolatedDistributionForVeloBC(q, f_SW, f_NE, feq, omega, velocityBC, c1o54);
+         (dist.f[DIR_PP0])[kne] = getInterpolatedDistributionForVeloBC(q, f_SW, f_NE, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[SE])[k];
+      q = (subgridD.q[DIR_PM0])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx2;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
          velocityBC = VeloX - VeloY;
-         (dist.f[NW])[knw] = getInterpolatedDistributionForVeloBC(q, f_SE, f_NW, feq, omega, velocityBC, c1o54);
+         (dist.f[DIR_MP0])[knw] = getInterpolatedDistributionForVeloBC(q, f_SE, f_NW, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[NW])[k];
+      q = (subgridD.q[DIR_MP0])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx2;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
          velocityBC = -VeloX + VeloY;
-         (dist.f[SE])[kse] = getInterpolatedDistributionForVeloBC(q, f_NW, f_SE, feq, omega, velocityBC, c1o54);
+         (dist.f[DIR_PM0])[kse] = getInterpolatedDistributionForVeloBC(q, f_NW, f_SE, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[TE])[k];
+      q = (subgridD.q[DIR_P0P])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
          velocityBC = VeloX + VeloZ;
-         (dist.f[BW])[kbw] = getInterpolatedDistributionForVeloBC(q, f_TE, f_BW, feq, omega, velocityBC, c1o54);
+         (dist.f[DIR_M0M])[kbw] = getInterpolatedDistributionForVeloBC(q, f_TE, f_BW, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[BW])[k];
+      q = (subgridD.q[DIR_M0M])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
          velocityBC = -VeloX - VeloZ;
-         (dist.f[TE])[kte] = getInterpolatedDistributionForVeloBC(q, f_BW, f_TE, feq, omega, velocityBC, c1o54);
+         (dist.f[DIR_P0P])[kte] = getInterpolatedDistributionForVeloBC(q, f_BW, f_TE, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[BE])[k];
+      q = (subgridD.q[DIR_P0M])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
          velocityBC = VeloX - VeloZ;
-         (dist.f[TW])[ktw] = getInterpolatedDistributionForVeloBC(q, f_BE, f_TW, feq, omega, velocityBC, c1o54);
+         (dist.f[DIR_M0P])[ktw] = getInterpolatedDistributionForVeloBC(q, f_BE, f_TW, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[TW])[k];
+      q = (subgridD.q[DIR_M0P])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
          velocityBC = -VeloX + VeloZ;
-         (dist.f[BE])[kbe] = getInterpolatedDistributionForVeloBC(q, f_TW, f_BE, feq, omega, velocityBC, c1o54);
+         (dist.f[DIR_P0M])[kbe] = getInterpolatedDistributionForVeloBC(q, f_TW, f_BE, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[TN])[k];
+      q = (subgridD.q[DIR_0PP])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx2 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
          velocityBC = VeloY + VeloZ;
-         (dist.f[BS])[kbs] = getInterpolatedDistributionForVeloBC(q, f_TN, f_BS, feq, omega, velocityBC, c1o54);
+         (dist.f[DIR_0MM])[kbs] = getInterpolatedDistributionForVeloBC(q, f_TN, f_BS, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[BS])[k];
+      q = (subgridD.q[DIR_0MM])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx2 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
          velocityBC = -VeloY - VeloZ;
-         (dist.f[TN])[ktn] = getInterpolatedDistributionForVeloBC(q, f_BS, f_TN, feq, omega, velocityBC, c1o54);
+         (dist.f[DIR_0PP])[ktn] = getInterpolatedDistributionForVeloBC(q, f_BS, f_TN, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[BN])[k];
+      q = (subgridD.q[DIR_0PM])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx2 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
          velocityBC = VeloY - VeloZ;
-         (dist.f[TS])[kts] = getInterpolatedDistributionForVeloBC(q, f_BN, f_TS, feq, omega, velocityBC, c1o54);
+         (dist.f[DIR_0MP])[kts] = getInterpolatedDistributionForVeloBC(q, f_BN, f_TS, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[TS])[k];
+      q = (subgridD.q[DIR_0MP])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx2 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o54);
          velocityBC = -VeloY + VeloZ;
-         (dist.f[BN])[kbn] = getInterpolatedDistributionForVeloBC(q, f_TS, f_BN, feq, omega, velocityBC, c1o54);
+         (dist.f[DIR_0PM])[kbn] = getInterpolatedDistributionForVeloBC(q, f_TS, f_BN, feq, omega, velocityBC, c1o54);
       }
 
-      q = (subgridD.q[TNE])[k];
+      q = (subgridD.q[DIR_PPP])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx2 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
          velocityBC = VeloX + VeloY + VeloZ;
-         (dist.f[BSW])[kbsw] = getInterpolatedDistributionForVeloBC(q, f_TNE, f_BSW, feq, omega, velocityBC, c1o216);
+         (dist.f[DIR_MMM])[kbsw] = getInterpolatedDistributionForVeloBC(q, f_TNE, f_BSW, feq, omega, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[BSW])[k];
+      q = (subgridD.q[DIR_MMM])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx2 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
          velocityBC = -VeloX - VeloY - VeloZ;
-         (dist.f[TNE])[ktne] = getInterpolatedDistributionForVeloBC(q, f_BSW, f_TNE, feq, omega, velocityBC, c1o216);
+         (dist.f[DIR_PPP])[ktne] = getInterpolatedDistributionForVeloBC(q, f_BSW, f_TNE, feq, omega, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[BNE])[k];
+      q = (subgridD.q[DIR_PPM])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 + vx2 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
          velocityBC = VeloX + VeloY - VeloZ;
-         (dist.f[TSW])[ktsw] = getInterpolatedDistributionForVeloBC(q, f_BNE, f_TSW, feq, omega, velocityBC, c1o216);
+         (dist.f[DIR_MMP])[ktsw] = getInterpolatedDistributionForVeloBC(q, f_BNE, f_TSW, feq, omega, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[TSW])[k];
+      q = (subgridD.q[DIR_MMP])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 - vx2 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
          velocityBC = -VeloX - VeloY + VeloZ;
-         (dist.f[BNE])[kbne] = getInterpolatedDistributionForVeloBC(q, f_TSW, f_BNE, feq, omega, velocityBC, c1o216);
+         (dist.f[DIR_PPM])[kbne] = getInterpolatedDistributionForVeloBC(q, f_TSW, f_BNE, feq, omega, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[TSE])[k];
+      q = (subgridD.q[DIR_PMP])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx2 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
          velocityBC = VeloX - VeloY + VeloZ;
-         (dist.f[BNW])[kbnw] = getInterpolatedDistributionForVeloBC(q, f_TSE, f_BNW, feq, omega, velocityBC, c1o216);
+         (dist.f[DIR_MPM])[kbnw] = getInterpolatedDistributionForVeloBC(q, f_TSE, f_BNW, feq, omega, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[BNW])[k];
+      q = (subgridD.q[DIR_MPM])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx2 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
          velocityBC = -VeloX + VeloY - VeloZ;
-         (dist.f[TSE])[ktse] = getInterpolatedDistributionForVeloBC(q, f_BNW, f_TSE, feq, omega, velocityBC, c1o216);
+         (dist.f[DIR_PMP])[ktse] = getInterpolatedDistributionForVeloBC(q, f_BNW, f_TSE, feq, omega, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[BSE])[k];
+      q = (subgridD.q[DIR_PMM])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = vx1 - vx2 - vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
          velocityBC = VeloX - VeloY - VeloZ;
-         (dist.f[TNW])[ktnw] = getInterpolatedDistributionForVeloBC(q, f_BSE, f_TNW, feq, omega, velocityBC, c1o216);
+         (dist.f[DIR_MPP])[ktnw] = getInterpolatedDistributionForVeloBC(q, f_BSE, f_TNW, feq, omega, velocityBC, c1o216);
       }
 
-      q = (subgridD.q[TNW])[k];
+      q = (subgridD.q[DIR_MPP])[k];
       if (q>=c0o1 && q<=c1o1)
       {
          velocityLB = -vx1 + vx2 + vx3;
          feq = getEquilibriumForBC(drho, velocityLB, cu_sq, c1o216);
          velocityBC = -VeloX + VeloY + VeloZ;
-         (dist.f[BSE])[kbse] = getInterpolatedDistributionForVeloBC(q, f_TNW, f_BSE, feq, omega, velocityBC, c1o216);
+         (dist.f[DIR_PMM])[kbse] = getInterpolatedDistributionForVeloBC(q, f_TNW, f_BSE, feq, omega, velocityBC, c1o216);
       }
    }
 }
@@ -5171,7 +5170,7 @@ extern "C" __global__ void QVelDeviceComp27(
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void QVelDevice27(int inx,
+__global__ void QVelDevice27(int inx,
                                         int iny,
                                         real* vx,
                                         real* vy,
@@ -5190,63 +5189,63 @@ extern "C" __global__ void QVelDevice27(int inx,
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[E   ] = &DD[E   *size_Mat];
-      D.f[W   ] = &DD[W   *size_Mat];
-      D.f[N   ] = &DD[N   *size_Mat];
-      D.f[S   ] = &DD[S   *size_Mat];
-      D.f[T   ] = &DD[T   *size_Mat];
-      D.f[B   ] = &DD[B   *size_Mat];
-      D.f[NE  ] = &DD[NE  *size_Mat];
-      D.f[SW  ] = &DD[SW  *size_Mat];
-      D.f[SE  ] = &DD[SE  *size_Mat];
-      D.f[NW  ] = &DD[NW  *size_Mat];
-      D.f[TE  ] = &DD[TE  *size_Mat];
-      D.f[BW  ] = &DD[BW  *size_Mat];
-      D.f[BE  ] = &DD[BE  *size_Mat];
-      D.f[TW  ] = &DD[TW  *size_Mat];
-      D.f[TN  ] = &DD[TN  *size_Mat];
-      D.f[BS  ] = &DD[BS  *size_Mat];
-      D.f[BN  ] = &DD[BN  *size_Mat];
-      D.f[TS  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[TNE *size_Mat];
-      D.f[TSW ] = &DD[TSW *size_Mat];
-      D.f[TSE ] = &DD[TSE *size_Mat];
-      D.f[TNW ] = &DD[TNW *size_Mat];
-      D.f[BNE ] = &DD[BNE *size_Mat];
-      D.f[BSW ] = &DD[BSW *size_Mat];
-      D.f[BSE ] = &DD[BSE *size_Mat];
-      D.f[BNW ] = &DD[BNW *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
    } 
    else
    {
-      D.f[W   ] = &DD[E   *size_Mat];
-      D.f[E   ] = &DD[W   *size_Mat];
-      D.f[S   ] = &DD[N   *size_Mat];
-      D.f[N   ] = &DD[S   *size_Mat];
-      D.f[B   ] = &DD[T   *size_Mat];
-      D.f[T   ] = &DD[B   *size_Mat];
-      D.f[SW  ] = &DD[NE  *size_Mat];
-      D.f[NE  ] = &DD[SW  *size_Mat];
-      D.f[NW  ] = &DD[SE  *size_Mat];
-      D.f[SE  ] = &DD[NW  *size_Mat];
-      D.f[BW  ] = &DD[TE  *size_Mat];
-      D.f[TE  ] = &DD[BW  *size_Mat];
-      D.f[TW  ] = &DD[BE  *size_Mat];
-      D.f[BE  ] = &DD[TW  *size_Mat];
-      D.f[BS  ] = &DD[TN  *size_Mat];
-      D.f[TN  ] = &DD[BS  *size_Mat];
-      D.f[TS  ] = &DD[BN  *size_Mat];
-      D.f[BN  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[BSW *size_Mat];
-      D.f[TSW ] = &DD[BNE *size_Mat];
-      D.f[TSE ] = &DD[BNW *size_Mat];
-      D.f[TNW ] = &DD[BSE *size_Mat];
-      D.f[BNE ] = &DD[TSW *size_Mat];
-      D.f[BSW ] = &DD[TNE *size_Mat];
-      D.f[BSE ] = &DD[TNW *size_Mat];
-      D.f[BNW ] = &DD[TSE *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -5271,32 +5270,32 @@ extern "C" __global__ void QVelDevice27(int inx,
             *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
             *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
             *q_dirBSE, *q_dirBNW; 
-      q_dirE   = &QQ[E   * numberOfBCnodes];
-      q_dirW   = &QQ[W   * numberOfBCnodes];
-      q_dirN   = &QQ[N   * numberOfBCnodes];
-      q_dirS   = &QQ[S   * numberOfBCnodes];
-      q_dirT   = &QQ[T   * numberOfBCnodes];
-      q_dirB   = &QQ[B   * numberOfBCnodes];
-      q_dirNE  = &QQ[NE  * numberOfBCnodes];
-      q_dirSW  = &QQ[SW  * numberOfBCnodes];
-      q_dirSE  = &QQ[SE  * numberOfBCnodes];
-      q_dirNW  = &QQ[NW  * numberOfBCnodes];
-      q_dirTE  = &QQ[TE  * numberOfBCnodes];
-      q_dirBW  = &QQ[BW  * numberOfBCnodes];
-      q_dirBE  = &QQ[BE  * numberOfBCnodes];
-      q_dirTW  = &QQ[TW  * numberOfBCnodes];
-      q_dirTN  = &QQ[TN  * numberOfBCnodes];
-      q_dirBS  = &QQ[BS  * numberOfBCnodes];
-      q_dirBN  = &QQ[BN  * numberOfBCnodes];
-      q_dirTS  = &QQ[TS  * numberOfBCnodes];
-      q_dirTNE = &QQ[TNE * numberOfBCnodes];
-      q_dirTSW = &QQ[TSW * numberOfBCnodes];
-      q_dirTSE = &QQ[TSE * numberOfBCnodes];
-      q_dirTNW = &QQ[TNW * numberOfBCnodes];
-      q_dirBNE = &QQ[BNE * numberOfBCnodes];
-      q_dirBSW = &QQ[BSW * numberOfBCnodes];
-      q_dirBSE = &QQ[BSE * numberOfBCnodes];
-      q_dirBNW = &QQ[BNW * numberOfBCnodes];
+      q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
+      q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
+      q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
+      q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
+      q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
+      q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
+      q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
+      q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
+      q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
+      q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
+      q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
+      q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
+      q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
+      q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
+      q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
+      q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
+      q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
+      q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
+      q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
+      q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
+      q_dirTNW = &QQ[DIR_MPP * numberOfBCnodes];
+      q_dirBNE = &QQ[DIR_PPM * numberOfBCnodes];
+      q_dirBSW = &QQ[DIR_MMM * numberOfBCnodes];
+      q_dirBSE = &QQ[DIR_PMM * numberOfBCnodes];
+      q_dirBNW = &QQ[DIR_MPM * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -5359,37 +5358,37 @@ extern "C" __global__ void QVelDevice27(int inx,
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
          f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_W    = (D.f[E   ])[ke   ];
-      f_E    = (D.f[W   ])[kw   ];
-      f_S    = (D.f[N   ])[kn   ];
-      f_N    = (D.f[S   ])[ks   ];
-      f_B    = (D.f[T   ])[kt   ];
-      f_T    = (D.f[B   ])[kb   ];
-      f_SW   = (D.f[NE  ])[kne  ];
-      f_NE   = (D.f[SW  ])[ksw  ];
-      f_NW   = (D.f[SE  ])[kse  ];
-      f_SE   = (D.f[NW  ])[knw  ];
-      f_BW   = (D.f[TE  ])[kte  ];
-      f_TE   = (D.f[BW  ])[kbw  ];
-      f_TW   = (D.f[BE  ])[kbe  ];
-      f_BE   = (D.f[TW  ])[ktw  ];
-      f_BS   = (D.f[TN  ])[ktn  ];
-      f_TN   = (D.f[BS  ])[kbs  ];
-      f_TS   = (D.f[BN  ])[kbn  ];
-      f_BN   = (D.f[TS  ])[kts  ];
-      f_BSW  = (D.f[TNE ])[ktne ];
-      f_BNE  = (D.f[TSW ])[ktsw ];
-      f_BNW  = (D.f[TSE ])[ktse ];
-      f_BSE  = (D.f[TNW ])[ktnw ];
-      f_TSW  = (D.f[BNE ])[kbne ];
-      f_TNE  = (D.f[BSW ])[kbsw ];
-      f_TNW  = (D.f[BSE ])[kbse ];
-      f_TSE  = (D.f[BNW ])[kbnw ];
+      f_W    = (D.f[DIR_P00   ])[ke   ];
+      f_E    = (D.f[DIR_M00   ])[kw   ];
+      f_S    = (D.f[DIR_0P0   ])[kn   ];
+      f_N    = (D.f[DIR_0M0   ])[ks   ];
+      f_B    = (D.f[DIR_00P   ])[kt   ];
+      f_T    = (D.f[DIR_00M   ])[kb   ];
+      f_SW   = (D.f[DIR_PP0  ])[kne  ];
+      f_NE   = (D.f[DIR_MM0  ])[ksw  ];
+      f_NW   = (D.f[DIR_PM0  ])[kse  ];
+      f_SE   = (D.f[DIR_MP0  ])[knw  ];
+      f_BW   = (D.f[DIR_P0P  ])[kte  ];
+      f_TE   = (D.f[DIR_M0M  ])[kbw  ];
+      f_TW   = (D.f[DIR_P0M  ])[kbe  ];
+      f_BE   = (D.f[DIR_M0P  ])[ktw  ];
+      f_BS   = (D.f[DIR_0PP  ])[ktn  ];
+      f_TN   = (D.f[DIR_0MM  ])[kbs  ];
+      f_TS   = (D.f[DIR_0PM  ])[kbn  ];
+      f_BN   = (D.f[DIR_0MP  ])[kts  ];
+      f_BSW  = (D.f[DIR_PPP ])[ktne ];
+      f_BNE  = (D.f[DIR_MMP ])[ktsw ];
+      f_BNW  = (D.f[DIR_PMP ])[ktse ];
+      f_BSE  = (D.f[DIR_MPP ])[ktnw ];
+      f_TSW  = (D.f[DIR_PPM ])[kbne ];
+      f_TNE  = (D.f[DIR_MMM ])[kbsw ];
+      f_TNW  = (D.f[DIR_PMM ])[kbse ];
+      f_TSE  = (D.f[DIR_MPM ])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       real vx1, vx2, vx3, drho, feq, q;
       drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
                 f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + 
-                f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[REST])[kzero]); 
+                f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[DIR_000])[kzero]); 
 
       vx1    =  ((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
                 ((f_BE - f_TW)   + (f_TE - f_BW))   + ((f_SE - f_NW)   + (f_NE - f_SW)) +
@@ -5409,67 +5408,67 @@ extern "C" __global__ void QVelDevice27(int inx,
       //////////////////////////////////////////////////////////////////////////
       if (isEvenTimestep==false)
       {
-         D.f[E   ] = &DD[E   *size_Mat];
-         D.f[W   ] = &DD[W   *size_Mat];
-         D.f[N   ] = &DD[N   *size_Mat];
-         D.f[S   ] = &DD[S   *size_Mat];
-         D.f[T   ] = &DD[T   *size_Mat];
-         D.f[B   ] = &DD[B   *size_Mat];
-         D.f[NE  ] = &DD[NE  *size_Mat];
-         D.f[SW  ] = &DD[SW  *size_Mat];
-         D.f[SE  ] = &DD[SE  *size_Mat];
-         D.f[NW  ] = &DD[NW  *size_Mat];
-         D.f[TE  ] = &DD[TE  *size_Mat];
-         D.f[BW  ] = &DD[BW  *size_Mat];
-         D.f[BE  ] = &DD[BE  *size_Mat];
-         D.f[TW  ] = &DD[TW  *size_Mat];
-         D.f[TN  ] = &DD[TN  *size_Mat];
-         D.f[BS  ] = &DD[BS  *size_Mat];
-         D.f[BN  ] = &DD[BN  *size_Mat];
-         D.f[TS  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[TNE *size_Mat];
-         D.f[TSW ] = &DD[TSW *size_Mat];
-         D.f[TSE ] = &DD[TSE *size_Mat];
-         D.f[TNW ] = &DD[TNW *size_Mat];
-         D.f[BNE ] = &DD[BNE *size_Mat];
-         D.f[BSW ] = &DD[BSW *size_Mat];
-         D.f[BSE ] = &DD[BSE *size_Mat];
-         D.f[BNW ] = &DD[BNW *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
       } 
       else
       {
-         D.f[W   ] = &DD[E   *size_Mat];
-         D.f[E   ] = &DD[W   *size_Mat];
-         D.f[S   ] = &DD[N   *size_Mat];
-         D.f[N   ] = &DD[S   *size_Mat];
-         D.f[B   ] = &DD[T   *size_Mat];
-         D.f[T   ] = &DD[B   *size_Mat];
-         D.f[SW  ] = &DD[NE  *size_Mat];
-         D.f[NE  ] = &DD[SW  *size_Mat];
-         D.f[NW  ] = &DD[SE  *size_Mat];
-         D.f[SE  ] = &DD[NW  *size_Mat];
-         D.f[BW  ] = &DD[TE  *size_Mat];
-         D.f[TE  ] = &DD[BW  *size_Mat];
-         D.f[TW  ] = &DD[BE  *size_Mat];
-         D.f[BE  ] = &DD[TW  *size_Mat];
-         D.f[BS  ] = &DD[TN  *size_Mat];
-         D.f[TN  ] = &DD[BS  *size_Mat];
-         D.f[TS  ] = &DD[BN  *size_Mat];
-         D.f[BN  ] = &DD[TS  *size_Mat];
-         D.f[REST] = &DD[REST*size_Mat];
-         D.f[TNE ] = &DD[BSW *size_Mat];
-         D.f[TSW ] = &DD[BNE *size_Mat];
-         D.f[TSE ] = &DD[BNW *size_Mat];
-         D.f[TNW ] = &DD[BSE *size_Mat];
-         D.f[BNE ] = &DD[TSW *size_Mat];
-         D.f[BSW ] = &DD[TNE *size_Mat];
-         D.f[BSE ] = &DD[TNW *size_Mat];
-         D.f[BNW ] = &DD[TSE *size_Mat];
+         D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+         D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+         D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+         D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+         D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+         D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+         D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+         D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+         D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+         D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+         D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+         D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+         D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+         D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+         D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+         D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+         D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+         D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+         D.f[DIR_000] = &DD[DIR_000*size_Mat];
+         D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+         D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+         D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+         D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+         D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+         D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+         D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+         D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
       }
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       //Test
-      //(D.f[REST])[k]=c1o10;
+      //(D.f[DIR_000])[k]=c1o10;
       ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
 	  //ToDo anders Klammern
@@ -5478,208 +5477,208 @@ extern "C" __global__ void QVelDevice27(int inx,
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c2o27* (drho/*+three*( vx1        )*/+c9o2*( vx1        )*( vx1        )-cu_sq); 
-         (D.f[W])[kw]=(c1o1-q)/(c1o1+q)*(f_E-f_W+(f_E+f_W-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_E+f_W)-c6o1*c2o27*( VeloX     ))/(c1o1+q);
-         //(D.f[W])[kw]=zero;
+         (D.f[DIR_M00])[kw]=(c1o1-q)/(c1o1+q)*(f_E-f_W+(f_E+f_W-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_E+f_W)-c6o1*c2o27*( VeloX     ))/(c1o1+q);
+         //(D.f[DIR_M00])[kw]=zero;
       }
 
       q = q_dirW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c2o27* (drho/*+three*(-vx1        )*/+c9o2*(-vx1        )*(-vx1        )-cu_sq); 
-         (D.f[E])[ke]=(c1o1-q)/(c1o1+q)*(f_W-f_E+(f_W+f_E-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_W+f_E)-c6o1*c2o27*(-VeloX     ))/(c1o1+q);
-         //(D.f[E])[ke]=zero;
+         (D.f[DIR_P00])[ke]=(c1o1-q)/(c1o1+q)*(f_W-f_E+(f_W+f_E-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_W+f_E)-c6o1*c2o27*(-VeloX     ))/(c1o1+q);
+         //(D.f[DIR_P00])[ke]=zero;
       }
 
       q = q_dirN[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c2o27* (drho/*+three*(    vx2     )*/+c9o2*(     vx2    )*(     vx2    )-cu_sq); 
-         (D.f[S])[ks]=(c1o1-q)/(c1o1+q)*(f_N-f_S+(f_N+f_S-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_N+f_S)-c6o1*c2o27*( VeloY     ))/(c1o1+q);
-         //(D.f[S])[ks]=zero;
+         (D.f[DIR_0M0])[ks]=(c1o1-q)/(c1o1+q)*(f_N-f_S+(f_N+f_S-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_N+f_S)-c6o1*c2o27*( VeloY     ))/(c1o1+q);
+         //(D.f[DIR_0M0])[ks]=zero;
       }
 
       q = q_dirS[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c2o27* (drho/*+three*(   -vx2     )*/+c9o2*(    -vx2    )*(    -vx2    )-cu_sq); 
-         (D.f[N])[kn]=(c1o1-q)/(c1o1+q)*(f_S-f_N+(f_S+f_N-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_S+f_N)-c6o1*c2o27*(-VeloY     ))/(c1o1+q);
-         //(D.f[N])[kn]=zero;
+         (D.f[DIR_0P0])[kn]=(c1o1-q)/(c1o1+q)*(f_S-f_N+(f_S+f_N-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_S+f_N)-c6o1*c2o27*(-VeloY     ))/(c1o1+q);
+         //(D.f[DIR_0P0])[kn]=zero;
       }
 
       q = q_dirT[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c2o27* (drho/*+three*(         vx3)*/+c9o2*(         vx3)*(         vx3)-cu_sq); 
-         (D.f[B])[kb]=(c1o1-q)/(c1o1+q)*(f_T-f_B+(f_T+f_B-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_T+f_B)-c6o1*c2o27*( VeloZ     ))/(c1o1+q);
-         //(D.f[B])[kb]=one;
+         (D.f[DIR_00M])[kb]=(c1o1-q)/(c1o1+q)*(f_T-f_B+(f_T+f_B-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_T+f_B)-c6o1*c2o27*( VeloZ     ))/(c1o1+q);
+         //(D.f[DIR_00M])[kb]=one;
       }
 
       q = q_dirB[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c2o27* (drho/*+three*(        -vx3)*/+c9o2*(        -vx3)*(        -vx3)-cu_sq); 
-         (D.f[T])[kt]=(c1o1-q)/(c1o1+q)*(f_B-f_T+(f_B+f_T-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_B+f_T)-c6o1*c2o27*(-VeloZ     ))/(c1o1+q);
-         //(D.f[T])[kt]=zero;
+         (D.f[DIR_00P])[kt]=(c1o1-q)/(c1o1+q)*(f_B-f_T+(f_B+f_T-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_B+f_T)-c6o1*c2o27*(-VeloZ     ))/(c1o1+q);
+         //(D.f[DIR_00P])[kt]=zero;
       }
 
       q = q_dirNE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*( vx1+vx2    )*/+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq); 
-         (D.f[SW])[ksw]=(c1o1-q)/(c1o1+q)*(f_NE-f_SW+(f_NE+f_SW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_NE+f_SW)-c6o1*c1o54*(VeloX+VeloY))/(c1o1+q);
-         //(D.f[SW])[ksw]=zero;
+         (D.f[DIR_MM0])[ksw]=(c1o1-q)/(c1o1+q)*(f_NE-f_SW+(f_NE+f_SW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_NE+f_SW)-c6o1*c1o54*(VeloX+VeloY))/(c1o1+q);
+         //(D.f[DIR_MM0])[ksw]=zero;
       }
 
       q = q_dirSW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(-vx1-vx2    )*/+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq); 
-         (D.f[NE])[kne]=(c1o1-q)/(c1o1+q)*(f_SW-f_NE+(f_SW+f_NE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_SW+f_NE)-c6o1*c1o54*(-VeloX-VeloY))/(c1o1+q);
-         //(D.f[NE])[kne]=zero;
+         (D.f[DIR_PP0])[kne]=(c1o1-q)/(c1o1+q)*(f_SW-f_NE+(f_SW+f_NE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_SW+f_NE)-c6o1*c1o54*(-VeloX-VeloY))/(c1o1+q);
+         //(D.f[DIR_PP0])[kne]=zero;
       }
 
       q = q_dirSE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*( vx1-vx2    )*/+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq); 
-         (D.f[NW])[knw]=(c1o1-q)/(c1o1+q)*(f_SE-f_NW+(f_SE+f_NW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_SE+f_NW)-c6o1*c1o54*( VeloX-VeloY))/(c1o1+q);
-         //(D.f[NW])[knw]=zero;
+         (D.f[DIR_MP0])[knw]=(c1o1-q)/(c1o1+q)*(f_SE-f_NW+(f_SE+f_NW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_SE+f_NW)-c6o1*c1o54*( VeloX-VeloY))/(c1o1+q);
+         //(D.f[DIR_MP0])[knw]=zero;
       }
 
       q = q_dirNW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(-vx1+vx2    )*/+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq); 
-         (D.f[SE])[kse]=(c1o1-q)/(c1o1+q)*(f_NW-f_SE+(f_NW+f_SE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_NW+f_SE)-c6o1*c1o54*(-VeloX+VeloY))/(c1o1+q);
-         //(D.f[SE])[kse]=zero;
+         (D.f[DIR_PM0])[kse]=(c1o1-q)/(c1o1+q)*(f_NW-f_SE+(f_NW+f_SE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_NW+f_SE)-c6o1*c1o54*(-VeloX+VeloY))/(c1o1+q);
+         //(D.f[DIR_PM0])[kse]=zero;
       }
 
       q = q_dirTE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*( vx1    +vx3)*/+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq); 
-         (D.f[BW])[kbw]=(c1o1-q)/(c1o1+q)*(f_TE-f_BW+(f_TE+f_BW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TE+f_BW)-c6o1*c1o54*( VeloX+VeloZ))/(c1o1+q);
-         //(D.f[BW])[kbw]=zero;
+         (D.f[DIR_M0M])[kbw]=(c1o1-q)/(c1o1+q)*(f_TE-f_BW+(f_TE+f_BW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TE+f_BW)-c6o1*c1o54*( VeloX+VeloZ))/(c1o1+q);
+         //(D.f[DIR_M0M])[kbw]=zero;
       }
 
       q = q_dirBW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(-vx1    -vx3)*/+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq); 
-         (D.f[TE])[kte]=(c1o1-q)/(c1o1+q)*(f_BW-f_TE+(f_BW+f_TE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BW+f_TE)-c6o1*c1o54*(-VeloX-VeloZ))/(c1o1+q);
-         //(D.f[TE])[kte]=zero;
+         (D.f[DIR_P0P])[kte]=(c1o1-q)/(c1o1+q)*(f_BW-f_TE+(f_BW+f_TE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BW+f_TE)-c6o1*c1o54*(-VeloX-VeloZ))/(c1o1+q);
+         //(D.f[DIR_P0P])[kte]=zero;
       }
 
       q = q_dirBE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*( vx1    -vx3)*/+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq); 
-         (D.f[TW])[ktw]=(c1o1-q)/(c1o1+q)*(f_BE-f_TW+(f_BE+f_TW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BE+f_TW)-c6o1*c1o54*( VeloX-VeloZ))/(c1o1+q);
-         //(D.f[TW])[ktw]=zero;
+         (D.f[DIR_M0P])[ktw]=(c1o1-q)/(c1o1+q)*(f_BE-f_TW+(f_BE+f_TW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BE+f_TW)-c6o1*c1o54*( VeloX-VeloZ))/(c1o1+q);
+         //(D.f[DIR_M0P])[ktw]=zero;
       }
 
       q = q_dirTW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(-vx1    +vx3)*/+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq); 
-         (D.f[BE])[kbe]=(c1o1-q)/(c1o1+q)*(f_TW-f_BE+(f_TW+f_BE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TW+f_BE)-c6o1*c1o54*(-VeloX+VeloZ))/(c1o1+q);
-         //(D.f[BE])[kbe]=zero;
+         (D.f[DIR_P0M])[kbe]=(c1o1-q)/(c1o1+q)*(f_TW-f_BE+(f_TW+f_BE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TW+f_BE)-c6o1*c1o54*(-VeloX+VeloZ))/(c1o1+q);
+         //(D.f[DIR_P0M])[kbe]=zero;
       }
 
       q = q_dirTN[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(     vx2+vx3)*/+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq); 
-         (D.f[BS])[kbs]=(c1o1-q)/(c1o1+q)*(f_TN-f_BS+(f_TN+f_BS-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TN+f_BS)-c6o1*c1o54*( VeloY+VeloZ))/(c1o1+q);
-         //(D.f[BS])[kbs]=zero;
+         (D.f[DIR_0MM])[kbs]=(c1o1-q)/(c1o1+q)*(f_TN-f_BS+(f_TN+f_BS-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TN+f_BS)-c6o1*c1o54*( VeloY+VeloZ))/(c1o1+q);
+         //(D.f[DIR_0MM])[kbs]=zero;
       }
 
       q = q_dirBS[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(    -vx2-vx3)*/+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq); 
-         (D.f[TN])[ktn]=(c1o1-q)/(c1o1+q)*(f_BS-f_TN+(f_BS+f_TN-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BS+f_TN)-c6o1*c1o54*( -VeloY-VeloZ))/(c1o1+q);
-         //(D.f[TN])[ktn]=zero;
+         (D.f[DIR_0PP])[ktn]=(c1o1-q)/(c1o1+q)*(f_BS-f_TN+(f_BS+f_TN-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BS+f_TN)-c6o1*c1o54*( -VeloY-VeloZ))/(c1o1+q);
+         //(D.f[DIR_0PP])[ktn]=zero;
       }
 
       q = q_dirBN[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(     vx2-vx3)*/+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq); 
-         (D.f[TS])[kts]=(c1o1-q)/(c1o1+q)*(f_BN-f_TS+(f_BN+f_TS-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BN+f_TS)-c6o1*c1o54*( VeloY-VeloZ))/(c1o1+q);
-         //(D.f[TS])[kts]=zero;
+         (D.f[DIR_0MP])[kts]=(c1o1-q)/(c1o1+q)*(f_BN-f_TS+(f_BN+f_TS-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BN+f_TS)-c6o1*c1o54*( VeloY-VeloZ))/(c1o1+q);
+         //(D.f[DIR_0MP])[kts]=zero;
       }
 
       q = q_dirTS[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o54* (drho/*+three*(    -vx2+vx3)*/+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq); 
-         (D.f[BN])[kbn]=(c1o1-q)/(c1o1+q)*(f_TS-f_BN+(f_TS+f_BN-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TS+f_BN)-c6o1*c1o54*( -VeloY+VeloZ))/(c1o1+q);
-         //(D.f[BN])[kbn]=zero;
+         (D.f[DIR_0PM])[kbn]=(c1o1-q)/(c1o1+q)*(f_TS-f_BN+(f_TS+f_BN-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TS+f_BN)-c6o1*c1o54*( -VeloY+VeloZ))/(c1o1+q);
+         //(D.f[DIR_0PM])[kbn]=zero;
       }
 
       q = q_dirTNE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*( vx1+vx2+vx3)*/+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq); 
-         (D.f[BSW])[kbsw]=(c1o1-q)/(c1o1+q)*(f_TNE-f_BSW+(f_TNE+f_BSW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TNE+f_BSW)-c6o1*c1o216*( VeloX+VeloY+VeloZ))/(c1o1+q);
-         //(D.f[BSW])[kbsw]=zero;
+         (D.f[DIR_MMM])[kbsw]=(c1o1-q)/(c1o1+q)*(f_TNE-f_BSW+(f_TNE+f_BSW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TNE+f_BSW)-c6o1*c1o216*( VeloX+VeloY+VeloZ))/(c1o1+q);
+         //(D.f[DIR_MMM])[kbsw]=zero;
       }
 
       q = q_dirBSW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*(-vx1-vx2-vx3)*/+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq); 
-         (D.f[TNE])[ktne]=(c1o1-q)/(c1o1+q)*(f_BSW-f_TNE+(f_BSW+f_TNE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BSW+f_TNE)-c6o1*c1o216*(-VeloX-VeloY-VeloZ))/(c1o1+q);
-         //(D.f[TNE])[ktne]=zero;
+         (D.f[DIR_PPP])[ktne]=(c1o1-q)/(c1o1+q)*(f_BSW-f_TNE+(f_BSW+f_TNE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BSW+f_TNE)-c6o1*c1o216*(-VeloX-VeloY-VeloZ))/(c1o1+q);
+         //(D.f[DIR_PPP])[ktne]=zero;
       }
 
       q = q_dirBNE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*( vx1+vx2-vx3)*/+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq); 
-         (D.f[TSW])[ktsw]=(c1o1-q)/(c1o1+q)*(f_BNE-f_TSW+(f_BNE+f_TSW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BNE+f_TSW)-c6o1*c1o216*( VeloX+VeloY-VeloZ))/(c1o1+q);
-         //(D.f[TSW])[ktsw]=zero;
+         (D.f[DIR_MMP])[ktsw]=(c1o1-q)/(c1o1+q)*(f_BNE-f_TSW+(f_BNE+f_TSW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BNE+f_TSW)-c6o1*c1o216*( VeloX+VeloY-VeloZ))/(c1o1+q);
+         //(D.f[DIR_MMP])[ktsw]=zero;
       }
 
       q = q_dirTSW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*(-vx1-vx2+vx3)*/+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq); 
-         (D.f[BNE])[kbne]=(c1o1-q)/(c1o1+q)*(f_TSW-f_BNE+(f_TSW+f_BNE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TSW+f_BNE)-c6o1*c1o216*(-VeloX-VeloY+VeloZ))/(c1o1+q);
-         //(D.f[BNE])[kbne]=zero;
+         (D.f[DIR_PPM])[kbne]=(c1o1-q)/(c1o1+q)*(f_TSW-f_BNE+(f_TSW+f_BNE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TSW+f_BNE)-c6o1*c1o216*(-VeloX-VeloY+VeloZ))/(c1o1+q);
+         //(D.f[DIR_PPM])[kbne]=zero;
       }
 
       q = q_dirTSE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*( vx1-vx2+vx3)*/+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq); 
-         (D.f[BNW])[kbnw]=(c1o1-q)/(c1o1+q)*(f_TSE-f_BNW+(f_TSE+f_BNW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TSE+f_BNW)-c6o1*c1o216*( VeloX-VeloY+VeloZ))/(c1o1+q);
-         //(D.f[BNW])[kbnw]=zero;
+         (D.f[DIR_MPM])[kbnw]=(c1o1-q)/(c1o1+q)*(f_TSE-f_BNW+(f_TSE+f_BNW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TSE+f_BNW)-c6o1*c1o216*( VeloX-VeloY+VeloZ))/(c1o1+q);
+         //(D.f[DIR_MPM])[kbnw]=zero;
       }
 
       q = q_dirBNW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*(-vx1+vx2-vx3)*/+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq); 
-         (D.f[TSE])[ktse]=(c1o1-q)/(c1o1+q)*(f_BNW-f_TSE+(f_BNW+f_TSE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BNW+f_TSE)-c6o1*c1o216*(-VeloX+VeloY-VeloZ))/(c1o1+q);
-         //(D.f[TSE])[ktse]=zero;
+         (D.f[DIR_PMP])[ktse]=(c1o1-q)/(c1o1+q)*(f_BNW-f_TSE+(f_BNW+f_TSE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BNW+f_TSE)-c6o1*c1o216*(-VeloX+VeloY-VeloZ))/(c1o1+q);
+         //(D.f[DIR_PMP])[ktse]=zero;
       }
 
       q = q_dirBSE[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*( vx1-vx2-vx3)*/+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq); 
-         (D.f[TNW])[ktnw]=(c1o1-q)/(c1o1+q)*(f_BSE-f_TNW+(f_BSE+f_TNW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BSE+f_TNW)-c6o1*c1o216*( VeloX-VeloY-VeloZ))/(c1o1+q);
-         //(D.f[TNW])[ktnw]=zero;
+         (D.f[DIR_MPP])[ktnw]=(c1o1-q)/(c1o1+q)*(f_BSE-f_TNW+(f_BSE+f_TNW-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_BSE+f_TNW)-c6o1*c1o216*( VeloX-VeloY-VeloZ))/(c1o1+q);
+         //(D.f[DIR_MPP])[ktnw]=zero;
       }
 
       q = q_dirTNW[k];
       if (q>=c0o1 && q<=c1o1)
       {
          feq=c1o216*(drho/*+three*(-vx1+vx2+vx3)*/+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq); 
-         (D.f[BSE])[kbse]=(c1o1-q)/(c1o1+q)*(f_TNW-f_BSE+(f_TNW+f_BSE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TNW+f_BSE)-c6o1*c1o216*(-VeloX+VeloY+VeloZ))/(c1o1+q);
-         //(D.f[BSE])[kbse]=zero;
+         (D.f[DIR_PMM])[kbse]=(c1o1-q)/(c1o1+q)*(f_TNW-f_BSE+(f_TNW+f_BSE-c2o1*feq*om1)/(c1o1-om1))*c1o2+(q*(f_TNW+f_BSE)-c6o1*c1o216*(-VeloX+VeloY+VeloZ))/(c1o1+q);
+         //(D.f[DIR_PMM])[kbse]=zero;
       }
    }
 }
@@ -5724,7 +5723,7 @@ extern "C" __global__ void QVelDevice27(int inx,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void PropellerBC(unsigned int* neighborX,
+__global__ void PropellerBC(unsigned int* neighborX,
                                        unsigned int* neighborY,
                                        unsigned int* neighborZ,
                                        real* rho,
@@ -5755,63 +5754,63 @@ extern "C" __global__ void PropellerBC(unsigned int* neighborX,
         Distributions27 D;
         if (EvenOrOdd==true)
         {
-			D.f[E   ] = &DD[E   *size_Mat];
-			D.f[W   ] = &DD[W   *size_Mat];
-			D.f[N   ] = &DD[N   *size_Mat];
-			D.f[S   ] = &DD[S   *size_Mat];
-			D.f[T   ] = &DD[T   *size_Mat];
-			D.f[B   ] = &DD[B   *size_Mat];
-			D.f[NE  ] = &DD[NE  *size_Mat];
-			D.f[SW  ] = &DD[SW  *size_Mat];
-			D.f[SE  ] = &DD[SE  *size_Mat];
-			D.f[NW  ] = &DD[NW  *size_Mat];
-			D.f[TE  ] = &DD[TE  *size_Mat];
-			D.f[BW  ] = &DD[BW  *size_Mat];
-			D.f[BE  ] = &DD[BE  *size_Mat];
-			D.f[TW  ] = &DD[TW  *size_Mat];
-			D.f[TN  ] = &DD[TN  *size_Mat];
-			D.f[BS  ] = &DD[BS  *size_Mat];
-			D.f[BN  ] = &DD[BN  *size_Mat];
-			D.f[TS  ] = &DD[TS  *size_Mat];
-			D.f[REST] = &DD[REST*size_Mat];
-			D.f[TNE ] = &DD[TNE *size_Mat];
-			D.f[TSW ] = &DD[TSW *size_Mat];
-			D.f[TSE ] = &DD[TSE *size_Mat];
-			D.f[TNW ] = &DD[TNW *size_Mat];
-			D.f[BNE ] = &DD[BNE *size_Mat];
-			D.f[BSW ] = &DD[BSW *size_Mat];
-			D.f[BSE ] = &DD[BSE *size_Mat];
-			D.f[BNW ] = &DD[BNW *size_Mat];
+			D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+			D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+			D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+			D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+			D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+			D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+			D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+			D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+			D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+			D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+			D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+			D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+			D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+			D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+			D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+			D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+			D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+			D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+			D.f[DIR_000] = &DD[DIR_000*size_Mat];
+			D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+			D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+			D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+			D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+			D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+			D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+			D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+			D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
         }
         else
         {
-			D.f[W   ] = &DD[E   *size_Mat];
-			D.f[E   ] = &DD[W   *size_Mat];
-			D.f[S   ] = &DD[N   *size_Mat];
-			D.f[N   ] = &DD[S   *size_Mat];
-			D.f[B   ] = &DD[T   *size_Mat];
-			D.f[T   ] = &DD[B   *size_Mat];
-			D.f[SW  ] = &DD[NE  *size_Mat];
-			D.f[NE  ] = &DD[SW  *size_Mat];
-			D.f[NW  ] = &DD[SE  *size_Mat];
-			D.f[SE  ] = &DD[NW  *size_Mat];
-			D.f[BW  ] = &DD[TE  *size_Mat];
-			D.f[TE  ] = &DD[BW  *size_Mat];
-			D.f[TW  ] = &DD[BE  *size_Mat];
-			D.f[BE  ] = &DD[TW  *size_Mat];
-			D.f[BS  ] = &DD[TN  *size_Mat];
-			D.f[TN  ] = &DD[BS  *size_Mat];
-			D.f[TS  ] = &DD[BN  *size_Mat];
-			D.f[BN  ] = &DD[TS  *size_Mat];
-			D.f[REST] = &DD[REST*size_Mat];
-			D.f[BSW ] = &DD[TNE *size_Mat];
-			D.f[BNE ] = &DD[TSW *size_Mat];
-			D.f[BNW ] = &DD[TSE *size_Mat];
-			D.f[BSE ] = &DD[TNW *size_Mat];
-			D.f[TSW ] = &DD[BNE *size_Mat];
-			D.f[TNE ] = &DD[BSW *size_Mat];
-			D.f[TNW ] = &DD[BSE *size_Mat];
-			D.f[TSE ] = &DD[BNW *size_Mat];
+			D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+			D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+			D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+			D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+			D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+			D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+			D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+			D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+			D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+			D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+			D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+			D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+			D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+			D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+			D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+			D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+			D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+			D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+			D.f[DIR_000] = &DD[DIR_000*size_Mat];
+			D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+			D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+			D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+			D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+			D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+			D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+			D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+			D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
         }
         //////////////////////////////////////////////////////////////////////////
 		unsigned int KQK = k_Q[k];
@@ -5859,59 +5858,59 @@ extern "C" __global__ void PropellerBC(unsigned int* neighborX,
 		real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
 		f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW, f_ZERO;
 
-		f_ZERO= (D.f[REST])[kzero];
-		f_E   = (D.f[E   ])[ke   ];
-		f_W   = (D.f[W   ])[kw   ];
-		f_N   = (D.f[N   ])[kn   ];
-		f_S   = (D.f[S   ])[ks   ];
-		f_T   = (D.f[T   ])[kt   ];
-		f_B   = (D.f[B   ])[kb   ];
-		f_NE  = (D.f[NE  ])[kne  ];
-		f_SW  = (D.f[SW  ])[ksw  ];
-		f_SE  = (D.f[SE  ])[kse  ];
-		f_NW  = (D.f[NW  ])[knw  ];
-		f_TE  = (D.f[TE  ])[kte  ];
-		f_BW  = (D.f[BW  ])[kbw  ];
-		f_BE  = (D.f[BE  ])[kbe  ];
-		f_TW  = (D.f[TW  ])[ktw  ];
-		f_TN  = (D.f[TN  ])[ktn  ];
-		f_BS  = (D.f[BS  ])[kbs  ];
-		f_BN  = (D.f[BN  ])[kbn  ];
-		f_TS  = (D.f[TS  ])[kts  ];
-		f_TNE = (D.f[TNE ])[ktne ];
-		f_BSW = (D.f[BSW ])[kbsw ];
-		f_BNE = (D.f[BNE ])[kbne ];
-		f_TSW = (D.f[TSW ])[ktsw ];
-		f_TSE = (D.f[TSE ])[ktse ];
-		f_BNW = (D.f[BNW ])[kbnw ];
-		f_BSE = (D.f[BSE ])[kbse ];
-		f_TNW = (D.f[TNW ])[ktnw ];
-		//f_W    = (D.f[E   ])[ke   ];
-		//f_E    = (D.f[W   ])[kw   ];
-		//f_S    = (D.f[N   ])[kn   ];
-		//f_N    = (D.f[S   ])[ks   ];
-		//f_B    = (D.f[T   ])[kt   ];
-		//f_T    = (D.f[B   ])[kb   ];
-		//f_SW   = (D.f[NE  ])[kne  ];
-		//f_NE   = (D.f[SW  ])[ksw  ];
-		//f_NW   = (D.f[SE  ])[kse  ];
-		//f_SE   = (D.f[NW  ])[knw  ];
-		//f_BW   = (D.f[TE  ])[kte  ];
-		//f_TE   = (D.f[BW  ])[kbw  ];
-		//f_TW   = (D.f[BE  ])[kbe  ];
-		//f_BE   = (D.f[TW  ])[ktw  ];
-		//f_BS   = (D.f[TN  ])[ktn  ];
-		//f_TN   = (D.f[BS  ])[kbs  ];
-		//f_TS   = (D.f[BN  ])[kbn  ];
-		//f_BN   = (D.f[TS  ])[kts  ];
-		//f_BSW  = (D.f[TNE ])[ktne ];
-		//f_TNE  = (D.f[BSW ])[kbsw ];
-		//f_TSW  = (D.f[BNE ])[kbne ];
-		//f_BNE  = (D.f[TSW ])[ktsw ];
-		//f_BNW  = (D.f[TSE ])[ktse ];
-		//f_TSE  = (D.f[BNW ])[kbnw ];
-		//f_TNW  = (D.f[BSE ])[kbse ];
-		//f_BSE  = (D.f[TNW ])[ktnw ];
+		f_ZERO= (D.f[DIR_000])[kzero];
+		f_E   = (D.f[DIR_P00   ])[ke   ];
+		f_W   = (D.f[DIR_M00   ])[kw   ];
+		f_N   = (D.f[DIR_0P0   ])[kn   ];
+		f_S   = (D.f[DIR_0M0   ])[ks   ];
+		f_T   = (D.f[DIR_00P   ])[kt   ];
+		f_B   = (D.f[DIR_00M   ])[kb   ];
+		f_NE  = (D.f[DIR_PP0  ])[kne  ];
+		f_SW  = (D.f[DIR_MM0  ])[ksw  ];
+		f_SE  = (D.f[DIR_PM0  ])[kse  ];
+		f_NW  = (D.f[DIR_MP0  ])[knw  ];
+		f_TE  = (D.f[DIR_P0P  ])[kte  ];
+		f_BW  = (D.f[DIR_M0M  ])[kbw  ];
+		f_BE  = (D.f[DIR_P0M  ])[kbe  ];
+		f_TW  = (D.f[DIR_M0P  ])[ktw  ];
+		f_TN  = (D.f[DIR_0PP  ])[ktn  ];
+		f_BS  = (D.f[DIR_0MM  ])[kbs  ];
+		f_BN  = (D.f[DIR_0PM  ])[kbn  ];
+		f_TS  = (D.f[DIR_0MP  ])[kts  ];
+		f_TNE = (D.f[DIR_PPP ])[ktne ];
+		f_BSW = (D.f[DIR_MMM ])[kbsw ];
+		f_BNE = (D.f[DIR_PPM ])[kbne ];
+		f_TSW = (D.f[DIR_MMP ])[ktsw ];
+		f_TSE = (D.f[DIR_PMP ])[ktse ];
+		f_BNW = (D.f[DIR_MPM ])[kbnw ];
+		f_BSE = (D.f[DIR_PMM ])[kbse ];
+		f_TNW = (D.f[DIR_MPP ])[ktnw ];
+		//f_W    = (D.f[DIR_P00   ])[ke   ];
+		//f_E    = (D.f[DIR_M00   ])[kw   ];
+		//f_S    = (D.f[DIR_0P0   ])[kn   ];
+		//f_N    = (D.f[DIR_0M0   ])[ks   ];
+		//f_B    = (D.f[DIR_00P   ])[kt   ];
+		//f_T    = (D.f[DIR_00M   ])[kb   ];
+		//f_SW   = (D.f[DIR_PP0  ])[kne  ];
+		//f_NE   = (D.f[DIR_MM0  ])[ksw  ];
+		//f_NW   = (D.f[DIR_PM0  ])[kse  ];
+		//f_SE   = (D.f[DIR_MP0  ])[knw  ];
+		//f_BW   = (D.f[DIR_P0P  ])[kte  ];
+		//f_TE   = (D.f[DIR_M0M  ])[kbw  ];
+		//f_TW   = (D.f[DIR_P0M  ])[kbe  ];
+		//f_BE   = (D.f[DIR_M0P  ])[ktw  ];
+		//f_BS   = (D.f[DIR_0PP  ])[ktn  ];
+		//f_TN   = (D.f[DIR_0MM  ])[kbs  ];
+		//f_TS   = (D.f[DIR_0PM  ])[kbn  ];
+		//f_BN   = (D.f[DIR_0MP  ])[kts  ];
+		//f_BSW  = (D.f[DIR_PPP ])[ktne ];
+		//f_TNE  = (D.f[DIR_MMM ])[kbsw ];
+		//f_TSW  = (D.f[DIR_PPM ])[kbne ];
+		//f_BNE  = (D.f[DIR_MMP ])[ktsw ];
+		//f_BNW  = (D.f[DIR_PMP ])[ktse ];
+		//f_TSE  = (D.f[DIR_MPM ])[kbnw ];
+		//f_TNW  = (D.f[DIR_PMM ])[kbse ];
+		//f_BSE  = (D.f[DIR_MPP ])[ktnw ];
 		//////////////////////////////////////////////////////////////////////////////////
 		real vxo1, vxo2, vxo3, drho;
 		drho   =  /*zero;*/f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
@@ -5992,89 +5991,89 @@ extern "C" __global__ void PropellerBC(unsigned int* neighborX,
          f_BSE  = f_BSE  + ((c1o1+drho) * (-  c1o216*(c3o1*( vxo1-vxo2-vxo3)+c9o2*( vxo1-vxo2-vxo3)*( vxo1-vxo2-vxo3)-cusq) +   c1o216*(c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cusq2)));
          f_TNW  = f_TNW  + ((c1o1+drho) * (-  c1o216*(c3o1*(-vxo1+vxo2+vxo3)+c9o2*(-vxo1+vxo2+vxo3)*(-vxo1+vxo2+vxo3)-cusq) +   c1o216*(c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cusq2)));
 
-		(D.f[REST])[kzero] =  f_ZERO;
-        (D.f[E   ])[ke   ] =  f_E   ;	// f_W   ;//    	
-        (D.f[W   ])[kw   ] =  f_W   ;	// f_E   ;//    	
-        (D.f[N   ])[kn   ] =  f_N   ;	// f_S   ;//    	
-        (D.f[S   ])[ks   ] =  f_S   ;	// f_N   ;//    	
-        (D.f[T   ])[kt   ] =  f_T   ;	// f_B   ;//    	
-        (D.f[B   ])[kb   ] =  f_B   ;	// f_T   ;//    	
-        (D.f[NE  ])[kne  ] =  f_NE  ;	// f_SW  ;//    	
-        (D.f[SW  ])[ksw  ] =  f_SW  ;	// f_NE  ;//    	
-        (D.f[SE  ])[kse  ] =  f_SE  ;	// f_NW  ;//    	
-        (D.f[NW  ])[knw  ] =  f_NW  ;	// f_SE  ;//    	
-        (D.f[TE  ])[kte  ] =  f_TE  ;	// f_BW  ;//    	
-        (D.f[BW  ])[kbw  ] =  f_BW  ;	// f_TE  ;//    	
-        (D.f[BE  ])[kbe  ] =  f_BE  ;	// f_TW  ;//    	
-        (D.f[TW  ])[ktw  ] =  f_TW  ;	// f_BE  ;//    	
-        (D.f[TN  ])[ktn  ] =  f_TN  ;	// f_BS  ;//    	
-        (D.f[BS  ])[kbs  ] =  f_BS  ;	// f_TN  ;//    	
-        (D.f[BN  ])[kbn  ] =  f_BN  ;	// f_TS  ;//    	
-        (D.f[TS  ])[kts  ] =  f_TS  ;	// f_BN  ;//    	
-        (D.f[TNE ])[ktne ] =  f_TNE ;	// f_BSW ;//    	
-        (D.f[BSW ])[kbsw ] =  f_BSW ;	// f_BNE ;//    	
-        (D.f[BNE ])[kbne ] =  f_BNE ;	// f_BNW ;//    	
-        (D.f[TSW ])[ktsw ] =  f_TSW ;	// f_BSE ;//    	
-        (D.f[TSE ])[ktse ] =  f_TSE ;	// f_TSW ;//    	
-        (D.f[BNW ])[kbnw ] =  f_BNW ;	// f_TNE ;//    	
-        (D.f[BSE ])[kbse ] =  f_BSE ;	// f_TNW ;//    	
-        (D.f[TNW ])[ktnw ] =  f_TNW ;	// f_TSE ;//    	
+		(D.f[DIR_000])[kzero] =  f_ZERO;
+        (D.f[DIR_P00   ])[ke   ] =  f_E   ;	// f_W   ;//    	
+        (D.f[DIR_M00   ])[kw   ] =  f_W   ;	// f_E   ;//    	
+        (D.f[DIR_0P0   ])[kn   ] =  f_N   ;	// f_S   ;//    	
+        (D.f[DIR_0M0   ])[ks   ] =  f_S   ;	// f_N   ;//    	
+        (D.f[DIR_00P   ])[kt   ] =  f_T   ;	// f_B   ;//    	
+        (D.f[DIR_00M   ])[kb   ] =  f_B   ;	// f_T   ;//    	
+        (D.f[DIR_PP0  ])[kne  ] =  f_NE  ;	// f_SW  ;//    	
+        (D.f[DIR_MM0  ])[ksw  ] =  f_SW  ;	// f_NE  ;//    	
+        (D.f[DIR_PM0  ])[kse  ] =  f_SE  ;	// f_NW  ;//    	
+        (D.f[DIR_MP0  ])[knw  ] =  f_NW  ;	// f_SE  ;//    	
+        (D.f[DIR_P0P  ])[kte  ] =  f_TE  ;	// f_BW  ;//    	
+        (D.f[DIR_M0M  ])[kbw  ] =  f_BW  ;	// f_TE  ;//    	
+        (D.f[DIR_P0M  ])[kbe  ] =  f_BE  ;	// f_TW  ;//    	
+        (D.f[DIR_M0P  ])[ktw  ] =  f_TW  ;	// f_BE  ;//    	
+        (D.f[DIR_0PP  ])[ktn  ] =  f_TN  ;	// f_BS  ;//    	
+        (D.f[DIR_0MM  ])[kbs  ] =  f_BS  ;	// f_TN  ;//    	
+        (D.f[DIR_0PM  ])[kbn  ] =  f_BN  ;	// f_TS  ;//    	
+        (D.f[DIR_0MP  ])[kts  ] =  f_TS  ;	// f_BN  ;//    	
+        (D.f[DIR_PPP ])[ktne ] =  f_TNE ;	// f_BSW ;//    	
+        (D.f[DIR_MMM ])[kbsw ] =  f_BSW ;	// f_BNE ;//    	
+        (D.f[DIR_PPM ])[kbne ] =  f_BNE ;	// f_BNW ;//    	
+        (D.f[DIR_MMP ])[ktsw ] =  f_TSW ;	// f_BSE ;//    	
+        (D.f[DIR_PMP ])[ktse ] =  f_TSE ;	// f_TSW ;//    	
+        (D.f[DIR_MPM ])[kbnw ] =  f_BNW ;	// f_TNE ;//    	
+        (D.f[DIR_PMM ])[kbse ] =  f_BSE ;	// f_TNW ;//    	
+        (D.f[DIR_MPP ])[ktnw ] =  f_TNW ;	// f_TSE ;//    	
 
 		//////////////////////////////////////////////////////////////////////////
-        ////(D.f[REST])[kzero] =   c8over27* (drho-cu_sq);
-        //(D.f[E   ])[ke   ] =   three*c2over27* ( vx1        );		//six
-        //(D.f[W   ])[kw   ] =   three*c2over27* (-vx1        );		//six
-        //(D.f[N   ])[kn   ] =   three*c2over27* (     vx2    );		//six
-        //(D.f[S   ])[ks   ] =   three*c2over27* (    -vx2    );		//six
-        //(D.f[T   ])[kt   ] =   three*c2over27* (         vx3);		//six
-        //(D.f[B   ])[kb   ] =   three*c2over27* (        -vx3);		//six
-        //(D.f[NE  ])[kne  ] =   three*c1over54* ( vx1+vx2    );		//six
-        //(D.f[SW  ])[ksw  ] =   three*c1over54* (-vx1-vx2    );		//six
-        //(D.f[SE  ])[kse  ] =   three*c1over54* ( vx1-vx2    );		//six
-        //(D.f[NW  ])[knw  ] =   three*c1over54* (-vx1+vx2    );		//six
-        //(D.f[TE  ])[kte  ] =   three*c1over54* ( vx1    +vx3);		//six
-        //(D.f[BW  ])[kbw  ] =   three*c1over54* (-vx1    -vx3);		//six
-        //(D.f[BE  ])[kbe  ] =   three*c1over54* ( vx1    -vx3);		//six
-        //(D.f[TW  ])[ktw  ] =   three*c1over54* (-vx1    +vx3);		//six
-        //(D.f[TN  ])[ktn  ] =   three*c1over54* (     vx2+vx3);		//six
-        //(D.f[BS  ])[kbs  ] =   three*c1over54* (    -vx2-vx3);		//six
-        //(D.f[BN  ])[kbn  ] =   three*c1over54* (     vx2-vx3);		//six
-        //(D.f[TS  ])[kts  ] =   three*c1over54* (    -vx2+vx3);		//six
-        //(D.f[TNE ])[ktne ] =   three*c1over216*( vx1+vx2+vx3);		//six
-        //(D.f[BSW ])[kbsw ] =   three*c1over216*(-vx1-vx2-vx3);		//six
-        //(D.f[BNE ])[kbne ] =   three*c1over216*( vx1+vx2-vx3);		//six
-        //(D.f[TSW ])[ktsw ] =   three*c1over216*(-vx1-vx2+vx3);		//six
-        //(D.f[TSE ])[ktse ] =   three*c1over216*( vx1-vx2+vx3);		//six
-        //(D.f[BNW ])[kbnw ] =   three*c1over216*(-vx1+vx2-vx3);		//six
-        //(D.f[BSE ])[kbse ] =   three*c1over216*( vx1-vx2-vx3);		//six
-        //(D.f[TNW ])[ktnw ] =   three*c1over216*(-vx1+vx2+vx3);		//six
-        //(D.f[REST])[kzero] =   c8over27* (drho-cu_sq);
-        //(D.f[E   ])[ke   ] =   c2over27* (drho+three*( vx1        )+c9over2*( vx1        )*( vx1        )-cu_sq);
-        //(D.f[W   ])[kw   ] =   c2over27* (drho+three*(-vx1        )+c9over2*(-vx1        )*(-vx1        )-cu_sq);
-        //(D.f[N   ])[kn   ] =   c2over27* (drho+three*(    vx2     )+c9over2*(     vx2    )*(     vx2    )-cu_sq);
-        //(D.f[S   ])[ks   ] =   c2over27* (drho+three*(   -vx2     )+c9over2*(    -vx2    )*(    -vx2    )-cu_sq);
-        //(D.f[T   ])[kt   ] =   c2over27* (drho+three*(         vx3)+c9over2*(         vx3)*(         vx3)-cu_sq);
-        //(D.f[B   ])[kb   ] =   c2over27* (drho+three*(        -vx3)+c9over2*(        -vx3)*(        -vx3)-cu_sq);
-        //(D.f[NE  ])[kne  ] =   c1over54* (drho+three*( vx1+vx2    )+c9over2*( vx1+vx2    )*( vx1+vx2    )-cu_sq);
-        //(D.f[SW  ])[ksw  ] =   c1over54* (drho+three*(-vx1-vx2    )+c9over2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq);
-        //(D.f[SE  ])[kse  ] =   c1over54* (drho+three*( vx1-vx2    )+c9over2*( vx1-vx2    )*( vx1-vx2    )-cu_sq);
-        //(D.f[NW  ])[knw  ] =   c1over54* (drho+three*(-vx1+vx2    )+c9over2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq);
-        //(D.f[TE  ])[kte  ] =   c1over54* (drho+three*( vx1    +vx3)+c9over2*( vx1    +vx3)*( vx1    +vx3)-cu_sq);
-        //(D.f[BW  ])[kbw  ] =   c1over54* (drho+three*(-vx1    -vx3)+c9over2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq);
-        //(D.f[BE  ])[kbe  ] =   c1over54* (drho+three*( vx1    -vx3)+c9over2*( vx1    -vx3)*( vx1    -vx3)-cu_sq);
-        //(D.f[TW  ])[ktw  ] =   c1over54* (drho+three*(-vx1    +vx3)+c9over2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq);
-        //(D.f[TN  ])[ktn  ] =   c1over54* (drho+three*(     vx2+vx3)+c9over2*(     vx2+vx3)*(     vx2+vx3)-cu_sq);
-        //(D.f[BS  ])[kbs  ] =   c1over54* (drho+three*(    -vx2-vx3)+c9over2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq);
-        //(D.f[BN  ])[kbn  ] =   c1over54* (drho+three*(     vx2-vx3)+c9over2*(     vx2-vx3)*(     vx2-vx3)-cu_sq);
-        //(D.f[TS  ])[kts  ] =   c1over54* (drho+three*(    -vx2+vx3)+c9over2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq);
-        //(D.f[TNE ])[ktne ] =   c1over216*(drho+three*( vx1+vx2+vx3)+c9over2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq);
-        //(D.f[BSW ])[kbsw ] =   c1over216*(drho+three*(-vx1-vx2-vx3)+c9over2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq);
-        //(D.f[BNE ])[kbne ] =   c1over216*(drho+three*( vx1+vx2-vx3)+c9over2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq);
-        //(D.f[TSW ])[ktsw ] =   c1over216*(drho+three*(-vx1-vx2+vx3)+c9over2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq);
-        //(D.f[TSE ])[ktse ] =   c1over216*(drho+three*( vx1-vx2+vx3)+c9over2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq);
-        //(D.f[BNW ])[kbnw ] =   c1over216*(drho+three*(-vx1+vx2-vx3)+c9over2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq);
-        //(D.f[BSE ])[kbse ] =   c1over216*(drho+three*( vx1-vx2-vx3)+c9over2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq);
-        //(D.f[TNW ])[ktnw ] =   c1over216*(drho+three*(-vx1+vx2+vx3)+c9over2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq);
+        ////(D.f[DIR_000])[kzero] =   c8over27* (drho-cu_sq);
+        //(D.f[DIR_P00   ])[ke   ] =   three*c2over27* ( vx1        );		//six
+        //(D.f[DIR_M00   ])[kw   ] =   three*c2over27* (-vx1        );		//six
+        //(D.f[DIR_0P0   ])[kn   ] =   three*c2over27* (     vx2    );		//six
+        //(D.f[DIR_0M0   ])[ks   ] =   three*c2over27* (    -vx2    );		//six
+        //(D.f[DIR_00P   ])[kt   ] =   three*c2over27* (         vx3);		//six
+        //(D.f[DIR_00M   ])[kb   ] =   three*c2over27* (        -vx3);		//six
+        //(D.f[DIR_PP0  ])[kne  ] =   three*c1over54* ( vx1+vx2    );		//six
+        //(D.f[DIR_MM0  ])[ksw  ] =   three*c1over54* (-vx1-vx2    );		//six
+        //(D.f[DIR_PM0  ])[kse  ] =   three*c1over54* ( vx1-vx2    );		//six
+        //(D.f[DIR_MP0  ])[knw  ] =   three*c1over54* (-vx1+vx2    );		//six
+        //(D.f[DIR_P0P  ])[kte  ] =   three*c1over54* ( vx1    +vx3);		//six
+        //(D.f[DIR_M0M  ])[kbw  ] =   three*c1over54* (-vx1    -vx3);		//six
+        //(D.f[DIR_P0M  ])[kbe  ] =   three*c1over54* ( vx1    -vx3);		//six
+        //(D.f[DIR_M0P  ])[ktw  ] =   three*c1over54* (-vx1    +vx3);		//six
+        //(D.f[DIR_0PP  ])[ktn  ] =   three*c1over54* (     vx2+vx3);		//six
+        //(D.f[DIR_0MM  ])[kbs  ] =   three*c1over54* (    -vx2-vx3);		//six
+        //(D.f[DIR_0PM  ])[kbn  ] =   three*c1over54* (     vx2-vx3);		//six
+        //(D.f[DIR_0MP  ])[kts  ] =   three*c1over54* (    -vx2+vx3);		//six
+        //(D.f[DIR_PPP ])[ktne ] =   three*c1over216*( vx1+vx2+vx3);		//six
+        //(D.f[DIR_MMM ])[kbsw ] =   three*c1over216*(-vx1-vx2-vx3);		//six
+        //(D.f[DIR_PPM ])[kbne ] =   three*c1over216*( vx1+vx2-vx3);		//six
+        //(D.f[DIR_MMP ])[ktsw ] =   three*c1over216*(-vx1-vx2+vx3);		//six
+        //(D.f[DIR_PMP ])[ktse ] =   three*c1over216*( vx1-vx2+vx3);		//six
+        //(D.f[DIR_MPM ])[kbnw ] =   three*c1over216*(-vx1+vx2-vx3);		//six
+        //(D.f[DIR_PMM ])[kbse ] =   three*c1over216*( vx1-vx2-vx3);		//six
+        //(D.f[DIR_MPP ])[ktnw ] =   three*c1over216*(-vx1+vx2+vx3);		//six
+        //(D.f[DIR_000])[kzero] =   c8over27* (drho-cu_sq);
+        //(D.f[DIR_P00   ])[ke   ] =   c2over27* (drho+three*( vx1        )+c9over2*( vx1        )*( vx1        )-cu_sq);
+        //(D.f[DIR_M00   ])[kw   ] =   c2over27* (drho+three*(-vx1        )+c9over2*(-vx1        )*(-vx1        )-cu_sq);
+        //(D.f[DIR_0P0   ])[kn   ] =   c2over27* (drho+three*(    vx2     )+c9over2*(     vx2    )*(     vx2    )-cu_sq);
+        //(D.f[DIR_0M0   ])[ks   ] =   c2over27* (drho+three*(   -vx2     )+c9over2*(    -vx2    )*(    -vx2    )-cu_sq);
+        //(D.f[DIR_00P   ])[kt   ] =   c2over27* (drho+three*(         vx3)+c9over2*(         vx3)*(         vx3)-cu_sq);
+        //(D.f[DIR_00M   ])[kb   ] =   c2over27* (drho+three*(        -vx3)+c9over2*(        -vx3)*(        -vx3)-cu_sq);
+        //(D.f[DIR_PP0  ])[kne  ] =   c1over54* (drho+three*( vx1+vx2    )+c9over2*( vx1+vx2    )*( vx1+vx2    )-cu_sq);
+        //(D.f[DIR_MM0  ])[ksw  ] =   c1over54* (drho+three*(-vx1-vx2    )+c9over2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq);
+        //(D.f[DIR_PM0  ])[kse  ] =   c1over54* (drho+three*( vx1-vx2    )+c9over2*( vx1-vx2    )*( vx1-vx2    )-cu_sq);
+        //(D.f[DIR_MP0  ])[knw  ] =   c1over54* (drho+three*(-vx1+vx2    )+c9over2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq);
+        //(D.f[DIR_P0P  ])[kte  ] =   c1over54* (drho+three*( vx1    +vx3)+c9over2*( vx1    +vx3)*( vx1    +vx3)-cu_sq);
+        //(D.f[DIR_M0M  ])[kbw  ] =   c1over54* (drho+three*(-vx1    -vx3)+c9over2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq);
+        //(D.f[DIR_P0M  ])[kbe  ] =   c1over54* (drho+three*( vx1    -vx3)+c9over2*( vx1    -vx3)*( vx1    -vx3)-cu_sq);
+        //(D.f[DIR_M0P  ])[ktw  ] =   c1over54* (drho+three*(-vx1    +vx3)+c9over2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq);
+        //(D.f[DIR_0PP  ])[ktn  ] =   c1over54* (drho+three*(     vx2+vx3)+c9over2*(     vx2+vx3)*(     vx2+vx3)-cu_sq);
+        //(D.f[DIR_0MM  ])[kbs  ] =   c1over54* (drho+three*(    -vx2-vx3)+c9over2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq);
+        //(D.f[DIR_0PM  ])[kbn  ] =   c1over54* (drho+three*(     vx2-vx3)+c9over2*(     vx2-vx3)*(     vx2-vx3)-cu_sq);
+        //(D.f[DIR_0MP  ])[kts  ] =   c1over54* (drho+three*(    -vx2+vx3)+c9over2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq);
+        //(D.f[DIR_PPP ])[ktne ] =   c1over216*(drho+three*( vx1+vx2+vx3)+c9over2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq);
+        //(D.f[DIR_MMM ])[kbsw ] =   c1over216*(drho+three*(-vx1-vx2-vx3)+c9over2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq);
+        //(D.f[DIR_PPM ])[kbne ] =   c1over216*(drho+three*( vx1+vx2-vx3)+c9over2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq);
+        //(D.f[DIR_MMP ])[ktsw ] =   c1over216*(drho+three*(-vx1-vx2+vx3)+c9over2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq);
+        //(D.f[DIR_PMP ])[ktse ] =   c1over216*(drho+three*( vx1-vx2+vx3)+c9over2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq);
+        //(D.f[DIR_MPM ])[kbnw ] =   c1over216*(drho+three*(-vx1+vx2-vx3)+c9over2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq);
+        //(D.f[DIR_PMM ])[kbse ] =   c1over216*(drho+three*( vx1-vx2-vx3)+c9over2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq);
+        //(D.f[DIR_MPP ])[ktnw ] =   c1over216*(drho+three*(-vx1+vx2+vx3)+c9over2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq);
 		}
     }
 }
diff --git a/src/gpu/VirtualFluids_GPU/GPU/WaleCumulant27.cu b/src/gpu/VirtualFluids_GPU/GPU/WaleCumulant27.cu
index 064e9415ccd32c291fbee980b41a3b187d470eee..16028e2f9f87716f43ed60f82ed513289e381b7c 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/WaleCumulant27.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/WaleCumulant27.cu
@@ -11,7 +11,7 @@ using namespace vf::lbm::dir;
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LB_Kernel_WaleBySoniMalav_Cum_AA2016_Comp_SP_27(
+__global__ void LB_Kernel_WaleBySoniMalav_Cum_AA2016_Comp_SP_27(
 	real omega_in,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
@@ -50,63 +50,63 @@ extern "C" __global__ void LB_Kernel_WaleBySoniMalav_Cum_AA2016_Comp_SP_27(
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[E] = &DDStart[E   *size_Mat];
-				D.f[W] = &DDStart[W   *size_Mat];
-				D.f[N] = &DDStart[N   *size_Mat];
-				D.f[S] = &DDStart[S   *size_Mat];
-				D.f[T] = &DDStart[T   *size_Mat];
-				D.f[B] = &DDStart[B   *size_Mat];
-				D.f[NE] = &DDStart[NE  *size_Mat];
-				D.f[SW] = &DDStart[SW  *size_Mat];
-				D.f[SE] = &DDStart[SE  *size_Mat];
-				D.f[NW] = &DDStart[NW  *size_Mat];
-				D.f[TE] = &DDStart[TE  *size_Mat];
-				D.f[BW] = &DDStart[BW  *size_Mat];
-				D.f[BE] = &DDStart[BE  *size_Mat];
-				D.f[TW] = &DDStart[TW  *size_Mat];
-				D.f[TN] = &DDStart[TN  *size_Mat];
-				D.f[BS] = &DDStart[BS  *size_Mat];
-				D.f[BN] = &DDStart[BN  *size_Mat];
-				D.f[TS] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[TNE] = &DDStart[TNE *size_Mat];
-				D.f[TSW] = &DDStart[TSW *size_Mat];
-				D.f[TSE] = &DDStart[TSE *size_Mat];
-				D.f[TNW] = &DDStart[TNW *size_Mat];
-				D.f[BNE] = &DDStart[BNE *size_Mat];
-				D.f[BSW] = &DDStart[BSW *size_Mat];
-				D.f[BSE] = &DDStart[BSE *size_Mat];
-				D.f[BNW] = &DDStart[BNW *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
 			}
 			else
 			{
-				D.f[W] = &DDStart[E   *size_Mat];
-				D.f[E] = &DDStart[W   *size_Mat];
-				D.f[S] = &DDStart[N   *size_Mat];
-				D.f[N] = &DDStart[S   *size_Mat];
-				D.f[B] = &DDStart[T   *size_Mat];
-				D.f[T] = &DDStart[B   *size_Mat];
-				D.f[SW] = &DDStart[NE  *size_Mat];
-				D.f[NE] = &DDStart[SW  *size_Mat];
-				D.f[NW] = &DDStart[SE  *size_Mat];
-				D.f[SE] = &DDStart[NW  *size_Mat];
-				D.f[BW] = &DDStart[TE  *size_Mat];
-				D.f[TE] = &DDStart[BW  *size_Mat];
-				D.f[TW] = &DDStart[BE  *size_Mat];
-				D.f[BE] = &DDStart[TW  *size_Mat];
-				D.f[BS] = &DDStart[TN  *size_Mat];
-				D.f[TN] = &DDStart[BS  *size_Mat];
-				D.f[TS] = &DDStart[BN  *size_Mat];
-				D.f[BN] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[BSW] = &DDStart[TNE *size_Mat];
-				D.f[BNE] = &DDStart[TSW *size_Mat];
-				D.f[BNW] = &DDStart[TSE *size_Mat];
-				D.f[BSE] = &DDStart[TNW *size_Mat];
-				D.f[TSW] = &DDStart[BNE *size_Mat];
-				D.f[TNE] = &DDStart[BSW *size_Mat];
-				D.f[TNW] = &DDStart[BSE *size_Mat];
-				D.f[TSE] = &DDStart[BNW *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -120,33 +120,33 @@ extern "C" __global__ void LB_Kernel_WaleBySoniMalav_Cum_AA2016_Comp_SP_27(
 			unsigned int kbsw = neighborZ[ksw];
 
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[E])[k];
-			real mfabb = (D.f[W])[kw];
-			real mfbcb = (D.f[N])[k];
-			real mfbab = (D.f[S])[ks];
-			real mfbbc = (D.f[T])[k];
-			real mfbba = (D.f[B])[kb];
-			real mfccb = (D.f[NE])[k];
-			real mfaab = (D.f[SW])[ksw];
-			real mfcab = (D.f[SE])[ks];
-			real mfacb = (D.f[NW])[kw];
-			real mfcbc = (D.f[TE])[k];
-			real mfaba = (D.f[BW])[kbw];
-			real mfcba = (D.f[BE])[kb];
-			real mfabc = (D.f[TW])[kw];
-			real mfbcc = (D.f[TN])[k];
-			real mfbaa = (D.f[BS])[kbs];
-			real mfbca = (D.f[BN])[kb];
-			real mfbac = (D.f[TS])[ks];
-			real mfbbb = (D.f[REST])[k];
-			real mfccc = (D.f[TNE])[k];
-			real mfaac = (D.f[TSW])[ksw];
-			real mfcac = (D.f[TSE])[ks];
-			real mfacc = (D.f[TNW])[kw];
-			real mfcca = (D.f[BNE])[kb];
-			real mfaaa = (D.f[BSW])[kbsw];
-			real mfcaa = (D.f[BSE])[kbs];
-			real mfaca = (D.f[BNW])[kbw];
+			real mfcbb = (D.f[DIR_P00])[k];
+			real mfabb = (D.f[DIR_M00])[kw];
+			real mfbcb = (D.f[DIR_0P0])[k];
+			real mfbab = (D.f[DIR_0M0])[ks];
+			real mfbbc = (D.f[DIR_00P])[k];
+			real mfbba = (D.f[DIR_00M])[kb];
+			real mfccb = (D.f[DIR_PP0])[k];
+			real mfaab = (D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks];
+			real mfacb = (D.f[DIR_MP0])[kw];
+			real mfcbc = (D.f[DIR_P0P])[k];
+			real mfaba = (D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb];
+			real mfabc = (D.f[DIR_M0P])[kw];
+			real mfbcc = (D.f[DIR_0PP])[k];
+			real mfbaa = (D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb];
+			real mfbac = (D.f[DIR_0MP])[ks];
+			real mfbbb = (D.f[DIR_000])[k];
+			real mfccc = (D.f[DIR_PPP])[k];
+			real mfaac = (D.f[DIR_MMP])[ksw];
+			real mfcac = (D.f[DIR_PMP])[ks];
+			real mfacc = (D.f[DIR_MPP])[kw];
+			real mfcca = (D.f[DIR_PPM])[kb];
+			real mfaaa = (D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];
+			real mfaca = (D.f[DIR_MPM])[kbw];
 			////////////////////////////////////////////////////////////////////////////////////
 			real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 				(((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
@@ -1007,33 +1007,33 @@ extern "C" __global__ void LB_Kernel_WaleBySoniMalav_Cum_AA2016_Comp_SP_27(
 			////////////////////////////////////////////////////////////////////////////////////
 
 			////////////////////////////////////////////////////////////////////////////////////
-			(D.f[E])[k] = mfabb;
-			(D.f[W])[kw] = mfcbb;
-			(D.f[N])[k] = mfbab;
-			(D.f[S])[ks] = mfbcb;
-			(D.f[T])[k] = mfbba;
-			(D.f[B])[kb] = mfbbc;
-			(D.f[NE])[k] = mfaab;
-			(D.f[SW])[ksw] = mfccb;
-			(D.f[SE])[ks] = mfacb;
-			(D.f[NW])[kw] = mfcab;
-			(D.f[TE])[k] = mfaba;
-			(D.f[BW])[kbw] = mfcbc;
-			(D.f[BE])[kb] = mfabc;
-			(D.f[TW])[kw] = mfcba;
-			(D.f[TN])[k] = mfbaa;
-			(D.f[BS])[kbs] = mfbcc;
-			(D.f[BN])[kb] = mfbac;
-			(D.f[TS])[ks] = mfbca;
-			(D.f[REST])[k] = mfbbb;
-			(D.f[TNE])[k] = mfaaa;
-			(D.f[TSE])[ks] = mfaca;
-			(D.f[BNE])[kb] = mfaac;
-			(D.f[BSE])[kbs] = mfacc;
-			(D.f[TNW])[kw] = mfcaa;
-			(D.f[TSW])[ksw] = mfcca;
-			(D.f[BNW])[kbw] = mfcac;
-			(D.f[BSW])[kbsw] = mfccc;
+			(D.f[DIR_P00])[k] = mfabb;
+			(D.f[DIR_M00])[kw] = mfcbb;
+			(D.f[DIR_0P0])[k] = mfbab;
+			(D.f[DIR_0M0])[ks] = mfbcb;
+			(D.f[DIR_00P])[k] = mfbba;
+			(D.f[DIR_00M])[kb] = mfbbc;
+			(D.f[DIR_PP0])[k] = mfaab;
+			(D.f[DIR_MM0])[ksw] = mfccb;
+			(D.f[DIR_PM0])[ks] = mfacb;
+			(D.f[DIR_MP0])[kw] = mfcab;
+			(D.f[DIR_P0P])[k] = mfaba;
+			(D.f[DIR_M0M])[kbw] = mfcbc;
+			(D.f[DIR_P0M])[kb] = mfabc;
+			(D.f[DIR_M0P])[kw] = mfcba;
+			(D.f[DIR_0PP])[k] = mfbaa;
+			(D.f[DIR_0MM])[kbs] = mfbcc;
+			(D.f[DIR_0PM])[kb] = mfbac;
+			(D.f[DIR_0MP])[ks] = mfbca;
+			(D.f[DIR_000])[k] = mfbbb;
+			(D.f[DIR_PPP])[k] = mfaaa;
+			(D.f[DIR_PMP])[ks] = mfaca;
+			(D.f[DIR_PPM])[kb] = mfaac;
+			(D.f[DIR_PMM])[kbs] = mfacc;
+			(D.f[DIR_MPP])[kw] = mfcaa;
+			(D.f[DIR_MMP])[ksw] = mfcca;
+			(D.f[DIR_MPM])[kbw] = mfcac;
+			(D.f[DIR_MMM])[kbsw] = mfccc;
 			////////////////////////////////////////////////////////////////////////////////////
 		}
 	}
diff --git a/src/gpu/VirtualFluids_GPU/GPU/WallFunction.cu b/src/gpu/VirtualFluids_GPU/GPU/WallFunction.cu
index 6da362fafcd67e8a4ddadf9405f92e66bba4c7b4..d48fa80fd14ce15f4a380ed46403654b43c805e8 100644
--- a/src/gpu/VirtualFluids_GPU/GPU/WallFunction.cu
+++ b/src/gpu/VirtualFluids_GPU/GPU/WallFunction.cu
@@ -8,7 +8,7 @@ using namespace vf::lbm::dir;
 
 
 //////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void WallFunction27(
+__global__ void WallFunction27(
 										  real* vx,
 										  real* vy,
 										  real* vz,
@@ -26,63 +26,63 @@ extern "C" __global__ void WallFunction27(
    Distributions27 D;
    if (isEvenTimestep==true)
    {
-      D.f[E   ] = &DD[E   *size_Mat];
-      D.f[W   ] = &DD[W   *size_Mat];
-      D.f[N   ] = &DD[N   *size_Mat];
-      D.f[S   ] = &DD[S   *size_Mat];
-      D.f[T   ] = &DD[T   *size_Mat];
-      D.f[B   ] = &DD[B   *size_Mat];
-      D.f[NE  ] = &DD[NE  *size_Mat];
-      D.f[SW  ] = &DD[SW  *size_Mat];
-      D.f[SE  ] = &DD[SE  *size_Mat];
-      D.f[NW  ] = &DD[NW  *size_Mat];
-      D.f[TE  ] = &DD[TE  *size_Mat];
-      D.f[BW  ] = &DD[BW  *size_Mat];
-      D.f[BE  ] = &DD[BE  *size_Mat];
-      D.f[TW  ] = &DD[TW  *size_Mat];
-      D.f[TN  ] = &DD[TN  *size_Mat];
-      D.f[BS  ] = &DD[BS  *size_Mat];
-      D.f[BN  ] = &DD[BN  *size_Mat];
-      D.f[TS  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[TNE *size_Mat];
-      D.f[TSW ] = &DD[TSW *size_Mat];
-      D.f[TSE ] = &DD[TSE *size_Mat];
-      D.f[TNW ] = &DD[TNW *size_Mat];
-      D.f[BNE ] = &DD[BNE *size_Mat];
-      D.f[BSW ] = &DD[BSW *size_Mat];
-      D.f[BSE ] = &DD[BSE *size_Mat];
-      D.f[BNW ] = &DD[BNW *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
    } 
    else
    {
-      D.f[W   ] = &DD[E   *size_Mat];
-      D.f[E   ] = &DD[W   *size_Mat];
-      D.f[S   ] = &DD[N   *size_Mat];
-      D.f[N   ] = &DD[S   *size_Mat];
-      D.f[B   ] = &DD[T   *size_Mat];
-      D.f[T   ] = &DD[B   *size_Mat];
-      D.f[SW  ] = &DD[NE  *size_Mat];
-      D.f[NE  ] = &DD[SW  *size_Mat];
-      D.f[NW  ] = &DD[SE  *size_Mat];
-      D.f[SE  ] = &DD[NW  *size_Mat];
-      D.f[BW  ] = &DD[TE  *size_Mat];
-      D.f[TE  ] = &DD[BW  *size_Mat];
-      D.f[TW  ] = &DD[BE  *size_Mat];
-      D.f[BE  ] = &DD[TW  *size_Mat];
-      D.f[BS  ] = &DD[TN  *size_Mat];
-      D.f[TN  ] = &DD[BS  *size_Mat];
-      D.f[TS  ] = &DD[BN  *size_Mat];
-      D.f[BN  ] = &DD[TS  *size_Mat];
-      D.f[REST] = &DD[REST*size_Mat];
-      D.f[TNE ] = &DD[BSW *size_Mat];
-      D.f[TSW ] = &DD[BNE *size_Mat];
-      D.f[TSE ] = &DD[BNW *size_Mat];
-      D.f[TNW ] = &DD[BSE *size_Mat];
-      D.f[BNE ] = &DD[TSW *size_Mat];
-      D.f[BSW ] = &DD[TNE *size_Mat];
-      D.f[BSE ] = &DD[TNW *size_Mat];
-      D.f[BNW ] = &DD[TSE *size_Mat];
+      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
    }
    ////////////////////////////////////////////////////////////////////////////////
    const unsigned  x = threadIdx.x;  // Globaler x-Index 
@@ -107,32 +107,32 @@ extern "C" __global__ void WallFunction27(
       //      *q_dirBE,  *q_dirTW,  *q_dirTN,  *q_dirBS,  *q_dirBN,  *q_dirTS,
       //      *q_dirTNE, *q_dirTSW, *q_dirTSE, *q_dirTNW, *q_dirBNE, *q_dirBSW,
       //      *q_dirBSE, *q_dirBNW; 
-      //q_dirE   = &QQ[E   * numberOfBCnodes];
-      //q_dirW   = &QQ[W   * numberOfBCnodes];
-      //q_dirN   = &QQ[N   * numberOfBCnodes];
-      //q_dirS   = &QQ[S   * numberOfBCnodes];
-      //q_dirT   = &QQ[T   * numberOfBCnodes];
-      //q_dirB   = &QQ[B   * numberOfBCnodes];
-      //q_dirNE  = &QQ[NE  * numberOfBCnodes];
-      //q_dirSW  = &QQ[SW  * numberOfBCnodes];
-      //q_dirSE  = &QQ[SE  * numberOfBCnodes];
-      //q_dirNW  = &QQ[NW  * numberOfBCnodes];
-      //q_dirTE  = &QQ[TE  * numberOfBCnodes];
-      //q_dirBW  = &QQ[BW  * numberOfBCnodes];
-      //q_dirBE  = &QQ[BE  * numberOfBCnodes];
-      //q_dirTW  = &QQ[TW  * numberOfBCnodes];
-      //q_dirTN  = &QQ[TN  * numberOfBCnodes];
-      //q_dirBS  = &QQ[BS  * numberOfBCnodes];
-      //q_dirBN  = &QQ[BN  * numberOfBCnodes];
-      //q_dirTS  = &QQ[TS  * numberOfBCnodes];
-      //q_dirTNE = &QQ[TNE * numberOfBCnodes];
-      //q_dirTSW = &QQ[TSW * numberOfBCnodes];
-      //q_dirTSE = &QQ[TSE * numberOfBCnodes];
-      //q_dirTNW = &QQ[TNW * numberOfBCnodes];
-      //q_dirBNE = &QQ[BNE * numberOfBCnodes];
-      //q_dirBSW = &QQ[BSW * numberOfBCnodes];
-      //q_dirBSE = &QQ[BSE * numberOfBCnodes];
-      //q_dirBNW = &QQ[BNW * numberOfBCnodes];
+      //q_dirE   = &QQ[DIR_P00   * numberOfBCnodes];
+      //q_dirW   = &QQ[DIR_M00   * numberOfBCnodes];
+      //q_dirN   = &QQ[DIR_0P0   * numberOfBCnodes];
+      //q_dirS   = &QQ[DIR_0M0   * numberOfBCnodes];
+      //q_dirT   = &QQ[DIR_00P   * numberOfBCnodes];
+      //q_dirB   = &QQ[DIR_00M   * numberOfBCnodes];
+      //q_dirNE  = &QQ[DIR_PP0  * numberOfBCnodes];
+      //q_dirSW  = &QQ[DIR_MM0  * numberOfBCnodes];
+      //q_dirSE  = &QQ[DIR_PM0  * numberOfBCnodes];
+      //q_dirNW  = &QQ[DIR_MP0  * numberOfBCnodes];
+      //q_dirTE  = &QQ[DIR_P0P  * numberOfBCnodes];
+      //q_dirBW  = &QQ[DIR_M0M  * numberOfBCnodes];
+      //q_dirBE  = &QQ[DIR_P0M  * numberOfBCnodes];
+      //q_dirTW  = &QQ[DIR_M0P  * numberOfBCnodes];
+      //q_dirTN  = &QQ[DIR_0PP  * numberOfBCnodes];
+      //q_dirBS  = &QQ[DIR_0MM  * numberOfBCnodes];
+      //q_dirBN  = &QQ[DIR_0PM  * numberOfBCnodes];
+      //q_dirTS  = &QQ[DIR_0MP  * numberOfBCnodes];
+      //q_dirTNE = &QQ[DIR_PPP * numberOfBCnodes];
+      //q_dirTSW = &QQ[DIR_MMP * numberOfBCnodes];
+      //q_dirTSE = &QQ[DIR_PMP * numberOfBCnodes];
+      //q_dirTNW = &QQ[DIR_MPP * numberOfBCnodes];
+      //q_dirBNE = &QQ[DIR_PPM * numberOfBCnodes];
+      //q_dirBSW = &QQ[DIR_MMM * numberOfBCnodes];
+      //q_dirBSE = &QQ[DIR_PMM * numberOfBCnodes];
+      //q_dirBNW = &QQ[DIR_MPM * numberOfBCnodes];
       ////////////////////////////////////////////////////////////////////////////////
       //index
       unsigned int KQK  = k_Q[k];
@@ -167,38 +167,38 @@ extern "C" __global__ void WallFunction27(
       real f_E,  f_W,  f_N,  f_S,  f_T,  f_B,   f_NE,  f_SW,  f_SE,  f_NW,  f_TE,  f_BW,  f_BE,
          f_TW, f_TN, f_BS, f_BN, f_TS, f_TNE, f_TSW, f_TSE, f_TNW, f_BNE, f_BSW, f_BSE, f_BNW;
 
-      f_W    = (D.f[E   ])[ke   ];
-      f_E    = (D.f[W   ])[kw   ];
-      f_S    = (D.f[N   ])[kn   ];
-      f_N    = (D.f[S   ])[ks   ];
-      f_B    = (D.f[T   ])[kt   ];
-      f_T    = (D.f[B   ])[kb   ];
-      f_SW   = (D.f[NE  ])[kne  ];
-      f_NE   = (D.f[SW  ])[ksw  ];
-      f_NW   = (D.f[SE  ])[kse  ];
-      f_SE   = (D.f[NW  ])[knw  ];
-      f_BW   = (D.f[TE  ])[kte  ];
-      f_TE   = (D.f[BW  ])[kbw  ];
-      f_TW   = (D.f[BE  ])[kbe  ];
-      f_BE   = (D.f[TW  ])[ktw  ];
-      f_BS   = (D.f[TN  ])[ktn  ];
-      f_TN   = (D.f[BS  ])[kbs  ];
-      f_TS   = (D.f[BN  ])[kbn  ];
-      f_BN   = (D.f[TS  ])[kts  ];
-      f_BSW  = (D.f[TNE ])[ktne ];
-      f_BNE  = (D.f[TSW ])[ktsw ];
-      f_BNW  = (D.f[TSE ])[ktse ];
-      f_BSE  = (D.f[TNW ])[ktnw ];
-      f_TSW  = (D.f[BNE ])[kbne ];
-      f_TNE  = (D.f[BSW ])[kbsw ];
-      f_TNW  = (D.f[BSE ])[kbse ];
-      f_TSE  = (D.f[BNW ])[kbnw ];
+      f_W    = (D.f[DIR_P00   ])[ke   ];
+      f_E    = (D.f[DIR_M00   ])[kw   ];
+      f_S    = (D.f[DIR_0P0   ])[kn   ];
+      f_N    = (D.f[DIR_0M0   ])[ks   ];
+      f_B    = (D.f[DIR_00P   ])[kt   ];
+      f_T    = (D.f[DIR_00M   ])[kb   ];
+      f_SW   = (D.f[DIR_PP0  ])[kne  ];
+      f_NE   = (D.f[DIR_MM0  ])[ksw  ];
+      f_NW   = (D.f[DIR_PM0  ])[kse  ];
+      f_SE   = (D.f[DIR_MP0  ])[knw  ];
+      f_BW   = (D.f[DIR_P0P  ])[kte  ];
+      f_TE   = (D.f[DIR_M0M  ])[kbw  ];
+      f_TW   = (D.f[DIR_P0M  ])[kbe  ];
+      f_BE   = (D.f[DIR_M0P  ])[ktw  ];
+      f_BS   = (D.f[DIR_0PP  ])[ktn  ];
+      f_TN   = (D.f[DIR_0MM  ])[kbs  ];
+      f_TS   = (D.f[DIR_0PM  ])[kbn  ];
+      f_BN   = (D.f[DIR_0MP  ])[kts  ];
+      f_BSW  = (D.f[DIR_PPP ])[ktne ];
+      f_BNE  = (D.f[DIR_MMP ])[ktsw ];
+      f_BNW  = (D.f[DIR_PMP ])[ktse ];
+      f_BSE  = (D.f[DIR_MPP ])[ktnw ];
+      f_TSW  = (D.f[DIR_PPM ])[kbne ];
+      f_TNE  = (D.f[DIR_MMM ])[kbsw ];
+      f_TNW  = (D.f[DIR_PMM ])[kbse ];
+      f_TSE  = (D.f[DIR_MPM ])[kbnw ];
       ////////////////////////////////////////////////////////////////////////////////
       // real vx2, vx3, feq, q;
       real vx1, drho;
       drho   =  f_TSE + f_TNW + f_TNE + f_TSW + f_BSE + f_BNW + f_BNE + f_BSW +
                 f_BN + f_TS + f_TN + f_BS + f_BE + f_TW + f_TE + f_BW + f_SE + f_NW + f_NE + f_SW + 
-                f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[REST])[kzero]); 
+                f_T + f_B + f_N + f_S + f_E + f_W + ((D.f[DIR_000])[kzero]); 
 
        vx1    =  (((f_TSE - f_BNW) - (f_TNW - f_BSE)) + ((f_TNE - f_BSW) - (f_TSW - f_BNE)) +
                  ((f_BE - f_TW)   + (f_TE - f_BW))   + ((f_SE - f_NW)   + (f_NE - f_SW)) +
@@ -234,67 +234,67 @@ extern "C" __global__ void WallFunction27(
    //   //////////////////////////////////////////////////////////////////////////
    //   if (isEvenTimestep==false)
    //   {
-   //      D.f[E   ] = &DD[E   *size_Mat];
-   //      D.f[W   ] = &DD[W   *size_Mat];
-   //      D.f[N   ] = &DD[N   *size_Mat];
-   //      D.f[S   ] = &DD[S   *size_Mat];
-   //      D.f[T   ] = &DD[T   *size_Mat];
-   //      D.f[B   ] = &DD[B   *size_Mat];
-   //      D.f[NE  ] = &DD[NE  *size_Mat];
-   //      D.f[SW  ] = &DD[SW  *size_Mat];
-   //      D.f[SE  ] = &DD[SE  *size_Mat];
-   //      D.f[NW  ] = &DD[NW  *size_Mat];
-   //      D.f[TE  ] = &DD[TE  *size_Mat];
-   //      D.f[BW  ] = &DD[BW  *size_Mat];
-   //      D.f[BE  ] = &DD[BE  *size_Mat];
-   //      D.f[TW  ] = &DD[TW  *size_Mat];
-   //      D.f[TN  ] = &DD[TN  *size_Mat];
-   //      D.f[BS  ] = &DD[BS  *size_Mat];
-   //      D.f[BN  ] = &DD[BN  *size_Mat];
-   //      D.f[TS  ] = &DD[TS  *size_Mat];
-   //      D.f[REST] = &DD[REST*size_Mat];
-   //      D.f[TNE ] = &DD[TNE *size_Mat];
-   //      D.f[TSW ] = &DD[TSW *size_Mat];
-   //      D.f[TSE ] = &DD[TSE *size_Mat];
-   //      D.f[TNW ] = &DD[TNW *size_Mat];
-   //      D.f[BNE ] = &DD[BNE *size_Mat];
-   //      D.f[BSW ] = &DD[BSW *size_Mat];
-   //      D.f[BSE ] = &DD[BSE *size_Mat];
-   //      D.f[BNW ] = &DD[BNW *size_Mat];
+   //      D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+   //      D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+   //      D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+   //      D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+   //      D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+   //      D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+   //      D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+   //      D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+   //      D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+   //      D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+   //      D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+   //      D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+   //      D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+   //      D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+   //      D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+   //      D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+   //      D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+   //      D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+   //      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+   //      D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+   //      D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+   //      D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+   //      D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+   //      D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+   //      D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+   //      D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+   //      D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
    //   } 
    //   else
    //   {
-   //      D.f[W   ] = &DD[E   *size_Mat];
-   //      D.f[E   ] = &DD[W   *size_Mat];
-   //      D.f[S   ] = &DD[N   *size_Mat];
-   //      D.f[N   ] = &DD[S   *size_Mat];
-   //      D.f[B   ] = &DD[T   *size_Mat];
-   //      D.f[T   ] = &DD[B   *size_Mat];
-   //      D.f[SW  ] = &DD[NE  *size_Mat];
-   //      D.f[NE  ] = &DD[SW  *size_Mat];
-   //      D.f[NW  ] = &DD[SE  *size_Mat];
-   //      D.f[SE  ] = &DD[NW  *size_Mat];
-   //      D.f[BW  ] = &DD[TE  *size_Mat];
-   //      D.f[TE  ] = &DD[BW  *size_Mat];
-   //      D.f[TW  ] = &DD[BE  *size_Mat];
-   //      D.f[BE  ] = &DD[TW  *size_Mat];
-   //      D.f[BS  ] = &DD[TN  *size_Mat];
-   //      D.f[TN  ] = &DD[BS  *size_Mat];
-   //      D.f[TS  ] = &DD[BN  *size_Mat];
-   //      D.f[BN  ] = &DD[TS  *size_Mat];
-   //      D.f[REST] = &DD[REST*size_Mat];
-   //      D.f[TNE ] = &DD[BSW *size_Mat];
-   //      D.f[TSW ] = &DD[BNE *size_Mat];
-   //      D.f[TSE ] = &DD[BNW *size_Mat];
-   //      D.f[TNW ] = &DD[BSE *size_Mat];
-   //      D.f[BNE ] = &DD[TSW *size_Mat];
-   //      D.f[BSW ] = &DD[TNE *size_Mat];
-   //      D.f[BSE ] = &DD[TNW *size_Mat];
-   //      D.f[BNW ] = &DD[TSE *size_Mat];
+   //      D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+   //      D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+   //      D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+   //      D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+   //      D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+   //      D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+   //      D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+   //      D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+   //      D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+   //      D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+   //      D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+   //      D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+   //      D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+   //      D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+   //      D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+   //      D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+   //      D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+   //      D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+   //      D.f[DIR_000] = &DD[DIR_000*size_Mat];
+   //      D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+   //      D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+   //      D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
+   //      D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+   //      D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+   //      D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+   //      D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+   //      D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
    //   }
    //   ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    //   //Test
-   //   //(D.f[REST])[k]=c1o10;
+   //   //(D.f[DIR_000])[k]=c1o10;
    //   ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
 	  ////ToDo anders Klammern
@@ -303,208 +303,208 @@ extern "C" __global__ void WallFunction27(
    //   if (q>=zero && q<=one)
    //   {
    //      feq=c2over27* (drho/*+three*( vx1        )*/+c9over2*( vx1        )*( vx1        ) * (one + drho)-cu_sq); 
-   //      (D.f[W])[kw]=(one-q)/(one+q)*(f_E-f_W+(f_E+f_W-two*feq*om1)/(one-om1))*c1o2+(q*(f_E+f_W)-six*c2over27*( VeloX     ))/(one+q);// - c2over27 * drho;
-   //      //(D.f[W])[kw]=zero;
+   //      (D.f[DIR_M00])[kw]=(one-q)/(one+q)*(f_E-f_W+(f_E+f_W-two*feq*om1)/(one-om1))*c1o2+(q*(f_E+f_W)-six*c2over27*( VeloX     ))/(one+q);// - c2over27 * drho;
+   //      //(D.f[DIR_M00])[kw]=zero;
    //   }
 
    //   q = q_dirW[k];
    //   if (q>=zero && q<=one)
    //   {
    //      feq=c2over27* (drho/*+three*(-vx1        )*/+c9over2*(-vx1        )*(-vx1        ) * (one + drho)-cu_sq); 
-   //      (D.f[E])[ke]=(one-q)/(one+q)*(f_W-f_E+(f_W+f_E-two*feq*om1)/(one-om1))*c1o2+(q*(f_W+f_E)-six*c2over27*(-VeloX     ))/(one+q);// - c2over27 * drho;
-   //      //(D.f[E])[ke]=zero;
+   //      (D.f[DIR_P00])[ke]=(one-q)/(one+q)*(f_W-f_E+(f_W+f_E-two*feq*om1)/(one-om1))*c1o2+(q*(f_W+f_E)-six*c2over27*(-VeloX     ))/(one+q);// - c2over27 * drho;
+   //      //(D.f[DIR_P00])[ke]=zero;
    //   }
 
    //   q = q_dirN[k];
    //   if (q>=zero && q<=one)
    //   {
    //      feq=c2over27* (drho/*+three*(    vx2     )*/+c9over2*(     vx2    )*(     vx2    ) * (one + drho)-cu_sq); 
-   //      (D.f[S])[ks]=(one-q)/(one+q)*(f_N-f_S+(f_N+f_S-two*feq*om1)/(one-om1))*c1o2+(q*(f_N+f_S)-six*c2over27*( VeloY     ))/(one+q);// - c2over27 * drho;
-   //      //(D.f[S])[ks]=zero;
+   //      (D.f[DIR_0M0])[ks]=(one-q)/(one+q)*(f_N-f_S+(f_N+f_S-two*feq*om1)/(one-om1))*c1o2+(q*(f_N+f_S)-six*c2over27*( VeloY     ))/(one+q);// - c2over27 * drho;
+   //      //(D.f[DIR_0M0])[ks]=zero;
    //   }
 
    //   q = q_dirS[k];
    //   if (q>=zero && q<=one)
    //   {
    //      feq=c2over27* (drho/*+three*(   -vx2     )*/+c9over2*(    -vx2    )*(    -vx2    ) * (one + drho)-cu_sq); 
-   //      (D.f[N])[kn]=(one-q)/(one+q)*(f_S-f_N+(f_S+f_N-two*feq*om1)/(one-om1))*c1o2+(q*(f_S+f_N)-six*c2over27*(-VeloY     ))/(one+q);// - c2over27 * drho;
-   //      //(D.f[N])[kn]=zero;
+   //      (D.f[DIR_0P0])[kn]=(one-q)/(one+q)*(f_S-f_N+(f_S+f_N-two*feq*om1)/(one-om1))*c1o2+(q*(f_S+f_N)-six*c2over27*(-VeloY     ))/(one+q);// - c2over27 * drho;
+   //      //(D.f[DIR_0P0])[kn]=zero;
    //   }
 
    //   q = q_dirT[k];
    //   if (q>=zero && q<=one)
    //   {
    //      feq=c2over27* (drho/*+three*(         vx3)*/+c9over2*(         vx3)*(         vx3) * (one + drho)-cu_sq); 
-   //      (D.f[B])[kb]=(one-q)/(one+q)*(f_T-f_B+(f_T+f_B-two*feq*om1)/(one-om1))*c1o2+(q*(f_T+f_B)-six*c2over27*( VeloZ     ))/(one+q);// - c2over27 * drho;
-   //      //(D.f[B])[kb]=one;
+   //      (D.f[DIR_00M])[kb]=(one-q)/(one+q)*(f_T-f_B+(f_T+f_B-two*feq*om1)/(one-om1))*c1o2+(q*(f_T+f_B)-six*c2over27*( VeloZ     ))/(one+q);// - c2over27 * drho;
+   //      //(D.f[DIR_00M])[kb]=one;
    //   }
 
    //   q = q_dirB[k];
    //   if (q>=zero && q<=one)
    //   {
    //      feq=c2over27* (drho/*+three*(        -vx3)*/+c9over2*(        -vx3)*(        -vx3) * (one + drho)-cu_sq); 
-   //      (D.f[T])[kt]=(one-q)/(one+q)*(f_B-f_T+(f_B+f_T-two*feq*om1)/(one-om1))*c1o2+(q*(f_B+f_T)-six*c2over27*(-VeloZ     ))/(one+q);// - c2over27 * drho;
-   //      //(D.f[T])[kt]=zero;
+   //      (D.f[DIR_00P])[kt]=(one-q)/(one+q)*(f_B-f_T+(f_B+f_T-two*feq*om1)/(one-om1))*c1o2+(q*(f_B+f_T)-six*c2over27*(-VeloZ     ))/(one+q);// - c2over27 * drho;
+   //      //(D.f[DIR_00P])[kt]=zero;
    //   }
 
    //   q = q_dirNE[k];
    //   if (q>=zero && q<=one)
    //   {
    //      feq=c1over54* (drho/*+three*( vx1+vx2    )*/+c9over2*( vx1+vx2    )*( vx1+vx2    ) * (one + drho)-cu_sq); 
-   //      (D.f[SW])[ksw]=(one-q)/(one+q)*(f_NE-f_SW+(f_NE+f_SW-two*feq*om1)/(one-om1))*c1o2+(q*(f_NE+f_SW)-six*c1over54*(VeloX+VeloY))/(one+q);// - c1over54 * drho;
-   //      //(D.f[SW])[ksw]=zero;
+   //      (D.f[DIR_MM0])[ksw]=(one-q)/(one+q)*(f_NE-f_SW+(f_NE+f_SW-two*feq*om1)/(one-om1))*c1o2+(q*(f_NE+f_SW)-six*c1over54*(VeloX+VeloY))/(one+q);// - c1over54 * drho;
+   //      //(D.f[DIR_MM0])[ksw]=zero;
    //   }
 
    //   q = q_dirSW[k];
    //   if (q>=zero && q<=one)
    //   {
    //      feq=c1over54* (drho/*+three*(-vx1-vx2    )*/+c9over2*(-vx1-vx2    )*(-vx1-vx2    ) * (one + drho)-cu_sq); 
-   //      (D.f[NE])[kne]=(one-q)/(one+q)*(f_SW-f_NE+(f_SW+f_NE-two*feq*om1)/(one-om1))*c1o2+(q*(f_SW+f_NE)-six*c1over54*(-VeloX-VeloY))/(one+q);// - c1over54 * drho;
-   //      //(D.f[NE])[kne]=zero;
+   //      (D.f[DIR_PP0])[kne]=(one-q)/(one+q)*(f_SW-f_NE+(f_SW+f_NE-two*feq*om1)/(one-om1))*c1o2+(q*(f_SW+f_NE)-six*c1over54*(-VeloX-VeloY))/(one+q);// - c1over54 * drho;
+   //      //(D.f[DIR_PP0])[kne]=zero;
    //   }
 
    //   q = q_dirSE[k];
    //   if (q>=zero && q<=one)
    //   {
    //      feq=c1over54* (drho/*+three*( vx1-vx2    )*/+c9over2*( vx1-vx2    )*( vx1-vx2    ) * (one + drho)-cu_sq); 
-   //      (D.f[NW])[knw]=(one-q)/(one+q)*(f_SE-f_NW+(f_SE+f_NW-two*feq*om1)/(one-om1))*c1o2+(q*(f_SE+f_NW)-six*c1over54*( VeloX-VeloY))/(one+q);// - c1over54 * drho;
-   //      //(D.f[NW])[knw]=zero;
+   //      (D.f[DIR_MP0])[knw]=(one-q)/(one+q)*(f_SE-f_NW+(f_SE+f_NW-two*feq*om1)/(one-om1))*c1o2+(q*(f_SE+f_NW)-six*c1over54*( VeloX-VeloY))/(one+q);// - c1over54 * drho;
+   //      //(D.f[DIR_MP0])[knw]=zero;
    //   }
 
    //   q = q_dirNW[k];
    //   if (q>=zero && q<=one)
    //   {
    //      feq=c1over54* (drho/*+three*(-vx1+vx2    )*/+c9over2*(-vx1+vx2    )*(-vx1+vx2    ) * (one + drho)-cu_sq); 
-   //      (D.f[SE])[kse]=(one-q)/(one+q)*(f_NW-f_SE+(f_NW+f_SE-two*feq*om1)/(one-om1))*c1o2+(q*(f_NW+f_SE)-six*c1over54*(-VeloX+VeloY))/(one+q);// - c1over54 * drho;
-   //      //(D.f[SE])[kse]=zero;
+   //      (D.f[DIR_PM0])[kse]=(one-q)/(one+q)*(f_NW-f_SE+(f_NW+f_SE-two*feq*om1)/(one-om1))*c1o2+(q*(f_NW+f_SE)-six*c1over54*(-VeloX+VeloY))/(one+q);// - c1over54 * drho;
+   //      //(D.f[DIR_PM0])[kse]=zero;
    //   }
 
    //   q = q_dirTE[k];
    //   if (q>=zero && q<=one)
    //   {
    //      feq=c1over54* (drho/*+three*( vx1    +vx3)*/+c9over2*( vx1    +vx3)*( vx1    +vx3) * (one + drho)-cu_sq); 
-   //      (D.f[BW])[kbw]=(one-q)/(one+q)*(f_TE-f_BW+(f_TE+f_BW-two*feq*om1)/(one-om1))*c1o2+(q*(f_TE+f_BW)-six*c1over54*( VeloX+VeloZ))/(one+q);// - c1over54 * drho;
-   //      //(D.f[BW])[kbw]=zero;
+   //      (D.f[DIR_M0M])[kbw]=(one-q)/(one+q)*(f_TE-f_BW+(f_TE+f_BW-two*feq*om1)/(one-om1))*c1o2+(q*(f_TE+f_BW)-six*c1over54*( VeloX+VeloZ))/(one+q);// - c1over54 * drho;
+   //      //(D.f[DIR_M0M])[kbw]=zero;
    //   }
 
    //   q = q_dirBW[k];
    //   if (q>=zero && q<=one)
    //   {
    //      feq=c1over54* (drho/*+three*(-vx1    -vx3)*/+c9over2*(-vx1    -vx3)*(-vx1    -vx3) * (one + drho)-cu_sq); 
-   //      (D.f[TE])[kte]=(one-q)/(one+q)*(f_BW-f_TE+(f_BW+f_TE-two*feq*om1)/(one-om1))*c1o2+(q*(f_BW+f_TE)-six*c1over54*(-VeloX-VeloZ))/(one+q);// - c1over54 * drho;
-   //      //(D.f[TE])[kte]=zero;
+   //      (D.f[DIR_P0P])[kte]=(one-q)/(one+q)*(f_BW-f_TE+(f_BW+f_TE-two*feq*om1)/(one-om1))*c1o2+(q*(f_BW+f_TE)-six*c1over54*(-VeloX-VeloZ))/(one+q);// - c1over54 * drho;
+   //      //(D.f[DIR_P0P])[kte]=zero;
    //   }
 
    //   q = q_dirBE[k];
    //   if (q>=zero && q<=one)
    //   {
    //      feq=c1over54* (drho/*+three*( vx1    -vx3)*/+c9over2*( vx1    -vx3)*( vx1    -vx3) * (one + drho)-cu_sq); 
-   //      (D.f[TW])[ktw]=(one-q)/(one+q)*(f_BE-f_TW+(f_BE+f_TW-two*feq*om1)/(one-om1))*c1o2+(q*(f_BE+f_TW)-six*c1over54*( VeloX-VeloZ))/(one+q);// - c1over54 * drho;
-   //      //(D.f[TW])[ktw]=zero;
+   //      (D.f[DIR_M0P])[ktw]=(one-q)/(one+q)*(f_BE-f_TW+(f_BE+f_TW-two*feq*om1)/(one-om1))*c1o2+(q*(f_BE+f_TW)-six*c1over54*( VeloX-VeloZ))/(one+q);// - c1over54 * drho;
+   //      //(D.f[DIR_M0P])[ktw]=zero;
    //   }
 
    //   q = q_dirTW[k];
    //   if (q>=zero && q<=one)
    //   {
    //      feq=c1over54* (drho/*+three*(-vx1    +vx3)*/+c9over2*(-vx1    +vx3)*(-vx1    +vx3) * (one + drho)-cu_sq); 
-   //      (D.f[BE])[kbe]=(one-q)/(one+q)*(f_TW-f_BE+(f_TW+f_BE-two*feq*om1)/(one-om1))*c1o2+(q*(f_TW+f_BE)-six*c1over54*(-VeloX+VeloZ))/(one+q);// - c1over54 * drho;
-   //      //(D.f[BE])[kbe]=zero;
+   //      (D.f[DIR_P0M])[kbe]=(one-q)/(one+q)*(f_TW-f_BE+(f_TW+f_BE-two*feq*om1)/(one-om1))*c1o2+(q*(f_TW+f_BE)-six*c1over54*(-VeloX+VeloZ))/(one+q);// - c1over54 * drho;
+   //      //(D.f[DIR_P0M])[kbe]=zero;
    //   }
 
    //   q = q_dirTN[k];
    //   if (q>=zero && q<=one)
    //   {
    //      feq=c1over54* (drho/*+three*(     vx2+vx3)*/+c9over2*(     vx2+vx3)*(     vx2+vx3) * (one + drho)-cu_sq); 
-   //      (D.f[BS])[kbs]=(one-q)/(one+q)*(f_TN-f_BS+(f_TN+f_BS-two*feq*om1)/(one-om1))*c1o2+(q*(f_TN+f_BS)-six*c1over54*( VeloY+VeloZ))/(one+q);// - c1over54 * drho;
-   //      //(D.f[BS])[kbs]=zero;
+   //      (D.f[DIR_0MM])[kbs]=(one-q)/(one+q)*(f_TN-f_BS+(f_TN+f_BS-two*feq*om1)/(one-om1))*c1o2+(q*(f_TN+f_BS)-six*c1over54*( VeloY+VeloZ))/(one+q);// - c1over54 * drho;
+   //      //(D.f[DIR_0MM])[kbs]=zero;
    //   }
 
    //   q = q_dirBS[k];
    //   if (q>=zero && q<=one)
    //   {
    //      feq=c1over54* (drho/*+three*(    -vx2-vx3)*/+c9over2*(    -vx2-vx3)*(    -vx2-vx3) * (one + drho)-cu_sq); 
-   //      (D.f[TN])[ktn]=(one-q)/(one+q)*(f_BS-f_TN+(f_BS+f_TN-two*feq*om1)/(one-om1))*c1o2+(q*(f_BS+f_TN)-six*c1over54*( -VeloY-VeloZ))/(one+q);// - c1over54 * drho;
-   //      //(D.f[TN])[ktn]=zero;
+   //      (D.f[DIR_0PP])[ktn]=(one-q)/(one+q)*(f_BS-f_TN+(f_BS+f_TN-two*feq*om1)/(one-om1))*c1o2+(q*(f_BS+f_TN)-six*c1over54*( -VeloY-VeloZ))/(one+q);// - c1over54 * drho;
+   //      //(D.f[DIR_0PP])[ktn]=zero;
    //   }
 
    //   q = q_dirBN[k];
    //   if (q>=zero && q<=one)
    //   {
    //      feq=c1over54* (drho/*+three*(     vx2-vx3)*/+c9over2*(     vx2-vx3)*(     vx2-vx3) * (one + drho)-cu_sq); 
-   //      (D.f[TS])[kts]=(one-q)/(one+q)*(f_BN-f_TS+(f_BN+f_TS-two*feq*om1)/(one-om1))*c1o2+(q*(f_BN+f_TS)-six*c1over54*( VeloY-VeloZ))/(one+q);// - c1over54 * drho;
-   //      //(D.f[TS])[kts]=zero;
+   //      (D.f[DIR_0MP])[kts]=(one-q)/(one+q)*(f_BN-f_TS+(f_BN+f_TS-two*feq*om1)/(one-om1))*c1o2+(q*(f_BN+f_TS)-six*c1over54*( VeloY-VeloZ))/(one+q);// - c1over54 * drho;
+   //      //(D.f[DIR_0MP])[kts]=zero;
    //   }
 
    //   q = q_dirTS[k];
    //   if (q>=zero && q<=one)
    //   {
    //      feq=c1over54* (drho/*+three*(    -vx2+vx3)*/+c9over2*(    -vx2+vx3)*(    -vx2+vx3) * (one + drho)-cu_sq); 
-   //      (D.f[BN])[kbn]=(one-q)/(one+q)*(f_TS-f_BN+(f_TS+f_BN-two*feq*om1)/(one-om1))*c1o2+(q*(f_TS+f_BN)-six*c1over54*( -VeloY+VeloZ))/(one+q);// - c1over54 * drho;
-   //      //(D.f[BN])[kbn]=zero;
+   //      (D.f[DIR_0PM])[kbn]=(one-q)/(one+q)*(f_TS-f_BN+(f_TS+f_BN-two*feq*om1)/(one-om1))*c1o2+(q*(f_TS+f_BN)-six*c1over54*( -VeloY+VeloZ))/(one+q);// - c1over54 * drho;
+   //      //(D.f[DIR_0PM])[kbn]=zero;
    //   }
 
    //   q = q_dirTNE[k];
    //   if (q>=zero && q<=one)
    //   {
    //      feq=c1over216*(drho/*+three*( vx1+vx2+vx3)*/+c9over2*( vx1+vx2+vx3)*( vx1+vx2+vx3) * (one + drho)-cu_sq); 
-   //      (D.f[BSW])[kbsw]=(one-q)/(one+q)*(f_TNE-f_BSW+(f_TNE+f_BSW-two*feq*om1)/(one-om1))*c1o2+(q*(f_TNE+f_BSW)-six*c1over216*( VeloX+VeloY+VeloZ))/(one+q);// - c1over216 * drho;
-   //      //(D.f[BSW])[kbsw]=zero;
+   //      (D.f[DIR_MMM])[kbsw]=(one-q)/(one+q)*(f_TNE-f_BSW+(f_TNE+f_BSW-two*feq*om1)/(one-om1))*c1o2+(q*(f_TNE+f_BSW)-six*c1over216*( VeloX+VeloY+VeloZ))/(one+q);// - c1over216 * drho;
+   //      //(D.f[DIR_MMM])[kbsw]=zero;
    //   }
 
    //   q = q_dirBSW[k];
    //   if (q>=zero && q<=one)
    //   {
    //      feq=c1over216*(drho/*+three*(-vx1-vx2-vx3)*/+c9over2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3) * (one + drho)-cu_sq); 
-   //      (D.f[TNE])[ktne]=(one-q)/(one+q)*(f_BSW-f_TNE+(f_BSW+f_TNE-two*feq*om1)/(one-om1))*c1o2+(q*(f_BSW+f_TNE)-six*c1over216*(-VeloX-VeloY-VeloZ))/(one+q);// - c1over216 * drho;
-   //      //(D.f[TNE])[ktne]=zero;
+   //      (D.f[DIR_PPP])[ktne]=(one-q)/(one+q)*(f_BSW-f_TNE+(f_BSW+f_TNE-two*feq*om1)/(one-om1))*c1o2+(q*(f_BSW+f_TNE)-six*c1over216*(-VeloX-VeloY-VeloZ))/(one+q);// - c1over216 * drho;
+   //      //(D.f[DIR_PPP])[ktne]=zero;
    //   }
 
    //   q = q_dirBNE[k];
    //   if (q>=zero && q<=one)
    //   {
    //      feq=c1over216*(drho/*+three*( vx1+vx2-vx3)*/+c9over2*( vx1+vx2-vx3)*( vx1+vx2-vx3) * (one + drho)-cu_sq); 
-   //      (D.f[TSW])[ktsw]=(one-q)/(one+q)*(f_BNE-f_TSW+(f_BNE+f_TSW-two*feq*om1)/(one-om1))*c1o2+(q*(f_BNE+f_TSW)-six*c1over216*( VeloX+VeloY-VeloZ))/(one+q);// - c1over216 * drho;
-   //      //(D.f[TSW])[ktsw]=zero;
+   //      (D.f[DIR_MMP])[ktsw]=(one-q)/(one+q)*(f_BNE-f_TSW+(f_BNE+f_TSW-two*feq*om1)/(one-om1))*c1o2+(q*(f_BNE+f_TSW)-six*c1over216*( VeloX+VeloY-VeloZ))/(one+q);// - c1over216 * drho;
+   //      //(D.f[DIR_MMP])[ktsw]=zero;
    //   }
 
    //   q = q_dirTSW[k];
    //   if (q>=zero && q<=one)
    //   {
    //      feq=c1over216*(drho/*+three*(-vx1-vx2+vx3)*/+c9over2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3) * (one + drho)-cu_sq); 
-   //      (D.f[BNE])[kbne]=(one-q)/(one+q)*(f_TSW-f_BNE+(f_TSW+f_BNE-two*feq*om1)/(one-om1))*c1o2+(q*(f_TSW+f_BNE)-six*c1over216*(-VeloX-VeloY+VeloZ))/(one+q);// - c1over216 * drho;
-   //      //(D.f[BNE])[kbne]=zero;
+   //      (D.f[DIR_PPM])[kbne]=(one-q)/(one+q)*(f_TSW-f_BNE+(f_TSW+f_BNE-two*feq*om1)/(one-om1))*c1o2+(q*(f_TSW+f_BNE)-six*c1over216*(-VeloX-VeloY+VeloZ))/(one+q);// - c1over216 * drho;
+   //      //(D.f[DIR_PPM])[kbne]=zero;
    //   }
 
    //   q = q_dirTSE[k];
    //   if (q>=zero && q<=one)
    //   {
    //      feq=c1over216*(drho/*+three*( vx1-vx2+vx3)*/+c9over2*( vx1-vx2+vx3)*( vx1-vx2+vx3) * (one + drho)-cu_sq); 
-   //      (D.f[BNW])[kbnw]=(one-q)/(one+q)*(f_TSE-f_BNW+(f_TSE+f_BNW-two*feq*om1)/(one-om1))*c1o2+(q*(f_TSE+f_BNW)-six*c1over216*( VeloX-VeloY+VeloZ))/(one+q);// - c1over216 * drho;
-   //      //(D.f[BNW])[kbnw]=zero;
+   //      (D.f[DIR_MPM])[kbnw]=(one-q)/(one+q)*(f_TSE-f_BNW+(f_TSE+f_BNW-two*feq*om1)/(one-om1))*c1o2+(q*(f_TSE+f_BNW)-six*c1over216*( VeloX-VeloY+VeloZ))/(one+q);// - c1over216 * drho;
+   //      //(D.f[DIR_MPM])[kbnw]=zero;
    //   }
 
    //   q = q_dirBNW[k];
    //   if (q>=zero && q<=one)
    //   {
    //      feq=c1over216*(drho/*+three*(-vx1+vx2-vx3)*/+c9over2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3) * (one + drho)-cu_sq); 
-   //      (D.f[TSE])[ktse]=(one-q)/(one+q)*(f_BNW-f_TSE+(f_BNW+f_TSE-two*feq*om1)/(one-om1))*c1o2+(q*(f_BNW+f_TSE)-six*c1over216*(-VeloX+VeloY-VeloZ))/(one+q);// - c1over216 * drho;
-   //      //(D.f[TSE])[ktse]=zero;
+   //      (D.f[DIR_PMP])[ktse]=(one-q)/(one+q)*(f_BNW-f_TSE+(f_BNW+f_TSE-two*feq*om1)/(one-om1))*c1o2+(q*(f_BNW+f_TSE)-six*c1over216*(-VeloX+VeloY-VeloZ))/(one+q);// - c1over216 * drho;
+   //      //(D.f[DIR_PMP])[ktse]=zero;
    //   }
 
    //   q = q_dirBSE[k];
    //   if (q>=zero && q<=one)
    //   {
    //      feq=c1over216*(drho/*+three*( vx1-vx2-vx3)*/+c9over2*( vx1-vx2-vx3)*( vx1-vx2-vx3) * (one + drho)-cu_sq); 
-   //      (D.f[TNW])[ktnw]=(one-q)/(one+q)*(f_BSE-f_TNW+(f_BSE+f_TNW-two*feq*om1)/(one-om1))*c1o2+(q*(f_BSE+f_TNW)-six*c1over216*( VeloX-VeloY-VeloZ))/(one+q);// - c1over216 * drho;
-   //      //(D.f[TNW])[ktnw]=zero;
+   //      (D.f[DIR_MPP])[ktnw]=(one-q)/(one+q)*(f_BSE-f_TNW+(f_BSE+f_TNW-two*feq*om1)/(one-om1))*c1o2+(q*(f_BSE+f_TNW)-six*c1over216*( VeloX-VeloY-VeloZ))/(one+q);// - c1over216 * drho;
+   //      //(D.f[DIR_MPP])[ktnw]=zero;
    //   }
 
    //   q = q_dirTNW[k];
    //   if (q>=zero && q<=one)
    //   {
    //      feq=c1over216*(drho/*+three*(-vx1+vx2+vx3)*/+c9over2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3) * (one + drho)-cu_sq); 
-   //      (D.f[BSE])[kbse]=(one-q)/(one+q)*(f_TNW-f_BSE+(f_TNW+f_BSE-two*feq*om1)/(one-om1))*c1o2+(q*(f_TNW+f_BSE)-six*c1over216*(-VeloX+VeloY+VeloZ))/(one+q);// - c1over216 * drho;
-   //      //(D.f[BSE])[kbse]=zero;
+   //      (D.f[DIR_PMM])[kbse]=(one-q)/(one+q)*(f_TNW-f_BSE+(f_TNW+f_BSE-two*feq*om1)/(one-om1))*c1o2+(q*(f_TNW+f_BSE)-six*c1over216*(-VeloX+VeloY+VeloZ))/(one+q);// - c1over216 * drho;
+   //      //(D.f[DIR_PMM])[kbse]=zero;
    //   }
    }
 }
diff --git a/src/gpu/VirtualFluids_GPU/Init/PositionReader.cpp b/src/gpu/VirtualFluids_GPU/Init/PositionReader.cpp
index e0a32643a84e24f99affd55eb32a71efadd17056..4e5a862d3fd1ed19109073aae0fe4c731f7f3e91 100644
--- a/src/gpu/VirtualFluids_GPU/Init/PositionReader.cpp
+++ b/src/gpu/VirtualFluids_GPU/Init/PositionReader.cpp
@@ -139,41 +139,41 @@ void PositionReader::definePropellerQs(Parameter* para)
 	real* QQ                  = para->getParH(para->getFine())->propellerBC.q27[0]; 
 	unsigned int sizeQ           = para->getParH(para->getFine())->propellerBC.numberOfBCnodes; 
 	QforBoundaryConditions Q;
-	Q.q27[E   ] = &QQ[E   *sizeQ];
-	Q.q27[W   ] = &QQ[W   *sizeQ];
-	Q.q27[N   ] = &QQ[N   *sizeQ];
-	Q.q27[S   ] = &QQ[S   *sizeQ];
-	Q.q27[T   ] = &QQ[T   *sizeQ];
-	Q.q27[B   ] = &QQ[B   *sizeQ];
-	Q.q27[NE  ] = &QQ[NE  *sizeQ];
-	Q.q27[SW  ] = &QQ[SW  *sizeQ];
-	Q.q27[SE  ] = &QQ[SE  *sizeQ];
-	Q.q27[NW  ] = &QQ[NW  *sizeQ];
-	Q.q27[TE  ] = &QQ[TE  *sizeQ];
-	Q.q27[BW  ] = &QQ[BW  *sizeQ];
-	Q.q27[BE  ] = &QQ[BE  *sizeQ];
-	Q.q27[TW  ] = &QQ[TW  *sizeQ];
-	Q.q27[TN  ] = &QQ[TN  *sizeQ];
-	Q.q27[BS  ] = &QQ[BS  *sizeQ];
-	Q.q27[BN  ] = &QQ[BN  *sizeQ];
-	Q.q27[TS  ] = &QQ[TS  *sizeQ];
-	Q.q27[REST] = &QQ[REST*sizeQ];
-	Q.q27[TNE ] = &QQ[TNE *sizeQ];
-	Q.q27[TSW ] = &QQ[TSW *sizeQ];
-	Q.q27[TSE ] = &QQ[TSE *sizeQ];
-	Q.q27[TNW ] = &QQ[TNW *sizeQ];
-	Q.q27[BNE ] = &QQ[BNE *sizeQ];
-	Q.q27[BSW ] = &QQ[BSW *sizeQ];
-	Q.q27[BSE ] = &QQ[BSE *sizeQ];
-	Q.q27[BNW ] = &QQ[BNW *sizeQ];
+	Q.q27[DIR_P00   ] = &QQ[DIR_P00   *sizeQ];
+	Q.q27[DIR_M00   ] = &QQ[DIR_M00   *sizeQ];
+	Q.q27[DIR_0P0   ] = &QQ[DIR_0P0   *sizeQ];
+	Q.q27[DIR_0M0   ] = &QQ[DIR_0M0   *sizeQ];
+	Q.q27[DIR_00P   ] = &QQ[DIR_00P   *sizeQ];
+	Q.q27[DIR_00M   ] = &QQ[DIR_00M   *sizeQ];
+	Q.q27[DIR_PP0  ] = &QQ[DIR_PP0  *sizeQ];
+	Q.q27[DIR_MM0  ] = &QQ[DIR_MM0  *sizeQ];
+	Q.q27[DIR_PM0  ] = &QQ[DIR_PM0  *sizeQ];
+	Q.q27[DIR_MP0  ] = &QQ[DIR_MP0  *sizeQ];
+	Q.q27[DIR_P0P  ] = &QQ[DIR_P0P  *sizeQ];
+	Q.q27[DIR_M0M  ] = &QQ[DIR_M0M  *sizeQ];
+	Q.q27[DIR_P0M  ] = &QQ[DIR_P0M  *sizeQ];
+	Q.q27[DIR_M0P  ] = &QQ[DIR_M0P  *sizeQ];
+	Q.q27[DIR_0PP  ] = &QQ[DIR_0PP  *sizeQ];
+	Q.q27[DIR_0MM  ] = &QQ[DIR_0MM  *sizeQ];
+	Q.q27[DIR_0PM  ] = &QQ[DIR_0PM  *sizeQ];
+	Q.q27[DIR_0MP  ] = &QQ[DIR_0MP  *sizeQ];
+	Q.q27[DIR_000] = &QQ[DIR_000*sizeQ];
+	Q.q27[DIR_PPP ] = &QQ[DIR_PPP *sizeQ];
+	Q.q27[DIR_MMP ] = &QQ[DIR_MMP *sizeQ];
+	Q.q27[DIR_PMP ] = &QQ[DIR_PMP *sizeQ];
+	Q.q27[DIR_MPP ] = &QQ[DIR_MPP *sizeQ];
+	Q.q27[DIR_PPM ] = &QQ[DIR_PPM *sizeQ];
+	Q.q27[DIR_MMM ] = &QQ[DIR_MMM *sizeQ];
+	Q.q27[DIR_PMM ] = &QQ[DIR_PMM *sizeQ];
+	Q.q27[DIR_MPM ] = &QQ[DIR_MPM *sizeQ];
 	//////////////////////////////////////////////////////////////////
 	for(uint u=0; u<para->getParH(para->getFine())->propellerBC.numberOfBCnodes; u++)
 	{
-		for (int dir = E; dir<=BSW; dir++)
+		for (int dir = DIR_P00; dir<=DIR_MMM; dir++)
 		{
-			if ((dir==E)  || 
-				(dir==NE) || (dir==SE) || (dir==TE) || (dir==BE) ||
-				(dir==TNE)|| (dir==BNE)|| (dir==TSE)|| (dir==BSE))
+			if ((dir==DIR_P00)  || 
+				(dir==DIR_PP0) || (dir==DIR_PM0) || (dir==DIR_P0P) || (dir==DIR_P0M) ||
+				(dir==DIR_PPP)|| (dir==DIR_PPM)|| (dir==DIR_PMP)|| (dir==DIR_PMM))
 			{
 				Q.q27[dir][u] = 1.0f;
 			} 
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp
index 3151e6bedeb6a96666f11f0040de2c95b20cc42c..630aaf7339afc2907ab6bfbf65bd5fc55f75e215 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp
+++ b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.cpp
@@ -27,6 +27,10 @@ void KernelImp::setCheckParameterStrategy(std::shared_ptr<CheckParameterStrategy
     this->checkStrategy = strategy;
 }
 
+bool KernelImp::getKernelUsesFluidNodeIndices(){ // Getter: returns the current value of the kernelUsesFluidNodeIndices member.
+    return this->kernelUsesFluidNodeIndices; // Plain read of the member; nothing else is modified in this function.
+}
+
 KernelImp::KernelImp(std::shared_ptr<Parameter> para, int level) : para(para), level(level) {}
 
 KernelImp::KernelImp() {}
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h
index cba3540905df0314d6ce1eb6f0a1eab8d4a5a4c4..0141ddda7e9579cc84148d26727ed81c084ea0c5 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h
+++ b/src/gpu/VirtualFluids_GPU/Kernel/KernelImp.h
@@ -21,6 +21,7 @@ public:
     KernelGroup getKernelGroup();
 
     void setCheckParameterStrategy(std::shared_ptr<CheckParameterStrategy> strategy);
+    bool getKernelUsesFluidNodeIndices();
 
 protected:
     KernelImp(std::shared_ptr<Parameter> para, int level);
@@ -33,6 +34,8 @@ protected:
     KernelGroup myKernelGroup;
 
     vf::cuda::CudaGrid cudaGrid;
+
+    bool kernelUsesFluidNodeIndices = false;
 };
 
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27_Device.cu
index 3ccba979c6393365d32209a8d7d82c0ffd7dce56..b4c1236300bbb49fe2df1b3f458f506e989e142b 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_KERNEL_AD_COMP_27(real diffusivity,
+__global__ void LB_KERNEL_AD_COMP_27(real diffusivity,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
@@ -38,125 +38,125 @@ extern "C" __global__ void LB_KERNEL_AD_COMP_27(real diffusivity,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[E] = &DDStart[E   *size_Mat];
-				D.f[W] = &DDStart[W   *size_Mat];
-				D.f[N] = &DDStart[N   *size_Mat];
-				D.f[S] = &DDStart[S   *size_Mat];
-				D.f[T] = &DDStart[T   *size_Mat];
-				D.f[B] = &DDStart[B   *size_Mat];
-				D.f[NE] = &DDStart[NE  *size_Mat];
-				D.f[SW] = &DDStart[SW  *size_Mat];
-				D.f[SE] = &DDStart[SE  *size_Mat];
-				D.f[NW] = &DDStart[NW  *size_Mat];
-				D.f[TE] = &DDStart[TE  *size_Mat];
-				D.f[BW] = &DDStart[BW  *size_Mat];
-				D.f[BE] = &DDStart[BE  *size_Mat];
-				D.f[TW] = &DDStart[TW  *size_Mat];
-				D.f[TN] = &DDStart[TN  *size_Mat];
-				D.f[BS] = &DDStart[BS  *size_Mat];
-				D.f[BN] = &DDStart[BN  *size_Mat];
-				D.f[TS] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[TNE] = &DDStart[TNE *size_Mat];
-				D.f[TSW] = &DDStart[TSW *size_Mat];
-				D.f[TSE] = &DDStart[TSE *size_Mat];
-				D.f[TNW] = &DDStart[TNW *size_Mat];
-				D.f[BNE] = &DDStart[BNE *size_Mat];
-				D.f[BSW] = &DDStart[BSW *size_Mat];
-				D.f[BSE] = &DDStart[BSE *size_Mat];
-				D.f[BNW] = &DDStart[BNW *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
 			}
 			else
 			{
-				D.f[W] = &DDStart[E   *size_Mat];
-				D.f[E] = &DDStart[W   *size_Mat];
-				D.f[S] = &DDStart[N   *size_Mat];
-				D.f[N] = &DDStart[S   *size_Mat];
-				D.f[B] = &DDStart[T   *size_Mat];
-				D.f[T] = &DDStart[B   *size_Mat];
-				D.f[SW] = &DDStart[NE  *size_Mat];
-				D.f[NE] = &DDStart[SW  *size_Mat];
-				D.f[NW] = &DDStart[SE  *size_Mat];
-				D.f[SE] = &DDStart[NW  *size_Mat];
-				D.f[BW] = &DDStart[TE  *size_Mat];
-				D.f[TE] = &DDStart[BW  *size_Mat];
-				D.f[TW] = &DDStart[BE  *size_Mat];
-				D.f[BE] = &DDStart[TW  *size_Mat];
-				D.f[BS] = &DDStart[TN  *size_Mat];
-				D.f[TN] = &DDStart[BS  *size_Mat];
-				D.f[TS] = &DDStart[BN  *size_Mat];
-				D.f[BN] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[BSW] = &DDStart[TNE *size_Mat];
-				D.f[BNE] = &DDStart[TSW *size_Mat];
-				D.f[BNW] = &DDStart[TSE *size_Mat];
-				D.f[BSE] = &DDStart[TNW *size_Mat];
-				D.f[TSW] = &DDStart[BNE *size_Mat];
-				D.f[TNE] = &DDStart[BSW *size_Mat];
-				D.f[TNW] = &DDStart[BSE *size_Mat];
-				D.f[TSE] = &DDStart[BNW *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
 			}
 
 			Distributions27 D27;
 			if (EvenOrOdd == true)
 			{
-				D27.f[E] = &DD27[E   *size_Mat];
-				D27.f[W] = &DD27[W   *size_Mat];
-				D27.f[N] = &DD27[N   *size_Mat];
-				D27.f[S] = &DD27[S   *size_Mat];
-				D27.f[T] = &DD27[T   *size_Mat];
-				D27.f[B] = &DD27[B   *size_Mat];
-				D27.f[NE] = &DD27[NE  *size_Mat];
-				D27.f[SW] = &DD27[SW  *size_Mat];
-				D27.f[SE] = &DD27[SE  *size_Mat];
-				D27.f[NW] = &DD27[NW  *size_Mat];
-				D27.f[TE] = &DD27[TE  *size_Mat];
-				D27.f[BW] = &DD27[BW  *size_Mat];
-				D27.f[BE] = &DD27[BE  *size_Mat];
-				D27.f[TW] = &DD27[TW  *size_Mat];
-				D27.f[TN] = &DD27[TN  *size_Mat];
-				D27.f[BS] = &DD27[BS  *size_Mat];
-				D27.f[BN] = &DD27[BN  *size_Mat];
-				D27.f[TS] = &DD27[TS  *size_Mat];
-				D27.f[REST] = &DD27[REST*size_Mat];
-				D27.f[TNE] = &DD27[TNE *size_Mat];
-				D27.f[TSW] = &DD27[TSW *size_Mat];
-				D27.f[TSE] = &DD27[TSE *size_Mat];
-				D27.f[TNW] = &DD27[TNW *size_Mat];
-				D27.f[BNE] = &DD27[BNE *size_Mat];
-				D27.f[BSW] = &DD27[BSW *size_Mat];
-				D27.f[BSE] = &DD27[BSE *size_Mat];
-				D27.f[BNW] = &DD27[BNW *size_Mat];
+				D27.f[DIR_P00] = &DD27[DIR_P00   *size_Mat];
+				D27.f[DIR_M00] = &DD27[DIR_M00   *size_Mat];
+				D27.f[DIR_0P0] = &DD27[DIR_0P0   *size_Mat];
+				D27.f[DIR_0M0] = &DD27[DIR_0M0   *size_Mat];
+				D27.f[DIR_00P] = &DD27[DIR_00P   *size_Mat];
+				D27.f[DIR_00M] = &DD27[DIR_00M   *size_Mat];
+				D27.f[DIR_PP0] = &DD27[DIR_PP0  *size_Mat];
+				D27.f[DIR_MM0] = &DD27[DIR_MM0  *size_Mat];
+				D27.f[DIR_PM0] = &DD27[DIR_PM0  *size_Mat];
+				D27.f[DIR_MP0] = &DD27[DIR_MP0  *size_Mat];
+				D27.f[DIR_P0P] = &DD27[DIR_P0P  *size_Mat];
+				D27.f[DIR_M0M] = &DD27[DIR_M0M  *size_Mat];
+				D27.f[DIR_P0M] = &DD27[DIR_P0M  *size_Mat];
+				D27.f[DIR_M0P] = &DD27[DIR_M0P  *size_Mat];
+				D27.f[DIR_0PP] = &DD27[DIR_0PP  *size_Mat];
+				D27.f[DIR_0MM] = &DD27[DIR_0MM  *size_Mat];
+				D27.f[DIR_0PM] = &DD27[DIR_0PM  *size_Mat];
+				D27.f[DIR_0MP] = &DD27[DIR_0MP  *size_Mat];
+				D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
+				D27.f[DIR_PPP] = &DD27[DIR_PPP *size_Mat];
+				D27.f[DIR_MMP] = &DD27[DIR_MMP *size_Mat];
+				D27.f[DIR_PMP] = &DD27[DIR_PMP *size_Mat];
+				D27.f[DIR_MPP] = &DD27[DIR_MPP *size_Mat];
+				D27.f[DIR_PPM] = &DD27[DIR_PPM *size_Mat];
+				D27.f[DIR_MMM] = &DD27[DIR_MMM *size_Mat];
+				D27.f[DIR_PMM] = &DD27[DIR_PMM *size_Mat];
+				D27.f[DIR_MPM] = &DD27[DIR_MPM *size_Mat];
 			}
 			else
 			{
-				D27.f[W] = &DD27[E   *size_Mat];
-				D27.f[E] = &DD27[W   *size_Mat];
-				D27.f[S] = &DD27[N   *size_Mat];
-				D27.f[N] = &DD27[S   *size_Mat];
-				D27.f[B] = &DD27[T   *size_Mat];
-				D27.f[T] = &DD27[B   *size_Mat];
-				D27.f[SW] = &DD27[NE  *size_Mat];
-				D27.f[NE] = &DD27[SW  *size_Mat];
-				D27.f[NW] = &DD27[SE  *size_Mat];
-				D27.f[SE] = &DD27[NW  *size_Mat];
-				D27.f[BW] = &DD27[TE  *size_Mat];
-				D27.f[TE] = &DD27[BW  *size_Mat];
-				D27.f[TW] = &DD27[BE  *size_Mat];
-				D27.f[BE] = &DD27[TW  *size_Mat];
-				D27.f[BS] = &DD27[TN  *size_Mat];
-				D27.f[TN] = &DD27[BS  *size_Mat];
-				D27.f[TS] = &DD27[BN  *size_Mat];
-				D27.f[BN] = &DD27[TS  *size_Mat];
-				D27.f[REST] = &DD27[REST*size_Mat];
-				D27.f[BSW] = &DD27[TNE *size_Mat];
-				D27.f[BNE] = &DD27[TSW *size_Mat];
-				D27.f[BNW] = &DD27[TSE *size_Mat];
-				D27.f[BSE] = &DD27[TNW *size_Mat];
-				D27.f[TSW] = &DD27[BNE *size_Mat];
-				D27.f[TNE] = &DD27[BSW *size_Mat];
-				D27.f[TNW] = &DD27[BSE *size_Mat];
-				D27.f[TSE] = &DD27[BNW *size_Mat];
+				D27.f[DIR_M00] = &DD27[DIR_P00   *size_Mat];
+				D27.f[DIR_P00] = &DD27[DIR_M00   *size_Mat];
+				D27.f[DIR_0M0] = &DD27[DIR_0P0   *size_Mat];
+				D27.f[DIR_0P0] = &DD27[DIR_0M0   *size_Mat];
+				D27.f[DIR_00M] = &DD27[DIR_00P   *size_Mat];
+				D27.f[DIR_00P] = &DD27[DIR_00M   *size_Mat];
+				D27.f[DIR_MM0] = &DD27[DIR_PP0  *size_Mat];
+				D27.f[DIR_PP0] = &DD27[DIR_MM0  *size_Mat];
+				D27.f[DIR_MP0] = &DD27[DIR_PM0  *size_Mat];
+				D27.f[DIR_PM0] = &DD27[DIR_MP0  *size_Mat];
+				D27.f[DIR_M0M] = &DD27[DIR_P0P  *size_Mat];
+				D27.f[DIR_P0P] = &DD27[DIR_M0M  *size_Mat];
+				D27.f[DIR_M0P] = &DD27[DIR_P0M  *size_Mat];
+				D27.f[DIR_P0M] = &DD27[DIR_M0P  *size_Mat];
+				D27.f[DIR_0MM] = &DD27[DIR_0PP  *size_Mat];
+				D27.f[DIR_0PP] = &DD27[DIR_0MM  *size_Mat];
+				D27.f[DIR_0MP] = &DD27[DIR_0PM  *size_Mat];
+				D27.f[DIR_0PM] = &DD27[DIR_0MP  *size_Mat];
+				D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
+				D27.f[DIR_MMM] = &DD27[DIR_PPP *size_Mat];
+				D27.f[DIR_PPM] = &DD27[DIR_MMP *size_Mat];
+				D27.f[DIR_MPM] = &DD27[DIR_PMP *size_Mat];
+				D27.f[DIR_PMM] = &DD27[DIR_MPP *size_Mat];
+				D27.f[DIR_MMP] = &DD27[DIR_PPM *size_Mat];
+				D27.f[DIR_PPP] = &DD27[DIR_MMM *size_Mat];
+				D27.f[DIR_MPP] = &DD27[DIR_PMM *size_Mat];
+				D27.f[DIR_PMP] = &DD27[DIR_MPM *size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -169,61 +169,61 @@ extern "C" __global__ void LB_KERNEL_AD_COMP_27(real diffusivity,
 			unsigned int kbs = neighborZ[ks];
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real fW = (D.f[E])[k];//ke
-			real fE = (D.f[W])[kw];
-			real fS = (D.f[N])[k];//kn
-			real fN = (D.f[S])[ks];
-			real fB = (D.f[T])[k];//kt
-			real fT = (D.f[B])[kb];
-			real fSW = (D.f[NE])[k];//kne
-			real fNE = (D.f[SW])[ksw];
-			real fNW = (D.f[SE])[ks];//kse
-			real fSE = (D.f[NW])[kw];//knw
-			real fBW = (D.f[TE])[k];//kte
-			real fTE = (D.f[BW])[kbw];
-			real fTW = (D.f[BE])[kb];//kbe
-			real fBE = (D.f[TW])[kw];//ktw
-			real fBS = (D.f[TN])[k];//ktn
-			real fTN = (D.f[BS])[kbs];
-			real fTS = (D.f[BN])[kb];//kbn
-			real fBN = (D.f[TS])[ks];//kts
-			real fZERO = (D.f[REST])[k];//kzero
-			real fBSW = (D.f[TNE])[k];//ktne
-			real fBNE = (D.f[TSW])[ksw];//ktsw
-			real fBNW = (D.f[TSE])[ks];//ktse
-			real fBSE = (D.f[TNW])[kw];//ktnw
-			real fTSW = (D.f[BNE])[kb];//kbne
-			real fTNE = (D.f[BSW])[kbsw];
-			real fTNW = (D.f[BSE])[kbs];//kbse
-			real fTSE = (D.f[BNW])[kbw];//kbnw
+			real fW = (D.f[DIR_P00])[k];//ke
+			real fE = (D.f[DIR_M00])[kw];
+			real fS = (D.f[DIR_0P0])[k];//kn
+			real fN = (D.f[DIR_0M0])[ks];
+			real fB = (D.f[DIR_00P])[k];//kt
+			real fT = (D.f[DIR_00M])[kb];
+			real fSW = (D.f[DIR_PP0])[k];//kne
+			real fNE = (D.f[DIR_MM0])[ksw];
+			real fNW = (D.f[DIR_PM0])[ks];//kse
+			real fSE = (D.f[DIR_MP0])[kw];//knw
+			real fBW = (D.f[DIR_P0P])[k];//kte
+			real fTE = (D.f[DIR_M0M])[kbw];
+			real fTW = (D.f[DIR_P0M])[kb];//kbe
+			real fBE = (D.f[DIR_M0P])[kw];//ktw
+			real fBS = (D.f[DIR_0PP])[k];//ktn
+			real fTN = (D.f[DIR_0MM])[kbs];
+			real fTS = (D.f[DIR_0PM])[kb];//kbn
+			real fBN = (D.f[DIR_0MP])[ks];//kts
+			real fZERO = (D.f[DIR_000])[k];//kzero
+			real fBSW = (D.f[DIR_PPP])[k];//ktne
+			real fBNE = (D.f[DIR_MMP])[ksw];//ktsw
+			real fBNW = (D.f[DIR_PMP])[ks];//ktse
+			real fBSE = (D.f[DIR_MPP])[kw];//ktnw
+			real fTSW = (D.f[DIR_PPM])[kb];//kbne
+			real fTNE = (D.f[DIR_MMM])[kbsw];
+			real fTNW = (D.f[DIR_PMM])[kbs];//kbse
+			real fTSE = (D.f[DIR_MPM])[kbw];//kbnw
 										   ////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D27.f[E])[k];
-			real mfabb = (D27.f[W])[kw];
-			real mfbcb = (D27.f[N])[k];
-			real mfbab = (D27.f[S])[ks];
-			real mfbbc = (D27.f[T])[k];
-			real mfbba = (D27.f[B])[kb];
-			real mfccb = (D27.f[NE])[k];
-			real mfaab = (D27.f[SW])[ksw];
-			real mfcab = (D27.f[SE])[ks];
-			real mfacb = (D27.f[NW])[kw];
-			real mfcbc = (D27.f[TE])[k];
-			real mfaba = (D27.f[BW])[kbw];
-			real mfcba = (D27.f[BE])[kb];
-			real mfabc = (D27.f[TW])[kw];
-			real mfbcc = (D27.f[TN])[k];
-			real mfbaa = (D27.f[BS])[kbs];
-			real mfbca = (D27.f[BN])[kb];
-			real mfbac = (D27.f[TS])[ks];
-			real mfbbb = (D27.f[REST])[k];
-			real mfccc = (D27.f[TNE])[k];
-			real mfaac = (D27.f[TSW])[ksw];
-			real mfcac = (D27.f[TSE])[ks];
-			real mfacc = (D27.f[TNW])[kw];
-			real mfcca = (D27.f[BNE])[kb];
-			real mfaaa = (D27.f[BSW])[kbsw];
-			real mfcaa = (D27.f[BSE])[kbs];
-			real mfaca = (D27.f[BNW])[kbw];
+			real mfcbb = (D27.f[DIR_P00])[k];
+			real mfabb = (D27.f[DIR_M00])[kw];
+			real mfbcb = (D27.f[DIR_0P0])[k];
+			real mfbab = (D27.f[DIR_0M0])[ks];
+			real mfbbc = (D27.f[DIR_00P])[k];
+			real mfbba = (D27.f[DIR_00M])[kb];
+			real mfccb = (D27.f[DIR_PP0])[k];
+			real mfaab = (D27.f[DIR_MM0])[ksw];
+			real mfcab = (D27.f[DIR_PM0])[ks];
+			real mfacb = (D27.f[DIR_MP0])[kw];
+			real mfcbc = (D27.f[DIR_P0P])[k];
+			real mfaba = (D27.f[DIR_M0M])[kbw];
+			real mfcba = (D27.f[DIR_P0M])[kb];
+			real mfabc = (D27.f[DIR_M0P])[kw];
+			real mfbcc = (D27.f[DIR_0PP])[k];
+			real mfbaa = (D27.f[DIR_0MM])[kbs];
+			real mfbca = (D27.f[DIR_0PM])[kb];
+			real mfbac = (D27.f[DIR_0MP])[ks];
+			real mfbbb = (D27.f[DIR_000])[k];
+			real mfccc = (D27.f[DIR_PPP])[k];
+			real mfaac = (D27.f[DIR_MMP])[ksw];
+			real mfcac = (D27.f[DIR_PMP])[ks];
+			real mfacc = (D27.f[DIR_MPP])[kw];
+			real mfcca = (D27.f[DIR_PPM])[kb];
+			real mfaaa = (D27.f[DIR_MMM])[kbsw];
+			real mfcaa = (D27.f[DIR_PMM])[kbs];
+			real mfaca = (D27.f[DIR_MPM])[kbw];
 			////////////////////////////////////////////////////////////////////////////////////
 			//Conc
 			real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
@@ -773,33 +773,33 @@ extern "C" __global__ void LB_KERNEL_AD_COMP_27(real diffusivity,
 
 
 			////////////////////////////////////////////////////////////////////////////////////
-			(D27.f[E])[k] = mfabb;
-			(D27.f[W])[kw] = mfcbb;
-			(D27.f[N])[k] = mfbab;
-			(D27.f[S])[ks] = mfbcb;
-			(D27.f[T])[k] = mfbba;
-			(D27.f[B])[kb] = mfbbc;
-			(D27.f[NE])[k] = mfaab;
-			(D27.f[SW])[ksw] = mfccb;
-			(D27.f[SE])[ks] = mfacb;
-			(D27.f[NW])[kw] = mfcab;
-			(D27.f[TE])[k] = mfaba;
-			(D27.f[BW])[kbw] = mfcbc;
-			(D27.f[BE])[kb] = mfabc;
-			(D27.f[TW])[kw] = mfcba;
-			(D27.f[TN])[k] = mfbaa;
-			(D27.f[BS])[kbs] = mfbcc;
-			(D27.f[BN])[kb] = mfbac;
-			(D27.f[TS])[ks] = mfbca;
-			(D27.f[REST])[k] = mfbbb;
-			(D27.f[TNE])[k] = mfaaa;
-			(D27.f[TSE])[ks] = mfaca;
-			(D27.f[BNE])[kb] = mfaac;
-			(D27.f[BSE])[kbs] = mfacc;
-			(D27.f[TNW])[kw] = mfcaa;
-			(D27.f[TSW])[ksw] = mfcca;
-			(D27.f[BNW])[kbw] = mfcac;
-			(D27.f[BSW])[kbsw] = mfccc;
+			(D27.f[DIR_P00])[k] = mfabb;
+			(D27.f[DIR_M00])[kw] = mfcbb;
+			(D27.f[DIR_0P0])[k] = mfbab;
+			(D27.f[DIR_0M0])[ks] = mfbcb;
+			(D27.f[DIR_00P])[k] = mfbba;
+			(D27.f[DIR_00M])[kb] = mfbbc;
+			(D27.f[DIR_PP0])[k] = mfaab;
+			(D27.f[DIR_MM0])[ksw] = mfccb;
+			(D27.f[DIR_PM0])[ks] = mfacb;
+			(D27.f[DIR_MP0])[kw] = mfcab;
+			(D27.f[DIR_P0P])[k] = mfaba;
+			(D27.f[DIR_M0M])[kbw] = mfcbc;
+			(D27.f[DIR_P0M])[kb] = mfabc;
+			(D27.f[DIR_M0P])[kw] = mfcba;
+			(D27.f[DIR_0PP])[k] = mfbaa;
+			(D27.f[DIR_0MM])[kbs] = mfbcc;
+			(D27.f[DIR_0PM])[kb] = mfbac;
+			(D27.f[DIR_0MP])[ks] = mfbca;
+			(D27.f[DIR_000])[k] = mfbbb;
+			(D27.f[DIR_PPP])[k] = mfaaa;
+			(D27.f[DIR_PMP])[ks] = mfaca;
+			(D27.f[DIR_PPM])[kb] = mfaac;
+			(D27.f[DIR_PMM])[kbs] = mfacc;
+			(D27.f[DIR_MPP])[kw] = mfcaa;
+			(D27.f[DIR_MMP])[ksw] = mfcca;
+			(D27.f[DIR_MPM])[kbw] = mfcac;
+			(D27.f[DIR_MMM])[kbsw] = mfccc;
 			////////////////////////////////////////////////////////////////////////////////////
 		}
 	}
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27_Device.cuh
index e96bccd492e57cbfb23298e2587648881d9d980c..a5482a10ca15fc3f27245acbe8b47a06d9f917f0 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod27/ADComp27/ADComp27_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_KERNEL_AD_COMP_27(real diffusivity,
+__global__ void LB_KERNEL_AD_COMP_27(real diffusivity,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7_Device.cu
index 561ea4924be31c0ac8b6da822a8720336b163210..52ab9ba6e968ec2293f0a1c4959323c43f328206 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7_Device.cu
@@ -7,7 +7,7 @@ using namespace vf::lbm::dir;
 
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_AD_Comp_7(real diffusivity,
+__global__ void LB_Kernel_AD_Comp_7(real diffusivity,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
@@ -39,63 +39,63 @@ extern "C" __global__ void LB_Kernel_AD_Comp_7(real diffusivity,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[E] = &DDStart[E   *size_Mat];
-				D.f[W] = &DDStart[W   *size_Mat];
-				D.f[N] = &DDStart[N   *size_Mat];
-				D.f[S] = &DDStart[S   *size_Mat];
-				D.f[T] = &DDStart[T   *size_Mat];
-				D.f[B] = &DDStart[B   *size_Mat];
-				D.f[NE] = &DDStart[NE  *size_Mat];
-				D.f[SW] = &DDStart[SW  *size_Mat];
-				D.f[SE] = &DDStart[SE  *size_Mat];
-				D.f[NW] = &DDStart[NW  *size_Mat];
-				D.f[TE] = &DDStart[TE  *size_Mat];
-				D.f[BW] = &DDStart[BW  *size_Mat];
-				D.f[BE] = &DDStart[BE  *size_Mat];
-				D.f[TW] = &DDStart[TW  *size_Mat];
-				D.f[TN] = &DDStart[TN  *size_Mat];
-				D.f[BS] = &DDStart[BS  *size_Mat];
-				D.f[BN] = &DDStart[BN  *size_Mat];
-				D.f[TS] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[TNE] = &DDStart[TNE *size_Mat];
-				D.f[TSW] = &DDStart[TSW *size_Mat];
-				D.f[TSE] = &DDStart[TSE *size_Mat];
-				D.f[TNW] = &DDStart[TNW *size_Mat];
-				D.f[BNE] = &DDStart[BNE *size_Mat];
-				D.f[BSW] = &DDStart[BSW *size_Mat];
-				D.f[BSE] = &DDStart[BSE *size_Mat];
-				D.f[BNW] = &DDStart[BNW *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
 			}
 			else
 			{
-				D.f[W] = &DDStart[E   *size_Mat];
-				D.f[E] = &DDStart[W   *size_Mat];
-				D.f[S] = &DDStart[N   *size_Mat];
-				D.f[N] = &DDStart[S   *size_Mat];
-				D.f[B] = &DDStart[T   *size_Mat];
-				D.f[T] = &DDStart[B   *size_Mat];
-				D.f[SW] = &DDStart[NE  *size_Mat];
-				D.f[NE] = &DDStart[SW  *size_Mat];
-				D.f[NW] = &DDStart[SE  *size_Mat];
-				D.f[SE] = &DDStart[NW  *size_Mat];
-				D.f[BW] = &DDStart[TE  *size_Mat];
-				D.f[TE] = &DDStart[BW  *size_Mat];
-				D.f[TW] = &DDStart[BE  *size_Mat];
-				D.f[BE] = &DDStart[TW  *size_Mat];
-				D.f[BS] = &DDStart[TN  *size_Mat];
-				D.f[TN] = &DDStart[BS  *size_Mat];
-				D.f[TS] = &DDStart[BN  *size_Mat];
-				D.f[BN] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[BSW] = &DDStart[TNE *size_Mat];
-				D.f[BNE] = &DDStart[TSW *size_Mat];
-				D.f[BNW] = &DDStart[TSE *size_Mat];
-				D.f[BSE] = &DDStart[TNW *size_Mat];
-				D.f[TSW] = &DDStart[BNE *size_Mat];
-				D.f[TNE] = &DDStart[BSW *size_Mat];
-				D.f[TNW] = &DDStart[BSE *size_Mat];
-				D.f[TSE] = &DDStart[BNW *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
 			}
 
 			Distributions7 D7;
@@ -130,60 +130,60 @@ extern "C" __global__ void LB_Kernel_AD_Comp_7(real diffusivity,
 			unsigned int kbs = neighborZ[ks];
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real fW = (D.f[E])[k];//ke
-			real fE = (D.f[W])[kw];
-			real fS = (D.f[N])[k];//kn
-			real fN = (D.f[S])[ks];
-			real fB = (D.f[T])[k];//kt
-			real fT = (D.f[B])[kb];
-			real fSW = (D.f[NE])[k];//kne
-			real fNE = (D.f[SW])[ksw];
-			real fNW = (D.f[SE])[ks];//kse
-			real fSE = (D.f[NW])[kw];//knw
-			real fBW = (D.f[TE])[k];//kte
-			real fTE = (D.f[BW])[kbw];
-			real fTW = (D.f[BE])[kb];//kbe
-			real fBE = (D.f[TW])[kw];//ktw
-			real fBS = (D.f[TN])[k];//ktn
-			real fTN = (D.f[BS])[kbs];
-			real fTS = (D.f[BN])[kb];//kbn
-			real fBN = (D.f[TS])[ks];//kts
-			real fZERO = (D.f[REST])[k];//kzero
-			real fBSW = (D.f[TNE])[k];//ktne
-			real fBNE = (D.f[TSW])[ksw];//ktsw
-			real fBNW = (D.f[TSE])[ks];//ktse
-			real fBSE = (D.f[TNW])[kw];//ktnw
-			real fTSW = (D.f[BNE])[kb];//kbne
-			real fTNE = (D.f[BSW])[kbsw];
-			real fTNW = (D.f[BSE])[kbs];//kbse
-			real fTSE = (D.f[BNW])[kbw];//kbnw
-										   //real fE    =  (D.f[E   ])[k  ];//ke
-										   //real fW    =  (D.f[W   ])[kw ];
-										   //real fN    =  (D.f[N   ])[k  ];//kn
-										   //real fS    =  (D.f[S   ])[ks ];
-										   //real fT    =  (D.f[T   ])[k  ];//kt
-										   //real fB    =  (D.f[B   ])[kb ];
-										   //real fNE   =  (D.f[NE  ])[k  ];//kne
-										   //real fSW   =  (D.f[SW  ])[ksw];
-										   //real fSE   =  (D.f[SE  ])[ks ];//kse
-										   //real fNW   =  (D.f[NW  ])[kw ];//knw
-										   //real fTE   =  (D.f[TE  ])[k  ];//kte
-										   //real fBW   =  (D.f[BW  ])[kbw];
-										   //real fBE   =  (D.f[BE  ])[kb ];//kbe
-										   //real fTW   =  (D.f[TW  ])[kw ];//ktw
-										   //real fTN   =  (D.f[TN  ])[k  ];//ktn
-										   //real fBS   =  (D.f[BS  ])[kbs];
-										   //real fBN   =  (D.f[BN  ])[kb ];//kbn
-										   //real fTS   =  (D.f[TS  ])[ks ];//kts
-										   //real fZERO =  (D.f[REST])[k  ];//kzero
-										   //real fTNE   = (D.f[TNE ])[k  ];//ktne
-										   //real fTSW   = (D.f[TSW ])[ksw];//ktsw
-										   //real fTSE   = (D.f[TSE ])[ks ];//ktse
-										   //real fTNW   = (D.f[TNW ])[kw ];//ktnw
-										   //real fBNE   = (D.f[BNE ])[kb ];//kbne
-										   //real fBSW   = (D.f[BSW ])[kbsw];
-										   //real fBSE   = (D.f[BSE ])[kbs];//kbse
-										   //real fBNW   = (D.f[BNW ])[kbw];//kbnw
+			real fW = (D.f[DIR_P00])[k];//ke
+			real fE = (D.f[DIR_M00])[kw];
+			real fS = (D.f[DIR_0P0])[k];//kn
+			real fN = (D.f[DIR_0M0])[ks];
+			real fB = (D.f[DIR_00P])[k];//kt
+			real fT = (D.f[DIR_00M])[kb];
+			real fSW = (D.f[DIR_PP0])[k];//kne
+			real fNE = (D.f[DIR_MM0])[ksw];
+			real fNW = (D.f[DIR_PM0])[ks];//kse
+			real fSE = (D.f[DIR_MP0])[kw];//knw
+			real fBW = (D.f[DIR_P0P])[k];//kte
+			real fTE = (D.f[DIR_M0M])[kbw];
+			real fTW = (D.f[DIR_P0M])[kb];//kbe
+			real fBE = (D.f[DIR_M0P])[kw];//ktw
+			real fBS = (D.f[DIR_0PP])[k];//ktn
+			real fTN = (D.f[DIR_0MM])[kbs];
+			real fTS = (D.f[DIR_0PM])[kb];//kbn
+			real fBN = (D.f[DIR_0MP])[ks];//kts
+			real fZERO = (D.f[DIR_000])[k];//kzero
+			real fBSW = (D.f[DIR_PPP])[k];//ktne
+			real fBNE = (D.f[DIR_MMP])[ksw];//ktsw
+			real fBNW = (D.f[DIR_PMP])[ks];//ktse
+			real fBSE = (D.f[DIR_MPP])[kw];//ktnw
+			real fTSW = (D.f[DIR_PPM])[kb];//kbne
+			real fTNE = (D.f[DIR_MMM])[kbsw];
+			real fTNW = (D.f[DIR_PMM])[kbs];//kbse
+			real fTSE = (D.f[DIR_MPM])[kbw];//kbnw
+										   //real fE    =  (D.f[DIR_P00   ])[k  ];//ke
+										   //real fW    =  (D.f[DIR_M00   ])[kw ];
+										   //real fN    =  (D.f[DIR_0P0   ])[k  ];//kn
+										   //real fS    =  (D.f[DIR_0M0   ])[ks ];
+										   //real fT    =  (D.f[DIR_00P   ])[k  ];//kt
+										   //real fB    =  (D.f[DIR_00M   ])[kb ];
+										   //real fNE   =  (D.f[DIR_PP0  ])[k  ];//kne
+										   //real fSW   =  (D.f[DIR_MM0  ])[ksw];
+										   //real fSE   =  (D.f[DIR_PM0  ])[ks ];//kse
+										   //real fNW   =  (D.f[DIR_MP0  ])[kw ];//knw
+										   //real fTE   =  (D.f[DIR_P0P  ])[k  ];//kte
+										   //real fBW   =  (D.f[DIR_M0M  ])[kbw];
+										   //real fBE   =  (D.f[DIR_P0M  ])[kb ];//kbe
+										   //real fTW   =  (D.f[DIR_M0P  ])[kw ];//ktw
+										   //real fTN   =  (D.f[DIR_0PP  ])[k  ];//ktn
+										   //real fBS   =  (D.f[DIR_0MM  ])[kbs];
+										   //real fBN   =  (D.f[DIR_0PM  ])[kb ];//kbn
+										   //real fTS   =  (D.f[DIR_0MP  ])[ks ];//kts
+										   //real fZERO =  (D.f[DIR_000])[k  ];//kzero
+										   //real fTNE   = (D.f[DIR_PPP ])[k  ];//ktne
+										   //real fTSW   = (D.f[DIR_MMP ])[ksw];//ktsw
+										   //real fTSE   = (D.f[DIR_PMP ])[ks ];//ktse
+										   //real fTNW   = (D.f[DIR_MPP ])[kw ];//ktnw
+										   //real fBNE   = (D.f[DIR_PPM ])[kb ];//kbne
+										   //real fBSW   = (D.f[DIR_MMM ])[kbsw];
+										   //real fBSE   = (D.f[DIR_PMM ])[kbs];//kbse
+										   //real fBNW   = (D.f[DIR_MPM ])[kbw];//kbnw
 										   ////////////////////////////////////////////////////////////////////////////////
 			real f7ZERO = (D7.f[0])[k];
 			real f7E = (D7.f[1])[k];
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7_Device.cuh
index 01e64563499a3a4d0a515beee7c95cba75949640..bea40443ab619fb08b4c5656105c7792c4f11bd1 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Compressible/Mod7/ADComp7/ADComp7_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_AD_Comp_7(real diffusivity,
+__global__ void LB_Kernel_AD_Comp_7(real diffusivity,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27_Device.cu
index f64b5f7681680f4c3b396964409a6568d8f97d0c..e686825ed100417110b02360876dec076553d7de 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_AD_Incomp_27(real diffusivity,
+__global__ void LB_Kernel_AD_Incomp_27(real diffusivity,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
@@ -38,125 +38,125 @@ extern "C" __global__ void LB_Kernel_AD_Incomp_27(real diffusivity,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[E] = &DDStart[E   *size_Mat];
-				D.f[W] = &DDStart[W   *size_Mat];
-				D.f[N] = &DDStart[N   *size_Mat];
-				D.f[S] = &DDStart[S   *size_Mat];
-				D.f[T] = &DDStart[T   *size_Mat];
-				D.f[B] = &DDStart[B   *size_Mat];
-				D.f[NE] = &DDStart[NE  *size_Mat];
-				D.f[SW] = &DDStart[SW  *size_Mat];
-				D.f[SE] = &DDStart[SE  *size_Mat];
-				D.f[NW] = &DDStart[NW  *size_Mat];
-				D.f[TE] = &DDStart[TE  *size_Mat];
-				D.f[BW] = &DDStart[BW  *size_Mat];
-				D.f[BE] = &DDStart[BE  *size_Mat];
-				D.f[TW] = &DDStart[TW  *size_Mat];
-				D.f[TN] = &DDStart[TN  *size_Mat];
-				D.f[BS] = &DDStart[BS  *size_Mat];
-				D.f[BN] = &DDStart[BN  *size_Mat];
-				D.f[TS] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[TNE] = &DDStart[TNE *size_Mat];
-				D.f[TSW] = &DDStart[TSW *size_Mat];
-				D.f[TSE] = &DDStart[TSE *size_Mat];
-				D.f[TNW] = &DDStart[TNW *size_Mat];
-				D.f[BNE] = &DDStart[BNE *size_Mat];
-				D.f[BSW] = &DDStart[BSW *size_Mat];
-				D.f[BSE] = &DDStart[BSE *size_Mat];
-				D.f[BNW] = &DDStart[BNW *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
 			}
 			else
 			{
-				D.f[W] = &DDStart[E   *size_Mat];
-				D.f[E] = &DDStart[W   *size_Mat];
-				D.f[S] = &DDStart[N   *size_Mat];
-				D.f[N] = &DDStart[S   *size_Mat];
-				D.f[B] = &DDStart[T   *size_Mat];
-				D.f[T] = &DDStart[B   *size_Mat];
-				D.f[SW] = &DDStart[NE  *size_Mat];
-				D.f[NE] = &DDStart[SW  *size_Mat];
-				D.f[NW] = &DDStart[SE  *size_Mat];
-				D.f[SE] = &DDStart[NW  *size_Mat];
-				D.f[BW] = &DDStart[TE  *size_Mat];
-				D.f[TE] = &DDStart[BW  *size_Mat];
-				D.f[TW] = &DDStart[BE  *size_Mat];
-				D.f[BE] = &DDStart[TW  *size_Mat];
-				D.f[BS] = &DDStart[TN  *size_Mat];
-				D.f[TN] = &DDStart[BS  *size_Mat];
-				D.f[TS] = &DDStart[BN  *size_Mat];
-				D.f[BN] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[BSW] = &DDStart[TNE *size_Mat];
-				D.f[BNE] = &DDStart[TSW *size_Mat];
-				D.f[BNW] = &DDStart[TSE *size_Mat];
-				D.f[BSE] = &DDStart[TNW *size_Mat];
-				D.f[TSW] = &DDStart[BNE *size_Mat];
-				D.f[TNE] = &DDStart[BSW *size_Mat];
-				D.f[TNW] = &DDStart[BSE *size_Mat];
-				D.f[TSE] = &DDStart[BNW *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
 			}
 
 			Distributions27 D27;
 			if (EvenOrOdd == true)
 			{
-				D27.f[E] = &DD27[E   *size_Mat];
-				D27.f[W] = &DD27[W   *size_Mat];
-				D27.f[N] = &DD27[N   *size_Mat];
-				D27.f[S] = &DD27[S   *size_Mat];
-				D27.f[T] = &DD27[T   *size_Mat];
-				D27.f[B] = &DD27[B   *size_Mat];
-				D27.f[NE] = &DD27[NE  *size_Mat];
-				D27.f[SW] = &DD27[SW  *size_Mat];
-				D27.f[SE] = &DD27[SE  *size_Mat];
-				D27.f[NW] = &DD27[NW  *size_Mat];
-				D27.f[TE] = &DD27[TE  *size_Mat];
-				D27.f[BW] = &DD27[BW  *size_Mat];
-				D27.f[BE] = &DD27[BE  *size_Mat];
-				D27.f[TW] = &DD27[TW  *size_Mat];
-				D27.f[TN] = &DD27[TN  *size_Mat];
-				D27.f[BS] = &DD27[BS  *size_Mat];
-				D27.f[BN] = &DD27[BN  *size_Mat];
-				D27.f[TS] = &DD27[TS  *size_Mat];
-				D27.f[REST] = &DD27[REST*size_Mat];
-				D27.f[TNE] = &DD27[TNE *size_Mat];
-				D27.f[TSW] = &DD27[TSW *size_Mat];
-				D27.f[TSE] = &DD27[TSE *size_Mat];
-				D27.f[TNW] = &DD27[TNW *size_Mat];
-				D27.f[BNE] = &DD27[BNE *size_Mat];
-				D27.f[BSW] = &DD27[BSW *size_Mat];
-				D27.f[BSE] = &DD27[BSE *size_Mat];
-				D27.f[BNW] = &DD27[BNW *size_Mat];
+				D27.f[DIR_P00] = &DD27[DIR_P00   *size_Mat];
+				D27.f[DIR_M00] = &DD27[DIR_M00   *size_Mat];
+				D27.f[DIR_0P0] = &DD27[DIR_0P0   *size_Mat];
+				D27.f[DIR_0M0] = &DD27[DIR_0M0   *size_Mat];
+				D27.f[DIR_00P] = &DD27[DIR_00P   *size_Mat];
+				D27.f[DIR_00M] = &DD27[DIR_00M   *size_Mat];
+				D27.f[DIR_PP0] = &DD27[DIR_PP0  *size_Mat];
+				D27.f[DIR_MM0] = &DD27[DIR_MM0  *size_Mat];
+				D27.f[DIR_PM0] = &DD27[DIR_PM0  *size_Mat];
+				D27.f[DIR_MP0] = &DD27[DIR_MP0  *size_Mat];
+				D27.f[DIR_P0P] = &DD27[DIR_P0P  *size_Mat];
+				D27.f[DIR_M0M] = &DD27[DIR_M0M  *size_Mat];
+				D27.f[DIR_P0M] = &DD27[DIR_P0M  *size_Mat];
+				D27.f[DIR_M0P] = &DD27[DIR_M0P  *size_Mat];
+				D27.f[DIR_0PP] = &DD27[DIR_0PP  *size_Mat];
+				D27.f[DIR_0MM] = &DD27[DIR_0MM  *size_Mat];
+				D27.f[DIR_0PM] = &DD27[DIR_0PM  *size_Mat];
+				D27.f[DIR_0MP] = &DD27[DIR_0MP  *size_Mat];
+				D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
+				D27.f[DIR_PPP] = &DD27[DIR_PPP *size_Mat];
+				D27.f[DIR_MMP] = &DD27[DIR_MMP *size_Mat];
+				D27.f[DIR_PMP] = &DD27[DIR_PMP *size_Mat];
+				D27.f[DIR_MPP] = &DD27[DIR_MPP *size_Mat];
+				D27.f[DIR_PPM] = &DD27[DIR_PPM *size_Mat];
+				D27.f[DIR_MMM] = &DD27[DIR_MMM *size_Mat];
+				D27.f[DIR_PMM] = &DD27[DIR_PMM *size_Mat];
+				D27.f[DIR_MPM] = &DD27[DIR_MPM *size_Mat];
 			}
 			else
 			{
-				D27.f[W] = &DD27[E   *size_Mat];
-				D27.f[E] = &DD27[W   *size_Mat];
-				D27.f[S] = &DD27[N   *size_Mat];
-				D27.f[N] = &DD27[S   *size_Mat];
-				D27.f[B] = &DD27[T   *size_Mat];
-				D27.f[T] = &DD27[B   *size_Mat];
-				D27.f[SW] = &DD27[NE  *size_Mat];
-				D27.f[NE] = &DD27[SW  *size_Mat];
-				D27.f[NW] = &DD27[SE  *size_Mat];
-				D27.f[SE] = &DD27[NW  *size_Mat];
-				D27.f[BW] = &DD27[TE  *size_Mat];
-				D27.f[TE] = &DD27[BW  *size_Mat];
-				D27.f[TW] = &DD27[BE  *size_Mat];
-				D27.f[BE] = &DD27[TW  *size_Mat];
-				D27.f[BS] = &DD27[TN  *size_Mat];
-				D27.f[TN] = &DD27[BS  *size_Mat];
-				D27.f[TS] = &DD27[BN  *size_Mat];
-				D27.f[BN] = &DD27[TS  *size_Mat];
-				D27.f[REST] = &DD27[REST*size_Mat];
-				D27.f[BSW] = &DD27[TNE *size_Mat];
-				D27.f[BNE] = &DD27[TSW *size_Mat];
-				D27.f[BNW] = &DD27[TSE *size_Mat];
-				D27.f[BSE] = &DD27[TNW *size_Mat];
-				D27.f[TSW] = &DD27[BNE *size_Mat];
-				D27.f[TNE] = &DD27[BSW *size_Mat];
-				D27.f[TNW] = &DD27[BSE *size_Mat];
-				D27.f[TSE] = &DD27[BNW *size_Mat];
+				D27.f[DIR_M00] = &DD27[DIR_P00   *size_Mat];
+				D27.f[DIR_P00] = &DD27[DIR_M00   *size_Mat];
+				D27.f[DIR_0M0] = &DD27[DIR_0P0   *size_Mat];
+				D27.f[DIR_0P0] = &DD27[DIR_0M0   *size_Mat];
+				D27.f[DIR_00M] = &DD27[DIR_00P   *size_Mat];
+				D27.f[DIR_00P] = &DD27[DIR_00M   *size_Mat];
+				D27.f[DIR_MM0] = &DD27[DIR_PP0  *size_Mat];
+				D27.f[DIR_PP0] = &DD27[DIR_MM0  *size_Mat];
+				D27.f[DIR_MP0] = &DD27[DIR_PM0  *size_Mat];
+				D27.f[DIR_PM0] = &DD27[DIR_MP0  *size_Mat];
+				D27.f[DIR_M0M] = &DD27[DIR_P0P  *size_Mat];
+				D27.f[DIR_P0P] = &DD27[DIR_M0M  *size_Mat];
+				D27.f[DIR_M0P] = &DD27[DIR_P0M  *size_Mat];
+				D27.f[DIR_P0M] = &DD27[DIR_M0P  *size_Mat];
+				D27.f[DIR_0MM] = &DD27[DIR_0PP  *size_Mat];
+				D27.f[DIR_0PP] = &DD27[DIR_0MM  *size_Mat];
+				D27.f[DIR_0MP] = &DD27[DIR_0PM  *size_Mat];
+				D27.f[DIR_0PM] = &DD27[DIR_0MP  *size_Mat];
+				D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
+				D27.f[DIR_MMM] = &DD27[DIR_PPP *size_Mat];
+				D27.f[DIR_PPM] = &DD27[DIR_MMP *size_Mat];
+				D27.f[DIR_MPM] = &DD27[DIR_PMP *size_Mat];
+				D27.f[DIR_PMM] = &DD27[DIR_MPP *size_Mat];
+				D27.f[DIR_MMP] = &DD27[DIR_PPM *size_Mat];
+				D27.f[DIR_PPP] = &DD27[DIR_MMM *size_Mat];
+				D27.f[DIR_MPP] = &DD27[DIR_PMM *size_Mat];
+				D27.f[DIR_PMP] = &DD27[DIR_MPM *size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -169,61 +169,61 @@ extern "C" __global__ void LB_Kernel_AD_Incomp_27(real diffusivity,
 			unsigned int kbs = neighborZ[ks];
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real fW = (D.f[E])[k];//ke
-			real fE = (D.f[W])[kw];
-			real fS = (D.f[N])[k];//kn
-			real fN = (D.f[S])[ks];
-			real fB = (D.f[T])[k];//kt
-			real fT = (D.f[B])[kb];
-			real fSW = (D.f[NE])[k];//kne
-			real fNE = (D.f[SW])[ksw];
-			real fNW = (D.f[SE])[ks];//kse
-			real fSE = (D.f[NW])[kw];//knw
-			real fBW = (D.f[TE])[k];//kte
-			real fTE = (D.f[BW])[kbw];
-			real fTW = (D.f[BE])[kb];//kbe
-			real fBE = (D.f[TW])[kw];//ktw
-			real fBS = (D.f[TN])[k];//ktn
-			real fTN = (D.f[BS])[kbs];
-			real fTS = (D.f[BN])[kb];//kbn
-			real fBN = (D.f[TS])[ks];//kts
-			//real fZERO = (D.f[REST])[k];//kzero
-			real fBSW = (D.f[TNE])[k];//ktne
-			real fBNE = (D.f[TSW])[ksw];//ktsw
-			real fBNW = (D.f[TSE])[ks];//ktse
-			real fBSE = (D.f[TNW])[kw];//ktnw
-			real fTSW = (D.f[BNE])[kb];//kbne
-			real fTNE = (D.f[BSW])[kbsw];
-			real fTNW = (D.f[BSE])[kbs];//kbse
-			real fTSE = (D.f[BNW])[kbw];//kbnw
+			real fW = (D.f[DIR_P00])[k];//ke
+			real fE = (D.f[DIR_M00])[kw];
+			real fS = (D.f[DIR_0P0])[k];//kn
+			real fN = (D.f[DIR_0M0])[ks];
+			real fB = (D.f[DIR_00P])[k];//kt
+			real fT = (D.f[DIR_00M])[kb];
+			real fSW = (D.f[DIR_PP0])[k];//kne
+			real fNE = (D.f[DIR_MM0])[ksw];
+			real fNW = (D.f[DIR_PM0])[ks];//kse
+			real fSE = (D.f[DIR_MP0])[kw];//knw
+			real fBW = (D.f[DIR_P0P])[k];//kte
+			real fTE = (D.f[DIR_M0M])[kbw];
+			real fTW = (D.f[DIR_P0M])[kb];//kbe
+			real fBE = (D.f[DIR_M0P])[kw];//ktw
+			real fBS = (D.f[DIR_0PP])[k];//ktn
+			real fTN = (D.f[DIR_0MM])[kbs];
+			real fTS = (D.f[DIR_0PM])[kb];//kbn
+			real fBN = (D.f[DIR_0MP])[ks];//kts
+			//real fZERO = (D.f[DIR_000])[k];//kzero
+			real fBSW = (D.f[DIR_PPP])[k];//ktne
+			real fBNE = (D.f[DIR_MMP])[ksw];//ktsw
+			real fBNW = (D.f[DIR_PMP])[ks];//ktse
+			real fBSE = (D.f[DIR_MPP])[kw];//ktnw
+			real fTSW = (D.f[DIR_PPM])[kb];//kbne
+			real fTNE = (D.f[DIR_MMM])[kbsw];
+			real fTNW = (D.f[DIR_PMM])[kbs];//kbse
+			real fTSE = (D.f[DIR_MPM])[kbw];//kbnw
 										   ////////////////////////////////////////////////////////////////////////////////
-										   //real f27E    =  (D27.f[E   ])[k  ];//ke
-										   //real f27W    =  (D27.f[W   ])[kw ];
-										   //real f27N    =  (D27.f[N   ])[k  ];//kn
-										   //real f27S    =  (D27.f[S   ])[ks ];
-										   //real f27T    =  (D27.f[T   ])[k  ];//kt
-										   //real f27B    =  (D27.f[B   ])[kb ];
-										   //real f27NE   =  (D27.f[NE  ])[k  ];//kne
-										   //real f27SW   =  (D27.f[SW  ])[ksw];
-										   //real f27SE   =  (D27.f[SE  ])[ks ];//kse
-										   //real f27NW   =  (D27.f[NW  ])[kw ];//knw
-										   //real f27TE   =  (D27.f[TE  ])[k  ];//kte
-										   //real f27BW   =  (D27.f[BW  ])[kbw];
-										   //real f27BE   =  (D27.f[BE  ])[kb ];//kbe
-										   //real f27TW   =  (D27.f[TW  ])[kw ];//ktw
-										   //real f27TN   =  (D27.f[TN  ])[k  ];//ktn
-										   //real f27BS   =  (D27.f[BS  ])[kbs];
-										   //real f27BN   =  (D27.f[BN  ])[kb ];//kbn
-										   //real f27TS   =  (D27.f[TS  ])[ks ];//kts
-										   //real f27ZERO =  (D27.f[REST])[k  ];//kzero
-										   //real f27TNE  =  (D27.f[TNE ])[k  ];//ktne
-										   //real f27TSW  =  (D27.f[TSW ])[ksw];//ktsw
-										   //real f27TSE  =  (D27.f[TSE ])[ks ];//ktse
-										   //real f27TNW  =  (D27.f[TNW ])[kw ];//ktnw
-										   //real f27BNE  =  (D27.f[BNE ])[kb ];//kbne
-										   //real f27BSW  =  (D27.f[BSW ])[kbsw];
-										   //real f27BSE  =  (D27.f[BSE ])[kbs];//kbse
-										   //real f27BNW  =  (D27.f[BNW ])[kbw];//kbnw
+										   //real f27E    =  (D27.f[DIR_P00   ])[k  ];//ke
+										   //real f27W    =  (D27.f[DIR_M00   ])[kw ];
+										   //real f27N    =  (D27.f[DIR_0P0   ])[k  ];//kn
+										   //real f27S    =  (D27.f[DIR_0M0   ])[ks ];
+										   //real f27T    =  (D27.f[DIR_00P   ])[k  ];//kt
+										   //real f27B    =  (D27.f[DIR_00M   ])[kb ];
+										   //real f27NE   =  (D27.f[DIR_PP0  ])[k  ];//kne
+										   //real f27SW   =  (D27.f[DIR_MM0  ])[ksw];
+										   //real f27SE   =  (D27.f[DIR_PM0  ])[ks ];//kse
+										   //real f27NW   =  (D27.f[DIR_MP0  ])[kw ];//knw
+										   //real f27TE   =  (D27.f[DIR_P0P  ])[k  ];//kte
+										   //real f27BW   =  (D27.f[DIR_M0M  ])[kbw];
+										   //real f27BE   =  (D27.f[DIR_P0M  ])[kb ];//kbe
+										   //real f27TW   =  (D27.f[DIR_M0P  ])[kw ];//ktw
+										   //real f27TN   =  (D27.f[DIR_0PP  ])[k  ];//ktn
+										   //real f27BS   =  (D27.f[DIR_0MM  ])[kbs];
+										   //real f27BN   =  (D27.f[DIR_0PM  ])[kb ];//kbn
+										   //real f27TS   =  (D27.f[DIR_0MP  ])[ks ];//kts
+										   //real f27ZERO =  (D27.f[DIR_000])[k  ];//kzero
+										   //real f27TNE  =  (D27.f[DIR_PPP ])[k  ];//ktne
+										   //real f27TSW  =  (D27.f[DIR_MMP ])[ksw];//ktsw
+										   //real f27TSE  =  (D27.f[DIR_PMP ])[ks ];//ktse
+										   //real f27TNW  =  (D27.f[DIR_MPP ])[kw ];//ktnw
+										   //real f27BNE  =  (D27.f[DIR_PPM ])[kb ];//kbne
+										   //real f27BSW  =  (D27.f[DIR_MMM ])[kbsw];
+										   //real f27BSE  =  (D27.f[DIR_PMM ])[kbs];//kbse
+										   //real f27BNW  =  (D27.f[DIR_MPM ])[kbw];//kbnw
 										   ////////////////////////////////////////////////////////////////////////////////
 										   //real vx1     =  ((fTNE-fBSW)+(fBNE-fTSW)+(fTSE-fBNW)+(fBSE-fTNW) +(fNE-fSW)+(fSE-fNW)+(fTE-fBW)+(fBE-fTW)+(fE-fW));
 										   //real vx2     =  ((fTNE-fBSW)+(fBNE-fTSW)+(fBNW-fTSE)+(fTNW-fBSE) +(fNE-fSW)+(fNW-fSE)+(fTN-fBS)+(fBN-fTS)+(fN-fS));
@@ -231,33 +231,33 @@ extern "C" __global__ void LB_Kernel_AD_Incomp_27(real diffusivity,
 										   ////////////////////////////////////////////////////////////////////////////////
 
 
-			real mfcbb = (D27.f[E])[k];
-			real mfabb = (D27.f[W])[kw];
-			real mfbcb = (D27.f[N])[k];
-			real mfbab = (D27.f[S])[ks];
-			real mfbbc = (D27.f[T])[k];
-			real mfbba = (D27.f[B])[kb];
-			real mfccb = (D27.f[NE])[k];
-			real mfaab = (D27.f[SW])[ksw];
-			real mfcab = (D27.f[SE])[ks];
-			real mfacb = (D27.f[NW])[kw];
-			real mfcbc = (D27.f[TE])[k];
-			real mfaba = (D27.f[BW])[kbw];
-			real mfcba = (D27.f[BE])[kb];
-			real mfabc = (D27.f[TW])[kw];
-			real mfbcc = (D27.f[TN])[k];
-			real mfbaa = (D27.f[BS])[kbs];
-			real mfbca = (D27.f[BN])[kb];
-			real mfbac = (D27.f[TS])[ks];
-			real mfbbb = (D27.f[REST])[k];
-			real mfccc = (D27.f[TNE])[k];
-			real mfaac = (D27.f[TSW])[ksw];
-			real mfcac = (D27.f[TSE])[ks];
-			real mfacc = (D27.f[TNW])[kw];
-			real mfcca = (D27.f[BNE])[kb];
-			real mfaaa = (D27.f[BSW])[kbsw];
-			real mfcaa = (D27.f[BSE])[kbs];
-			real mfaca = (D27.f[BNW])[kbw];
+			real mfcbb = (D27.f[DIR_P00])[k];
+			real mfabb = (D27.f[DIR_M00])[kw];
+			real mfbcb = (D27.f[DIR_0P0])[k];
+			real mfbab = (D27.f[DIR_0M0])[ks];
+			real mfbbc = (D27.f[DIR_00P])[k];
+			real mfbba = (D27.f[DIR_00M])[kb];
+			real mfccb = (D27.f[DIR_PP0])[k];
+			real mfaab = (D27.f[DIR_MM0])[ksw];
+			real mfcab = (D27.f[DIR_PM0])[ks];
+			real mfacb = (D27.f[DIR_MP0])[kw];
+			real mfcbc = (D27.f[DIR_P0P])[k];
+			real mfaba = (D27.f[DIR_M0M])[kbw];
+			real mfcba = (D27.f[DIR_P0M])[kb];
+			real mfabc = (D27.f[DIR_M0P])[kw];
+			real mfbcc = (D27.f[DIR_0PP])[k];
+			real mfbaa = (D27.f[DIR_0MM])[kbs];
+			real mfbca = (D27.f[DIR_0PM])[kb];
+			real mfbac = (D27.f[DIR_0MP])[ks];
+			real mfbbb = (D27.f[DIR_000])[k];
+			real mfccc = (D27.f[DIR_PPP])[k];
+			real mfaac = (D27.f[DIR_MMP])[ksw];
+			real mfcac = (D27.f[DIR_PMP])[ks];
+			real mfacc = (D27.f[DIR_MPP])[kw];
+			real mfcca = (D27.f[DIR_PPM])[kb];
+			real mfaaa = (D27.f[DIR_MMM])[kbsw];
+			real mfcaa = (D27.f[DIR_PMM])[kbs];
+			real mfaca = (D27.f[DIR_MPM])[kbw];
 			////////////////////////////////////////////////////////////////////////////////////
 			//Conc
 			real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
@@ -301,33 +301,33 @@ extern "C" __global__ void LB_Kernel_AD_Incomp_27(real diffusivity,
 			//                  (f27E+f27W)+(f27N+f27S)+(f27T+f27B)+f27ZERO;
 			//real cusq    =  c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
-			//(D27.f[ E   ])[k   ] = f27W    *(one-omegaD)+omegaD* c2over27* ConcD*(one+three*(-vx1        )+c9over2*(-vx1        )*(-vx1        )-cusq);                                                                     
-			//(D27.f[ W   ])[kw  ] = f27E    *(one-omegaD)+omegaD* c2over27* ConcD*(one+three*( vx1        )+c9over2*( vx1        )*( vx1        )-cusq);                                                                     
-			//(D27.f[ N   ])[k   ] = f27S    *(one-omegaD)+omegaD* c2over27* ConcD*(one+three*(   -vx2     )+c9over2*(    -vx2    )*(    -vx2    )-cusq);
-			//(D27.f[ S   ])[ks  ] = f27N    *(one-omegaD)+omegaD* c2over27* ConcD*(one+three*(    vx2     )+c9over2*(     vx2    )*(     vx2    )-cusq);
-			//(D27.f[ T   ])[k   ] = f27B    *(one-omegaD)+omegaD* c2over27* ConcD*(one+three*(        -vx3)+c9over2*(        -vx3)*(        -vx3)-cusq);
-			//(D27.f[ B   ])[kb  ] = f27T    *(one-omegaD)+omegaD* c2over27* ConcD*(one+three*(         vx3)+c9over2*(         vx3)*(         vx3)-cusq);
-			//(D27.f[ NE  ])[k   ] = f27SW   *(one-omegaD)+omegaD* c1over54* ConcD*(one+three*(-vx1-vx2    )+c9over2*(-vx1-vx2    )*(-vx1-vx2    )-cusq);
-			//(D27.f[ SW  ])[ksw ] = f27NE   *(one-omegaD)+omegaD* c1over54* ConcD*(one+three*( vx1+vx2    )+c9over2*( vx1+vx2    )*( vx1+vx2    )-cusq);
-			//(D27.f[ SE  ])[ks  ] = f27NW   *(one-omegaD)+omegaD* c1over54* ConcD*(one+three*(-vx1+vx2    )+c9over2*(-vx1+vx2    )*(-vx1+vx2    )-cusq);
-			//(D27.f[ NW  ])[kw  ] = f27SE   *(one-omegaD)+omegaD* c1over54* ConcD*(one+three*( vx1-vx2    )+c9over2*( vx1-vx2    )*( vx1-vx2    )-cusq);
-			//(D27.f[ TE  ])[k   ] = f27BW   *(one-omegaD)+omegaD* c1over54* ConcD*(one+three*(-vx1    -vx3)+c9over2*(-vx1    -vx3)*(-vx1    -vx3)-cusq);
-			//(D27.f[ BW  ])[kbw ] = f27TE   *(one-omegaD)+omegaD* c1over54* ConcD*(one+three*( vx1    +vx3)+c9over2*( vx1    +vx3)*( vx1    +vx3)-cusq);
-			//(D27.f[ BE  ])[kb  ] = f27TW   *(one-omegaD)+omegaD* c1over54* ConcD*(one+three*(-vx1    +vx3)+c9over2*(-vx1    +vx3)*(-vx1    +vx3)-cusq);
-			//(D27.f[ TW  ])[kw  ] = f27BE   *(one-omegaD)+omegaD* c1over54* ConcD*(one+three*( vx1    -vx3)+c9over2*( vx1    -vx3)*( vx1    -vx3)-cusq);
-			//(D27.f[ TN  ])[k   ] = f27BS   *(one-omegaD)+omegaD* c1over54* ConcD*(one+three*(    -vx2-vx3)+c9over2*(    -vx2-vx3)*(    -vx2-vx3)-cusq);
-			//(D27.f[ BS  ])[kbs ] = f27TN   *(one-omegaD)+omegaD* c1over54* ConcD*(one+three*(     vx2+vx3)+c9over2*(     vx2+vx3)*(     vx2+vx3)-cusq);
-			//(D27.f[ BN  ])[kb  ] = f27TS   *(one-omegaD)+omegaD* c1over54* ConcD*(one+three*(    -vx2+vx3)+c9over2*(    -vx2+vx3)*(    -vx2+vx3)-cusq);
-			//(D27.f[ TS  ])[ks  ] = f27BN   *(one-omegaD)+omegaD* c1over54* ConcD*(one+three*(     vx2-vx3)+c9over2*(     vx2-vx3)*(     vx2-vx3)-cusq);
-			//(D27.f[ REST])[k   ] = f27ZERO *(one-omegaD)+omegaD* c8over27* ConcD*(one-cusq);
-			//(D27.f[ TNE ])[k   ] = f27BSW  *(one-omegaD)+omegaD* c1over216*ConcD*(one+three*(-vx1-vx2-vx3)+c9over2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cusq);
-			//(D27.f[ TSE ])[ks  ] = f27BNW  *(one-omegaD)+omegaD* c1over216*ConcD*(one+three*(-vx1+vx2-vx3)+c9over2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cusq);
-			//(D27.f[ BNE ])[kb  ] = f27TSW  *(one-omegaD)+omegaD* c1over216*ConcD*(one+three*(-vx1-vx2+vx3)+c9over2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cusq);
-			//(D27.f[ BSE ])[kbs ] = f27TNW  *(one-omegaD)+omegaD* c1over216*ConcD*(one+three*(-vx1+vx2+vx3)+c9over2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cusq);
-			//(D27.f[ TNW ])[kw  ] = f27BSE  *(one-omegaD)+omegaD* c1over216*ConcD*(one+three*( vx1-vx2-vx3)+c9over2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cusq);
-			//(D27.f[ TSW ])[ksw ] = f27BNE  *(one-omegaD)+omegaD* c1over216*ConcD*(one+three*( vx1+vx2-vx3)+c9over2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cusq);
-			//(D27.f[ BNW ])[kbw ] = f27TSE  *(one-omegaD)+omegaD* c1over216*ConcD*(one+three*( vx1-vx2+vx3)+c9over2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cusq);
-			//(D27.f[ BSW ])[kbsw] = f27TNE  *(one-omegaD)+omegaD* c1over216*ConcD*(one+three*( vx1+vx2+vx3)+c9over2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cusq);
+			//(D27.f[ DIR_P00   ])[k   ] = f27W    *(one-omegaD)+omegaD* c2over27* ConcD*(one+three*(-vx1        )+c9over2*(-vx1        )*(-vx1        )-cusq);
+			//(D27.f[ DIR_M00   ])[kw  ] = f27E    *(one-omegaD)+omegaD* c2over27* ConcD*(one+three*( vx1        )+c9over2*( vx1        )*( vx1        )-cusq);
+			//(D27.f[ DIR_0P0   ])[k   ] = f27S    *(one-omegaD)+omegaD* c2over27* ConcD*(one+three*(   -vx2     )+c9over2*(    -vx2    )*(    -vx2    )-cusq);
+			//(D27.f[ DIR_0M0   ])[ks  ] = f27N    *(one-omegaD)+omegaD* c2over27* ConcD*(one+three*(    vx2     )+c9over2*(     vx2    )*(     vx2    )-cusq);
+			//(D27.f[ DIR_00P   ])[k   ] = f27B    *(one-omegaD)+omegaD* c2over27* ConcD*(one+three*(        -vx3)+c9over2*(        -vx3)*(        -vx3)-cusq);
+			//(D27.f[ DIR_00M   ])[kb  ] = f27T    *(one-omegaD)+omegaD* c2over27* ConcD*(one+three*(         vx3)+c9over2*(         vx3)*(         vx3)-cusq);
+			//(D27.f[ DIR_PP0  ])[k   ] = f27SW   *(one-omegaD)+omegaD* c1over54* ConcD*(one+three*(-vx1-vx2    )+c9over2*(-vx1-vx2    )*(-vx1-vx2    )-cusq);
+			//(D27.f[ DIR_MM0  ])[ksw ] = f27NE   *(one-omegaD)+omegaD* c1over54* ConcD*(one+three*( vx1+vx2    )+c9over2*( vx1+vx2    )*( vx1+vx2    )-cusq);
+			//(D27.f[ DIR_PM0  ])[ks  ] = f27NW   *(one-omegaD)+omegaD* c1over54* ConcD*(one+three*(-vx1+vx2    )+c9over2*(-vx1+vx2    )*(-vx1+vx2    )-cusq);
+			//(D27.f[ DIR_MP0  ])[kw  ] = f27SE   *(one-omegaD)+omegaD* c1over54* ConcD*(one+three*( vx1-vx2    )+c9over2*( vx1-vx2    )*( vx1-vx2    )-cusq);
+			//(D27.f[ DIR_P0P  ])[k   ] = f27BW   *(one-omegaD)+omegaD* c1over54* ConcD*(one+three*(-vx1    -vx3)+c9over2*(-vx1    -vx3)*(-vx1    -vx3)-cusq);
+			//(D27.f[ DIR_M0M  ])[kbw ] = f27TE   *(one-omegaD)+omegaD* c1over54* ConcD*(one+three*( vx1    +vx3)+c9over2*( vx1    +vx3)*( vx1    +vx3)-cusq);
+			//(D27.f[ DIR_P0M  ])[kb  ] = f27TW   *(one-omegaD)+omegaD* c1over54* ConcD*(one+three*(-vx1    +vx3)+c9over2*(-vx1    +vx3)*(-vx1    +vx3)-cusq);
+			//(D27.f[ DIR_M0P  ])[kw  ] = f27BE   *(one-omegaD)+omegaD* c1over54* ConcD*(one+three*( vx1    -vx3)+c9over2*( vx1    -vx3)*( vx1    -vx3)-cusq);
+			//(D27.f[ DIR_0PP  ])[k   ] = f27BS   *(one-omegaD)+omegaD* c1over54* ConcD*(one+three*(    -vx2-vx3)+c9over2*(    -vx2-vx3)*(    -vx2-vx3)-cusq);
+			//(D27.f[ DIR_0MM  ])[kbs ] = f27TN   *(one-omegaD)+omegaD* c1over54* ConcD*(one+three*(     vx2+vx3)+c9over2*(     vx2+vx3)*(     vx2+vx3)-cusq);
+			//(D27.f[ DIR_0PM  ])[kb  ] = f27TS   *(one-omegaD)+omegaD* c1over54* ConcD*(one+three*(    -vx2+vx3)+c9over2*(    -vx2+vx3)*(    -vx2+vx3)-cusq);
+			//(D27.f[ DIR_0MP  ])[ks  ] = f27BN   *(one-omegaD)+omegaD* c1over54* ConcD*(one+three*(     vx2-vx3)+c9over2*(     vx2-vx3)*(     vx2-vx3)-cusq);
+			//(D27.f[ DIR_000])[k   ] = f27ZERO *(one-omegaD)+omegaD* c8over27* ConcD*(one-cusq);
+			//(D27.f[ DIR_PPP ])[k   ] = f27BSW  *(one-omegaD)+omegaD* c1over216*ConcD*(one+three*(-vx1-vx2-vx3)+c9over2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cusq);
+			//(D27.f[ DIR_PMP ])[ks  ] = f27BNW  *(one-omegaD)+omegaD* c1over216*ConcD*(one+three*(-vx1+vx2-vx3)+c9over2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cusq);
+			//(D27.f[ DIR_PPM ])[kb  ] = f27TSW  *(one-omegaD)+omegaD* c1over216*ConcD*(one+three*(-vx1-vx2+vx3)+c9over2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cusq);
+			//(D27.f[ DIR_PMM ])[kbs ] = f27TNW  *(one-omegaD)+omegaD* c1over216*ConcD*(one+three*(-vx1+vx2+vx3)+c9over2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cusq);
+			//(D27.f[ DIR_MPP ])[kw  ] = f27BSE  *(one-omegaD)+omegaD* c1over216*ConcD*(one+three*( vx1-vx2-vx3)+c9over2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cusq);
+			//(D27.f[ DIR_MMP ])[ksw ] = f27BNE  *(one-omegaD)+omegaD* c1over216*ConcD*(one+three*( vx1+vx2-vx3)+c9over2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cusq);
+			//(D27.f[ DIR_MPM ])[kbw ] = f27TSE  *(one-omegaD)+omegaD* c1over216*ConcD*(one+three*( vx1-vx2+vx3)+c9over2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cusq);
+			//(D27.f[ DIR_MMM ])[kbsw] = f27TNE  *(one-omegaD)+omegaD* c1over216*ConcD*(one+three*( vx1+vx2+vx3)+c9over2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cusq);
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
 			real oMdrho = c0o1;//one; // comp special
@@ -861,33 +861,33 @@ extern "C" __global__ void LB_Kernel_AD_Incomp_27(real diffusivity,
 
 
 			////////////////////////////////////////////////////////////////////////////////////
-			(D27.f[E])[k] = mfabb;
-			(D27.f[W])[kw] = mfcbb;
-			(D27.f[N])[k] = mfbab;
-			(D27.f[S])[ks] = mfbcb;
-			(D27.f[T])[k] = mfbba;
-			(D27.f[B])[kb] = mfbbc;
-			(D27.f[NE])[k] = mfaab;
-			(D27.f[SW])[ksw] = mfccb;
-			(D27.f[SE])[ks] = mfacb;
-			(D27.f[NW])[kw] = mfcab;
-			(D27.f[TE])[k] = mfaba;
-			(D27.f[BW])[kbw] = mfcbc;
-			(D27.f[BE])[kb] = mfabc;
-			(D27.f[TW])[kw] = mfcba;
-			(D27.f[TN])[k] = mfbaa;
-			(D27.f[BS])[kbs] = mfbcc;
-			(D27.f[BN])[kb] = mfbac;
-			(D27.f[TS])[ks] = mfbca;
-			(D27.f[REST])[k] = mfbbb;
-			(D27.f[TNE])[k] = mfaaa;
-			(D27.f[TSE])[ks] = mfaca;
-			(D27.f[BNE])[kb] = mfaac;
-			(D27.f[BSE])[kbs] = mfacc;
-			(D27.f[TNW])[kw] = mfcaa;
-			(D27.f[TSW])[ksw] = mfcca;
-			(D27.f[BNW])[kbw] = mfcac;
-			(D27.f[BSW])[kbsw] = mfccc;
+			(D27.f[DIR_P00])[k] = mfabb;
+			(D27.f[DIR_M00])[kw] = mfcbb;
+			(D27.f[DIR_0P0])[k] = mfbab;
+			(D27.f[DIR_0M0])[ks] = mfbcb;
+			(D27.f[DIR_00P])[k] = mfbba;
+			(D27.f[DIR_00M])[kb] = mfbbc;
+			(D27.f[DIR_PP0])[k] = mfaab;
+			(D27.f[DIR_MM0])[ksw] = mfccb;
+			(D27.f[DIR_PM0])[ks] = mfacb;
+			(D27.f[DIR_MP0])[kw] = mfcab;
+			(D27.f[DIR_P0P])[k] = mfaba;
+			(D27.f[DIR_M0M])[kbw] = mfcbc;
+			(D27.f[DIR_P0M])[kb] = mfabc;
+			(D27.f[DIR_M0P])[kw] = mfcba;
+			(D27.f[DIR_0PP])[k] = mfbaa;
+			(D27.f[DIR_0MM])[kbs] = mfbcc;
+			(D27.f[DIR_0PM])[kb] = mfbac;
+			(D27.f[DIR_0MP])[ks] = mfbca;
+			(D27.f[DIR_000])[k] = mfbbb;
+			(D27.f[DIR_PPP])[k] = mfaaa;
+			(D27.f[DIR_PMP])[ks] = mfaca;
+			(D27.f[DIR_PPM])[kb] = mfaac;
+			(D27.f[DIR_PMM])[kbs] = mfacc;
+			(D27.f[DIR_MPP])[kw] = mfcaa;
+			(D27.f[DIR_MMP])[ksw] = mfcca;
+			(D27.f[DIR_MPM])[kbw] = mfcac;
+			(D27.f[DIR_MMM])[kbsw] = mfccc;
 			////////////////////////////////////////////////////////////////////////////////////
 
 		}
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27_Device.cuh
index 9833a2bcdd0b13cbf229f97e43eced4c87009242..a6d94de4fadb9a93a9e5fed63d87731b12ec2a07 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod27/ADIncomp27/ADIncomp27_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_AD_Incomp_27(real diffusivity,
+__global__ void LB_Kernel_AD_Incomp_27(real diffusivity,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7_Device.cu
index 0cad7d9cdffc85f744dcc65ac38505b50c3befca..d49b0b48d20d976076a52f804d485b68da55348e 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_AD_Incomp_7(real diffusivity,
+__global__ void LB_Kernel_AD_Incomp_7(real diffusivity,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
@@ -38,63 +38,63 @@ extern "C" __global__ void LB_Kernel_AD_Incomp_7(real diffusivity,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[E] = &DDStart[E   *size_Mat];
-				D.f[W] = &DDStart[W   *size_Mat];
-				D.f[N] = &DDStart[N   *size_Mat];
-				D.f[S] = &DDStart[S   *size_Mat];
-				D.f[T] = &DDStart[T   *size_Mat];
-				D.f[B] = &DDStart[B   *size_Mat];
-				D.f[NE] = &DDStart[NE  *size_Mat];
-				D.f[SW] = &DDStart[SW  *size_Mat];
-				D.f[SE] = &DDStart[SE  *size_Mat];
-				D.f[NW] = &DDStart[NW  *size_Mat];
-				D.f[TE] = &DDStart[TE  *size_Mat];
-				D.f[BW] = &DDStart[BW  *size_Mat];
-				D.f[BE] = &DDStart[BE  *size_Mat];
-				D.f[TW] = &DDStart[TW  *size_Mat];
-				D.f[TN] = &DDStart[TN  *size_Mat];
-				D.f[BS] = &DDStart[BS  *size_Mat];
-				D.f[BN] = &DDStart[BN  *size_Mat];
-				D.f[TS] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[TNE] = &DDStart[TNE *size_Mat];
-				D.f[TSW] = &DDStart[TSW *size_Mat];
-				D.f[TSE] = &DDStart[TSE *size_Mat];
-				D.f[TNW] = &DDStart[TNW *size_Mat];
-				D.f[BNE] = &DDStart[BNE *size_Mat];
-				D.f[BSW] = &DDStart[BSW *size_Mat];
-				D.f[BSE] = &DDStart[BSE *size_Mat];
-				D.f[BNW] = &DDStart[BNW *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
 			}
 			else
 			{
-				D.f[W] = &DDStart[E   *size_Mat];
-				D.f[E] = &DDStart[W   *size_Mat];
-				D.f[S] = &DDStart[N   *size_Mat];
-				D.f[N] = &DDStart[S   *size_Mat];
-				D.f[B] = &DDStart[T   *size_Mat];
-				D.f[T] = &DDStart[B   *size_Mat];
-				D.f[SW] = &DDStart[NE  *size_Mat];
-				D.f[NE] = &DDStart[SW  *size_Mat];
-				D.f[NW] = &DDStart[SE  *size_Mat];
-				D.f[SE] = &DDStart[NW  *size_Mat];
-				D.f[BW] = &DDStart[TE  *size_Mat];
-				D.f[TE] = &DDStart[BW  *size_Mat];
-				D.f[TW] = &DDStart[BE  *size_Mat];
-				D.f[BE] = &DDStart[TW  *size_Mat];
-				D.f[BS] = &DDStart[TN  *size_Mat];
-				D.f[TN] = &DDStart[BS  *size_Mat];
-				D.f[TS] = &DDStart[BN  *size_Mat];
-				D.f[BN] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[BSW] = &DDStart[TNE *size_Mat];
-				D.f[BNE] = &DDStart[TSW *size_Mat];
-				D.f[BNW] = &DDStart[TSE *size_Mat];
-				D.f[BSE] = &DDStart[TNW *size_Mat];
-				D.f[TSW] = &DDStart[BNE *size_Mat];
-				D.f[TNE] = &DDStart[BSW *size_Mat];
-				D.f[TNW] = &DDStart[BSE *size_Mat];
-				D.f[TSE] = &DDStart[BNW *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
 			}
 
 			Distributions7 D7;
@@ -129,60 +129,60 @@ extern "C" __global__ void LB_Kernel_AD_Incomp_7(real diffusivity,
 			unsigned int kbs = neighborZ[ks];
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real fW = (D.f[E])[k];//ke
-			real fE = (D.f[W])[kw];
-			real fS = (D.f[N])[k];//kn
-			real fN = (D.f[S])[ks];
-			real fB = (D.f[T])[k];//kt
-			real fT = (D.f[B])[kb];
-			real fSW = (D.f[NE])[k];//kne
-			real fNE = (D.f[SW])[ksw];
-			real fNW = (D.f[SE])[ks];//kse
-			real fSE = (D.f[NW])[kw];//knw
-			real fBW = (D.f[TE])[k];//kte
-			real fTE = (D.f[BW])[kbw];
-			real fTW = (D.f[BE])[kb];//kbe
-			real fBE = (D.f[TW])[kw];//ktw
-			real fBS = (D.f[TN])[k];//ktn
-			real fTN = (D.f[BS])[kbs];
-			real fTS = (D.f[BN])[kb];//kbn
-			real fBN = (D.f[TS])[ks];//kts
-			//real fZERO = (D.f[REST])[k];//kzero
-			real fBSW = (D.f[TNE])[k];//ktne
-			real fBNE = (D.f[TSW])[ksw];//ktsw
-			real fBNW = (D.f[TSE])[ks];//ktse
-			real fBSE = (D.f[TNW])[kw];//ktnw
-			real fTSW = (D.f[BNE])[kb];//kbne
-			real fTNE = (D.f[BSW])[kbsw];
-			real fTNW = (D.f[BSE])[kbs];//kbse
-			real fTSE = (D.f[BNW])[kbw];//kbnw
-										   //real fE    =  (D.f[E   ])[k  ];//ke
-										   //real fW    =  (D.f[W   ])[kw ];
-										   //real fN    =  (D.f[N   ])[k  ];//kn
-										   //real fS    =  (D.f[S   ])[ks ];
-										   //real fT    =  (D.f[T   ])[k  ];//kt
-										   //real fB    =  (D.f[B   ])[kb ];
-										   //real fNE   =  (D.f[NE  ])[k  ];//kne
-										   //real fSW   =  (D.f[SW  ])[ksw];
-										   //real fSE   =  (D.f[SE  ])[ks ];//kse
-										   //real fNW   =  (D.f[NW  ])[kw ];//knw
-										   //real fTE   =  (D.f[TE  ])[k  ];//kte
-										   //real fBW   =  (D.f[BW  ])[kbw];
-										   //real fBE   =  (D.f[BE  ])[kb ];//kbe
-										   //real fTW   =  (D.f[TW  ])[kw ];//ktw
-										   //real fTN   =  (D.f[TN  ])[k  ];//ktn
-										   //real fBS   =  (D.f[BS  ])[kbs];
-										   //real fBN   =  (D.f[BN  ])[kb ];//kbn
-										   //real fTS   =  (D.f[TS  ])[ks ];//kts
-										   //real fZERO =  (D.f[REST])[k  ];//kzero
-										   //real fTNE   = (D.f[TNE ])[k  ];//ktne
-										   //real fTSW   = (D.f[TSW ])[ksw];//ktsw
-										   //real fTSE   = (D.f[TSE ])[ks ];//ktse
-										   //real fTNW   = (D.f[TNW ])[kw ];//ktnw
-										   //real fBNE   = (D.f[BNE ])[kb ];//kbne
-										   //real fBSW   = (D.f[BSW ])[kbsw];
-										   //real fBSE   = (D.f[BSE ])[kbs];//kbse
-										   //real fBNW   = (D.f[BNW ])[kbw];//kbnw
+			real fW = (D.f[DIR_P00])[k];//ke
+			real fE = (D.f[DIR_M00])[kw];
+			real fS = (D.f[DIR_0P0])[k];//kn
+			real fN = (D.f[DIR_0M0])[ks];
+			real fB = (D.f[DIR_00P])[k];//kt
+			real fT = (D.f[DIR_00M])[kb];
+			real fSW = (D.f[DIR_PP0])[k];//kne
+			real fNE = (D.f[DIR_MM0])[ksw];
+			real fNW = (D.f[DIR_PM0])[ks];//kse
+			real fSE = (D.f[DIR_MP0])[kw];//knw
+			real fBW = (D.f[DIR_P0P])[k];//kte
+			real fTE = (D.f[DIR_M0M])[kbw];
+			real fTW = (D.f[DIR_P0M])[kb];//kbe
+			real fBE = (D.f[DIR_M0P])[kw];//ktw
+			real fBS = (D.f[DIR_0PP])[k];//ktn
+			real fTN = (D.f[DIR_0MM])[kbs];
+			real fTS = (D.f[DIR_0PM])[kb];//kbn
+			real fBN = (D.f[DIR_0MP])[ks];//kts
+			//real fZERO = (D.f[DIR_000])[k];//kzero
+			real fBSW = (D.f[DIR_PPP])[k];//ktne
+			real fBNE = (D.f[DIR_MMP])[ksw];//ktsw
+			real fBNW = (D.f[DIR_PMP])[ks];//ktse
+			real fBSE = (D.f[DIR_MPP])[kw];//ktnw
+			real fTSW = (D.f[DIR_PPM])[kb];//kbne
+			real fTNE = (D.f[DIR_MMM])[kbsw];
+			real fTNW = (D.f[DIR_PMM])[kbs];//kbse
+			real fTSE = (D.f[DIR_MPM])[kbw];//kbnw
+										   //real fE    =  (D.f[DIR_P00   ])[k  ];//ke
+										   //real fW    =  (D.f[DIR_M00   ])[kw ];
+										   //real fN    =  (D.f[DIR_0P0   ])[k  ];//kn
+										   //real fS    =  (D.f[DIR_0M0   ])[ks ];
+										   //real fT    =  (D.f[DIR_00P   ])[k  ];//kt
+										   //real fB    =  (D.f[DIR_00M   ])[kb ];
+										   //real fNE   =  (D.f[DIR_PP0  ])[k  ];//kne
+										   //real fSW   =  (D.f[DIR_MM0  ])[ksw];
+										   //real fSE   =  (D.f[DIR_PM0  ])[ks ];//kse
+										   //real fNW   =  (D.f[DIR_MP0  ])[kw ];//knw
+										   //real fTE   =  (D.f[DIR_P0P  ])[k  ];//kte
+										   //real fBW   =  (D.f[DIR_M0M  ])[kbw];
+										   //real fBE   =  (D.f[DIR_P0M  ])[kb ];//kbe
+										   //real fTW   =  (D.f[DIR_M0P  ])[kw ];//ktw
+										   //real fTN   =  (D.f[DIR_0PP  ])[k  ];//ktn
+										   //real fBS   =  (D.f[DIR_0MM  ])[kbs];
+										   //real fBN   =  (D.f[DIR_0PM  ])[kb ];//kbn
+										   //real fTS   =  (D.f[DIR_0MP  ])[ks ];//kts
+										   //real fZERO =  (D.f[DIR_000])[k  ];//kzero
+										   //real fTNE   = (D.f[DIR_PPP ])[k  ];//ktne
+										   //real fTSW   = (D.f[DIR_MMP ])[ksw];//ktsw
+										   //real fTSE   = (D.f[DIR_PMP ])[ks ];//ktse
+										   //real fTNW   = (D.f[DIR_MPP ])[kw ];//ktnw
+										   //real fBNE   = (D.f[DIR_PPM ])[kb ];//kbne
+										   //real fBSW   = (D.f[DIR_MMM ])[kbsw];
+										   //real fBSE   = (D.f[DIR_PMM ])[kbs];//kbse
+										   //real fBNW   = (D.f[DIR_MPM ])[kbw];//kbnw
 										   ////////////////////////////////////////////////////////////////////////////////
 			real f7ZERO = (D7.f[0])[k];
 			real f7E = (D7.f[1])[k];
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7_Device.cuh
index fe489784861839d778d65e9ddf14ed449a9ad7e0..25a17ddbd7038635a2beb2c39212822cbf762034 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/AdvectionDiffusion/Incompressible/Mod7/ADIncomp7/ADIncomp7_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_AD_Incomp_7(real diffusivity,
+__global__ void LB_Kernel_AD_Incomp_7(real diffusivity,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27_Device.cu
index a9cf249541b2e42768c0ad72611f3f7626b71b5b..09196d13e94a2404ba280e8a8e9394f0a79e8211 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27_Device.cu
@@ -7,7 +7,7 @@ using namespace vf::lbm::dir;
 #include "math.h"
 
 
-extern "C" __global__ void LB_Kernel_BGK_Comp_SP_27(	real omega,
+__global__ void LB_Kernel_BGK_Comp_SP_27(	real omega,
 														unsigned int* bcMatD,
 														unsigned int* neighborX,
 														unsigned int* neighborY,
@@ -38,63 +38,63 @@ extern "C" __global__ void LB_Kernel_BGK_Comp_SP_27(	real omega,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[E] = &DDStart[E   *size_Mat];
-				D.f[W] = &DDStart[W   *size_Mat];
-				D.f[N] = &DDStart[N   *size_Mat];
-				D.f[S] = &DDStart[S   *size_Mat];
-				D.f[T] = &DDStart[T   *size_Mat];
-				D.f[B] = &DDStart[B   *size_Mat];
-				D.f[NE] = &DDStart[NE  *size_Mat];
-				D.f[SW] = &DDStart[SW  *size_Mat];
-				D.f[SE] = &DDStart[SE  *size_Mat];
-				D.f[NW] = &DDStart[NW  *size_Mat];
-				D.f[TE] = &DDStart[TE  *size_Mat];
-				D.f[BW] = &DDStart[BW  *size_Mat];
-				D.f[BE] = &DDStart[BE  *size_Mat];
-				D.f[TW] = &DDStart[TW  *size_Mat];
-				D.f[TN] = &DDStart[TN  *size_Mat];
-				D.f[BS] = &DDStart[BS  *size_Mat];
-				D.f[BN] = &DDStart[BN  *size_Mat];
-				D.f[TS] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[TNE] = &DDStart[TNE *size_Mat];
-				D.f[TSW] = &DDStart[TSW *size_Mat];
-				D.f[TSE] = &DDStart[TSE *size_Mat];
-				D.f[TNW] = &DDStart[TNW *size_Mat];
-				D.f[BNE] = &DDStart[BNE *size_Mat];
-				D.f[BSW] = &DDStart[BSW *size_Mat];
-				D.f[BSE] = &DDStart[BSE *size_Mat];
-				D.f[BNW] = &DDStart[BNW *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
 			}
 			else
 			{
-				D.f[W] = &DDStart[E   *size_Mat];
-				D.f[E] = &DDStart[W   *size_Mat];
-				D.f[S] = &DDStart[N   *size_Mat];
-				D.f[N] = &DDStart[S   *size_Mat];
-				D.f[B] = &DDStart[T   *size_Mat];
-				D.f[T] = &DDStart[B   *size_Mat];
-				D.f[SW] = &DDStart[NE  *size_Mat];
-				D.f[NE] = &DDStart[SW  *size_Mat];
-				D.f[NW] = &DDStart[SE  *size_Mat];
-				D.f[SE] = &DDStart[NW  *size_Mat];
-				D.f[BW] = &DDStart[TE  *size_Mat];
-				D.f[TE] = &DDStart[BW  *size_Mat];
-				D.f[TW] = &DDStart[BE  *size_Mat];
-				D.f[BE] = &DDStart[TW  *size_Mat];
-				D.f[BS] = &DDStart[TN  *size_Mat];
-				D.f[TN] = &DDStart[BS  *size_Mat];
-				D.f[TS] = &DDStart[BN  *size_Mat];
-				D.f[BN] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[BSW] = &DDStart[TNE *size_Mat];
-				D.f[BNE] = &DDStart[TSW *size_Mat];
-				D.f[BNW] = &DDStart[TSE *size_Mat];
-				D.f[BSE] = &DDStart[TNW *size_Mat];
-				D.f[TSW] = &DDStart[BNE *size_Mat];
-				D.f[TNE] = &DDStart[BSW *size_Mat];
-				D.f[TNW] = &DDStart[BSE *size_Mat];
-				D.f[TSE] = &DDStart[BNW *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -127,33 +127,33 @@ extern "C" __global__ void LB_Kernel_BGK_Comp_SP_27(	real omega,
 			//unsigned int ktne = k;
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real fE = (D.f[E])[k];//ke
-			real fW = (D.f[W])[kw];
-			real fN = (D.f[N])[k];//kn
-			real fS = (D.f[S])[ks];
-			real fT = (D.f[T])[k];//kt
-			real fB = (D.f[B])[kb];
-			real fNE = (D.f[NE])[k];//kne
-			real fSW = (D.f[SW])[ksw];
-			real fSE = (D.f[SE])[ks];//kse
-			real fNW = (D.f[NW])[kw];//knw
-			real fTE = (D.f[TE])[k];//kte
-			real fBW = (D.f[BW])[kbw];
-			real fBE = (D.f[BE])[kb];//kbe
-			real fTW = (D.f[TW])[kw];//ktw
-			real fTN = (D.f[TN])[k];//ktn
-			real fBS = (D.f[BS])[kbs];
-			real fBN = (D.f[BN])[kb];//kbn
-			real fTS = (D.f[TS])[ks];//kts
-			real fZERO = (D.f[REST])[k];//kzero
-			real fTNE = (D.f[TNE])[k];//ktne
-			real fTSW = (D.f[TSW])[ksw];//ktsw
-			real fTSE = (D.f[TSE])[ks];//ktse
-			real fTNW = (D.f[TNW])[kw];//ktnw
-			real fBNE = (D.f[BNE])[kb];//kbne
-			real fBSW = (D.f[BSW])[kbsw];
-			real fBSE = (D.f[BSE])[kbs];//kbse
-			real fBNW = (D.f[BNW])[kbw];//kbnw
+			real fE = (D.f[DIR_P00])[k];//ke
+			real fW = (D.f[DIR_M00])[kw];
+			real fN = (D.f[DIR_0P0])[k];//kn
+			real fS = (D.f[DIR_0M0])[ks];
+			real fT = (D.f[DIR_00P])[k];//kt
+			real fB = (D.f[DIR_00M])[kb];
+			real fNE = (D.f[DIR_PP0])[k];//kne
+			real fSW = (D.f[DIR_MM0])[ksw];
+			real fSE = (D.f[DIR_PM0])[ks];//kse
+			real fNW = (D.f[DIR_MP0])[kw];//knw
+			real fTE = (D.f[DIR_P0P])[k];//kte
+			real fBW = (D.f[DIR_M0M])[kbw];
+			real fBE = (D.f[DIR_P0M])[kb];//kbe
+			real fTW = (D.f[DIR_M0P])[kw];//ktw
+			real fTN = (D.f[DIR_0PP])[k];//ktn
+			real fBS = (D.f[DIR_0MM])[kbs];
+			real fBN = (D.f[DIR_0PM])[kb];//kbn
+			real fTS = (D.f[DIR_0MP])[ks];//kts
+			real fZERO = (D.f[DIR_000])[k];//kzero
+			real fTNE = (D.f[DIR_PPP])[k];//ktne
+			real fTSW = (D.f[DIR_MMP])[ksw];//ktsw
+			real fTSE = (D.f[DIR_PMP])[ks];//ktse
+			real fTNW = (D.f[DIR_MPP])[kw];//ktnw
+			real fBNE = (D.f[DIR_PPM])[kb];//kbne
+			real fBSW = (D.f[DIR_MMM])[kbsw];
+			real fBSE = (D.f[DIR_PMM])[kbs];//kbse
+			real fBNW = (D.f[DIR_MPM])[kbw];//kbnw
 										   ////////////////////////////////////////////////////////////////////////////////
 			real drho = (fTNE + fBSW) + (fTSW + fBNE) + (fTSE + fBNW) + (fTNW + fBSE) + (fNE + fSW) + (fNW + fSE) + (fTE + fBW) + (fBE + fTW) + (fTN + fBS) + (fBN + fTS) + (fE + fW) + (fN + fS) + (fT + fB) + fZERO;
 			real rho = drho + c1o1;
@@ -210,33 +210,33 @@ extern "C" __global__ void LB_Kernel_BGK_Comp_SP_27(	real omega,
 
 
 			//////////////////////////////////////////////////////////////////////////                            
-			(D.f[E])[k] = fW;
-			(D.f[W])[kw] = fE;
-			(D.f[N])[k] = fS;
-			(D.f[S])[ks] = fN;
-			(D.f[T])[k] = fB;
-			(D.f[B])[kb] = fT;
-			(D.f[NE])[k] = fSW;
-			(D.f[SW])[ksw] = fNE;
-			(D.f[SE])[ks] = fNW;
-			(D.f[NW])[kw] = fSE;
-			(D.f[TE])[k] = fBW;
-			(D.f[BW])[kbw] = fTE;
-			(D.f[BE])[kb] = fTW;
-			(D.f[TW])[kw] = fBE;
-			(D.f[TN])[k] = fBS;
-			(D.f[BS])[kbs] = fTN;
-			(D.f[BN])[kb] = fTS;
-			(D.f[TS])[ks] = fBN;
-			(D.f[REST])[k] = fZERO;
-			(D.f[TNE])[k] = fBSW;
-			(D.f[TSE])[ks] = fBNW;
-			(D.f[BNE])[kb] = fTSW;
-			(D.f[BSE])[kbs] = fTNW;
-			(D.f[TNW])[kw] = fBSE;
-			(D.f[TSW])[ksw] = fBNE;
-			(D.f[BNW])[kbw] = fTSE;
-			(D.f[BSW])[kbsw] = fTNE;
+			(D.f[DIR_P00])[k] = fW;
+			(D.f[DIR_M00])[kw] = fE;
+			(D.f[DIR_0P0])[k] = fS;
+			(D.f[DIR_0M0])[ks] = fN;
+			(D.f[DIR_00P])[k] = fB;
+			(D.f[DIR_00M])[kb] = fT;
+			(D.f[DIR_PP0])[k] = fSW;
+			(D.f[DIR_MM0])[ksw] = fNE;
+			(D.f[DIR_PM0])[ks] = fNW;
+			(D.f[DIR_MP0])[kw] = fSE;
+			(D.f[DIR_P0P])[k] = fBW;
+			(D.f[DIR_M0M])[kbw] = fTE;
+			(D.f[DIR_P0M])[kb] = fTW;
+			(D.f[DIR_M0P])[kw] = fBE;
+			(D.f[DIR_0PP])[k] = fBS;
+			(D.f[DIR_0MM])[kbs] = fTN;
+			(D.f[DIR_0PM])[kb] = fTS;
+			(D.f[DIR_0MP])[ks] = fBN;
+			(D.f[DIR_000])[k] = fZERO;
+			(D.f[DIR_PPP])[k] = fBSW;
+			(D.f[DIR_PMP])[ks] = fBNW;
+			(D.f[DIR_PPM])[kb] = fTSW;
+			(D.f[DIR_PMM])[kbs] = fTNW;
+			(D.f[DIR_MPP])[kw] = fBSE;
+			(D.f[DIR_MMP])[ksw] = fBNE;
+			(D.f[DIR_MPM])[kbw] = fTSE;
+			(D.f[DIR_MMM])[kbsw] = fTNE;
 			//////////////////////////////////////////////////////////////////////////                            
 		}
 	}
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27_Device.cuh
index fb6ef2eea656f29e9e788ca80dbf8aef801055d5..59a5240862ed92a9ea3e9187c503ee9233da7e5a 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGK/BGKCompSP27_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_BGK_Comp_SP_27(	real omega,
+__global__ void LB_Kernel_BGK_Comp_SP_27(	real omega,
 														unsigned int* bcMatD,
 														unsigned int* neighborX,
 														unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27_Device.cu
index 368785c5cc50bb9edc30cda9129fe655278f2f0d..325f65ece9baddf88adc91baa753bdfc4bd0eced 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_BGK_Plus_Comp_SP_27(
+__global__ void LB_Kernel_BGK_Plus_Comp_SP_27(
 	real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
@@ -38,63 +38,63 @@ extern "C" __global__ void LB_Kernel_BGK_Plus_Comp_SP_27(
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[E] = &DDStart[E   *size_Mat];
-				D.f[W] = &DDStart[W   *size_Mat];
-				D.f[N] = &DDStart[N   *size_Mat];
-				D.f[S] = &DDStart[S   *size_Mat];
-				D.f[T] = &DDStart[T   *size_Mat];
-				D.f[B] = &DDStart[B   *size_Mat];
-				D.f[NE] = &DDStart[NE  *size_Mat];
-				D.f[SW] = &DDStart[SW  *size_Mat];
-				D.f[SE] = &DDStart[SE  *size_Mat];
-				D.f[NW] = &DDStart[NW  *size_Mat];
-				D.f[TE] = &DDStart[TE  *size_Mat];
-				D.f[BW] = &DDStart[BW  *size_Mat];
-				D.f[BE] = &DDStart[BE  *size_Mat];
-				D.f[TW] = &DDStart[TW  *size_Mat];
-				D.f[TN] = &DDStart[TN  *size_Mat];
-				D.f[BS] = &DDStart[BS  *size_Mat];
-				D.f[BN] = &DDStart[BN  *size_Mat];
-				D.f[TS] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[TNE] = &DDStart[TNE *size_Mat];
-				D.f[TSW] = &DDStart[TSW *size_Mat];
-				D.f[TSE] = &DDStart[TSE *size_Mat];
-				D.f[TNW] = &DDStart[TNW *size_Mat];
-				D.f[BNE] = &DDStart[BNE *size_Mat];
-				D.f[BSW] = &DDStart[BSW *size_Mat];
-				D.f[BSE] = &DDStart[BSE *size_Mat];
-				D.f[BNW] = &DDStart[BNW *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
 			}
 			else
 			{
-				D.f[W] = &DDStart[E   *size_Mat];
-				D.f[E] = &DDStart[W   *size_Mat];
-				D.f[S] = &DDStart[N   *size_Mat];
-				D.f[N] = &DDStart[S   *size_Mat];
-				D.f[B] = &DDStart[T   *size_Mat];
-				D.f[T] = &DDStart[B   *size_Mat];
-				D.f[SW] = &DDStart[NE  *size_Mat];
-				D.f[NE] = &DDStart[SW  *size_Mat];
-				D.f[NW] = &DDStart[SE  *size_Mat];
-				D.f[SE] = &DDStart[NW  *size_Mat];
-				D.f[BW] = &DDStart[TE  *size_Mat];
-				D.f[TE] = &DDStart[BW  *size_Mat];
-				D.f[TW] = &DDStart[BE  *size_Mat];
-				D.f[BE] = &DDStart[TW  *size_Mat];
-				D.f[BS] = &DDStart[TN  *size_Mat];
-				D.f[TN] = &DDStart[BS  *size_Mat];
-				D.f[TS] = &DDStart[BN  *size_Mat];
-				D.f[BN] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[BSW] = &DDStart[TNE *size_Mat];
-				D.f[BNE] = &DDStart[TSW *size_Mat];
-				D.f[BNW] = &DDStart[TSE *size_Mat];
-				D.f[BSE] = &DDStart[TNW *size_Mat];
-				D.f[TSW] = &DDStart[BNE *size_Mat];
-				D.f[TNE] = &DDStart[BSW *size_Mat];
-				D.f[TNW] = &DDStart[BSE *size_Mat];
-				D.f[TSE] = &DDStart[BNW *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -127,33 +127,33 @@ extern "C" __global__ void LB_Kernel_BGK_Plus_Comp_SP_27(
 			//unsigned int ktne = k;
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[E])[k];//[ke   ];// +  c2over27 ;(D.f[E   ])[k  ];//ke
-			real mfabb = (D.f[W])[kw];//[kw   ];// +  c2over27 ;(D.f[W   ])[kw ];
-			real mfbcb = (D.f[N])[k];//[kn   ];// +  c2over27 ;(D.f[N   ])[k  ];//kn
-			real mfbab = (D.f[S])[ks];//[ks   ];// +  c2over27 ;(D.f[S   ])[ks ];
-			real mfbbc = (D.f[T])[k];//[kt   ];// +  c2over27 ;(D.f[T   ])[k  ];//kt
-			real mfbba = (D.f[B])[kb];//[kb   ];// +  c2over27 ;(D.f[B   ])[kb ];
-			real mfccb = (D.f[NE])[k];//[kne  ];// +  c1over54 ;(D.f[NE  ])[k  ];//kne
-			real mfaab = (D.f[SW])[ksw];//[ksw  ];// +  c1over54 ;(D.f[SW  ])[ksw];
-			real mfcab = (D.f[SE])[ks];//[kse  ];// +  c1over54 ;(D.f[SE  ])[ks ];//kse
-			real mfacb = (D.f[NW])[kw];//[knw  ];// +  c1over54 ;(D.f[NW  ])[kw ];//knw
-			real mfcbc = (D.f[TE])[k];//[kte  ];// +  c1over54 ;(D.f[TE  ])[k  ];//kte
-			real mfaba = (D.f[BW])[kbw];//[kbw  ];// +  c1over54 ;(D.f[BW  ])[kbw];
-			real mfcba = (D.f[BE])[kb];//[kbe  ];// +  c1over54 ;(D.f[BE  ])[kb ];//kbe
-			real mfabc = (D.f[TW])[kw];//[ktw  ];// +  c1over54 ;(D.f[TW  ])[kw ];//ktw
-			real mfbcc = (D.f[TN])[k];//[ktn  ];// +  c1over54 ;(D.f[TN  ])[k  ];//ktn
-			real mfbaa = (D.f[BS])[kbs];//[kbs  ];// +  c1over54 ;(D.f[BS  ])[kbs];
-			real mfbca = (D.f[BN])[kb];//[kbn  ];// +  c1over54 ;(D.f[BN  ])[kb ];//kbn
-			real mfbac = (D.f[TS])[ks];//[kts  ];// +  c1over54 ;(D.f[TS  ])[ks ];//kts
-			real mfbbb = (D.f[REST])[k];//[kzero];// +  c8over27 ;(D.f[REST])[k  ];//kzero
-			real mfccc = (D.f[TNE])[k];//[ktne ];// +  c1over216;(D.f[TNE ])[k  ];//ktne
-			real mfaac = (D.f[TSW])[ksw];//[ktsw ];// +  c1over216;(D.f[TSW ])[ksw];//ktsw
-			real mfcac = (D.f[TSE])[ks];//[ktse ];// +  c1over216;(D.f[TSE ])[ks ];//ktse
-			real mfacc = (D.f[TNW])[kw];//[ktnw ];// +  c1over216;(D.f[TNW ])[kw ];//ktnw
-			real mfcca = (D.f[BNE])[kb];//[kbne ];// +  c1over216;(D.f[BNE ])[kb ];//kbne
-			real mfaaa = (D.f[BSW])[kbsw];//[kbsw ];// +  c1over216;(D.f[BSW ])[kbsw];
-			real mfcaa = (D.f[BSE])[kbs];//[kbse ];// +  c1over216;(D.f[BSE ])[kbs];//kbse
-			real mfaca = (D.f[BNW])[kbw];//[kbnw ];// +  c1over216;(D.f[BNW ])[kbw];//kbnw
+			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
+			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
+			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfbbb = (D.f[DIR_000])[k];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
+			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
 											////////////////////////////////////////////////////////////////////////////////////
 											//slow
 											//real oMdrho = one - ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
@@ -793,33 +793,33 @@ extern "C" __global__ void LB_Kernel_BGK_Plus_Comp_SP_27(
 
 
 			////////////////////////////////////////////////////////////////////////////////////
-			(D.f[E])[k] = mfabb;//(D.f[ E   ])[ke   ] = mfabb;// -  c2over27 ;  (D.f[ E   ])[k   ]                                                                     
-			(D.f[W])[kw] = mfcbb;//(D.f[ W   ])[kw   ] = mfcbb;// -  c2over27 ;  (D.f[ W   ])[kw  ]                                                                   
-			(D.f[N])[k] = mfbab;//(D.f[ N   ])[kn   ] = mfbab;// -  c2over27 ;	 (D.f[ N   ])[k   ]
-			(D.f[S])[ks] = mfbcb;//(D.f[ S   ])[ks   ] = mfbcb;// -  c2over27 ;	 (D.f[ S   ])[ks  ]
-			(D.f[T])[k] = mfbba;//(D.f[ T   ])[kt   ] = mfbba;// -  c2over27 ;	 (D.f[ T   ])[k   ]
-			(D.f[B])[kb] = mfbbc;//(D.f[ B   ])[kb   ] = mfbbc;// -  c2over27 ;	 (D.f[ B   ])[kb  ]
-			(D.f[NE])[k] = mfaab;//(D.f[ NE  ])[kne  ] = mfaab;// -  c1over54 ;	 (D.f[ NE  ])[k   ]
-			(D.f[SW])[ksw] = mfccb;//(D.f[ SW  ])[ksw  ] = mfccb;// -  c1over54 ;	 (D.f[ SW  ])[ksw ]
-			(D.f[SE])[ks] = mfacb;//(D.f[ SE  ])[kse  ] = mfacb;// -  c1over54 ;	 (D.f[ SE  ])[ks  ]
-			(D.f[NW])[kw] = mfcab;//(D.f[ NW  ])[knw  ] = mfcab;// -  c1over54 ;	 (D.f[ NW  ])[kw  ]
-			(D.f[TE])[k] = mfaba;//(D.f[ TE  ])[kte  ] = mfaba;// -  c1over54 ;	 (D.f[ TE  ])[k   ]
-			(D.f[BW])[kbw] = mfcbc;//(D.f[ BW  ])[kbw  ] = mfcbc;// -  c1over54 ;	 (D.f[ BW  ])[kbw ]
-			(D.f[BE])[kb] = mfabc;//(D.f[ BE  ])[kbe  ] = mfabc;// -  c1over54 ;	 (D.f[ BE  ])[kb  ]
-			(D.f[TW])[kw] = mfcba;//(D.f[ TW  ])[ktw  ] = mfcba;// -  c1over54 ;	 (D.f[ TW  ])[kw  ]
-			(D.f[TN])[k] = mfbaa;//(D.f[ TN  ])[ktn  ] = mfbaa;// -  c1over54 ;	 (D.f[ TN  ])[k   ]
-			(D.f[BS])[kbs] = mfbcc;//(D.f[ BS  ])[kbs  ] = mfbcc;// -  c1over54 ;	 (D.f[ BS  ])[kbs ]
-			(D.f[BN])[kb] = mfbac;//(D.f[ BN  ])[kbn  ] = mfbac;// -  c1over54 ;	 (D.f[ BN  ])[kb  ]
-			(D.f[TS])[ks] = mfbca;//(D.f[ TS  ])[kts  ] = mfbca;// -  c1over54 ;	 (D.f[ TS  ])[ks  ]
-			(D.f[REST])[k] = mfbbb;//(D.f[ REST])[kzero] = mfbbb;// -  c8over27 ;	 (D.f[ REST])[k   ]
-			(D.f[TNE])[k] = mfaaa;//(D.f[ TNE ])[ktne ] = mfaaa;// -  c1over216;	 (D.f[ TNE ])[k   ]
-			(D.f[TSE])[ks] = mfaca;//(D.f[ TSE ])[ktse ] = mfaca;// -  c1over216;	 (D.f[ TSE ])[ks  ]
-			(D.f[BNE])[kb] = mfaac;//(D.f[ BNE ])[kbne ] = mfaac;// -  c1over216;	 (D.f[ BNE ])[kb  ]
-			(D.f[BSE])[kbs] = mfacc;//(D.f[ BSE ])[kbse ] = mfacc;// -  c1over216;	 (D.f[ BSE ])[kbs ]
-			(D.f[TNW])[kw] = mfcaa;//(D.f[ TNW ])[ktnw ] = mfcaa;// -  c1over216;	 (D.f[ TNW ])[kw  ]
-			(D.f[TSW])[ksw] = mfcca;//(D.f[ TSW ])[ktsw ] = mfcca;// -  c1over216;	 (D.f[ TSW ])[ksw ]
-			(D.f[BNW])[kbw] = mfcac;//(D.f[ BNW ])[kbnw ] = mfcac;// -  c1over216;	 (D.f[ BNW ])[kbw ]
-			(D.f[BSW])[kbsw] = mfccc;//(D.f[ BSW ])[kbsw ] = mfccc;// -  c1over216;	 (D.f[ BSW ])[kbsw]
+			(D.f[DIR_P00])[k] = mfabb;//(D.f[ DIR_P00   ])[ke   ] = mfabb;// -  c2over27 ;  (D.f[ DIR_P00   ])[k   ]                                                                     
+			(D.f[DIR_M00])[kw] = mfcbb;//(D.f[ DIR_M00   ])[kw   ] = mfcbb;// -  c2over27 ;  (D.f[ DIR_M00   ])[kw  ]                                                                   
+			(D.f[DIR_0P0])[k] = mfbab;//(D.f[ DIR_0P0   ])[kn   ] = mfbab;// -  c2over27 ;	 (D.f[ DIR_0P0   ])[k   ]
+			(D.f[DIR_0M0])[ks] = mfbcb;//(D.f[ DIR_0M0   ])[ks   ] = mfbcb;// -  c2over27 ;	 (D.f[ DIR_0M0   ])[ks  ]
+			(D.f[DIR_00P])[k] = mfbba;//(D.f[ DIR_00P   ])[kt   ] = mfbba;// -  c2over27 ;	 (D.f[ DIR_00P   ])[k   ]
+			(D.f[DIR_00M])[kb] = mfbbc;//(D.f[ DIR_00M   ])[kb   ] = mfbbc;// -  c2over27 ;	 (D.f[ DIR_00M   ])[kb  ]
+			(D.f[DIR_PP0])[k] = mfaab;//(D.f[ DIR_PP0  ])[kne  ] = mfaab;// -  c1over54 ;	 (D.f[ DIR_PP0  ])[k   ]
+			(D.f[DIR_MM0])[ksw] = mfccb;//(D.f[ DIR_MM0  ])[ksw  ] = mfccb;// -  c1over54 ;	 (D.f[ DIR_MM0  ])[ksw ]
+			(D.f[DIR_PM0])[ks] = mfacb;//(D.f[ DIR_PM0  ])[kse  ] = mfacb;// -  c1over54 ;	 (D.f[ DIR_PM0  ])[ks  ]
+			(D.f[DIR_MP0])[kw] = mfcab;//(D.f[ DIR_MP0  ])[knw  ] = mfcab;// -  c1over54 ;	 (D.f[ DIR_MP0  ])[kw  ]
+			(D.f[DIR_P0P])[k] = mfaba;//(D.f[ DIR_P0P  ])[kte  ] = mfaba;// -  c1over54 ;	 (D.f[ DIR_P0P  ])[k   ]
+			(D.f[DIR_M0M])[kbw] = mfcbc;//(D.f[ DIR_M0M  ])[kbw  ] = mfcbc;// -  c1over54 ;	 (D.f[ DIR_M0M  ])[kbw ]
+			(D.f[DIR_P0M])[kb] = mfabc;//(D.f[ DIR_P0M  ])[kbe  ] = mfabc;// -  c1over54 ;	 (D.f[ DIR_P0M  ])[kb  ]
+			(D.f[DIR_M0P])[kw] = mfcba;//(D.f[ DIR_M0P  ])[ktw  ] = mfcba;// -  c1over54 ;	 (D.f[ DIR_M0P  ])[kw  ]
+			(D.f[DIR_0PP])[k] = mfbaa;//(D.f[ DIR_0PP  ])[ktn  ] = mfbaa;// -  c1over54 ;	 (D.f[ DIR_0PP  ])[k   ]
+			(D.f[DIR_0MM])[kbs] = mfbcc;//(D.f[ DIR_0MM  ])[kbs  ] = mfbcc;// -  c1over54 ;	 (D.f[ DIR_0MM  ])[kbs ]
+			(D.f[DIR_0PM])[kb] = mfbac;//(D.f[ DIR_0PM  ])[kbn  ] = mfbac;// -  c1over54 ;	 (D.f[ DIR_0PM  ])[kb  ]
+			(D.f[DIR_0MP])[ks] = mfbca;//(D.f[ DIR_0MP  ])[kts  ] = mfbca;// -  c1over54 ;	 (D.f[ DIR_0MP  ])[ks  ]
+			(D.f[DIR_000])[k] = mfbbb;//(D.f[ DIR_000])[kzero] = mfbbb;// -  c8over27 ;	 (D.f[ DIR_000])[k   ]
+			(D.f[DIR_PPP])[k] = mfaaa;//(D.f[ DIR_PPP ])[ktne ] = mfaaa;// -  c1over216;	 (D.f[ DIR_PPP ])[k   ]
+			(D.f[DIR_PMP])[ks] = mfaca;//(D.f[ DIR_PMP ])[ktse ] = mfaca;// -  c1over216;	 (D.f[ DIR_PMP ])[ks  ]
+			(D.f[DIR_PPM])[kb] = mfaac;//(D.f[ DIR_PPM ])[kbne ] = mfaac;// -  c1over216;	 (D.f[ DIR_PPM ])[kb  ]
+			(D.f[DIR_PMM])[kbs] = mfacc;//(D.f[ DIR_PMM ])[kbse ] = mfacc;// -  c1over216;	 (D.f[ DIR_PMM ])[kbs ]
+			(D.f[DIR_MPP])[kw] = mfcaa;//(D.f[ DIR_MPP ])[ktnw ] = mfcaa;// -  c1over216;	 (D.f[ DIR_MPP ])[kw  ]
+			(D.f[DIR_MMP])[ksw] = mfcca;//(D.f[ DIR_MMP ])[ktsw ] = mfcca;// -  c1over216;	 (D.f[ DIR_MMP ])[ksw ]
+			(D.f[DIR_MPM])[kbw] = mfcac;//(D.f[ DIR_MPM ])[kbnw ] = mfcac;// -  c1over216;	 (D.f[ DIR_MPM ])[kbw ]
+			(D.f[DIR_MMM])[kbsw] = mfccc;//(D.f[ DIR_MMM ])[kbsw ] = mfccc;// -  c1over216;	 (D.f[ DIR_MMM ])[kbsw]
 										////////////////////////////////////////////////////////////////////////////////////
 		}
 	}
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27_Device.cuh
index 82e41dd80ab03e0d7ce8cee854fffa009c7fefee..9e991ffa4b16e0df78fe23f6ee5a1e0678919cd7 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/BGKPlus/BGKPlusCompSP27_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_BGK_Plus_Comp_SP_27(	real omega,
+__global__ void LB_Kernel_BGK_Plus_Comp_SP_27(	real omega,
 															unsigned int* bcMatD,
 															unsigned int* neighborX,
 															unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27_Device.cu
index ef29cf9593578696f74bbd7648f99f0b6038a12c..3f69fa47288343fbdd91e77dbb7f154501349098 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_Cascade_Comp_SP_27(real omega,
+__global__ void LB_Kernel_Cascade_Comp_SP_27(real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
@@ -37,63 +37,63 @@ extern "C" __global__ void LB_Kernel_Cascade_Comp_SP_27(real omega,
 			Distributions27 D;
 			if (EvenOrOdd==true)
 			{
-				D.f[E   ] = &DDStart[E   *size_Mat];
-				D.f[W   ] = &DDStart[W   *size_Mat];
-				D.f[N   ] = &DDStart[N   *size_Mat];
-				D.f[S   ] = &DDStart[S   *size_Mat];
-				D.f[T   ] = &DDStart[T   *size_Mat];
-				D.f[B   ] = &DDStart[B   *size_Mat];
-				D.f[NE  ] = &DDStart[NE  *size_Mat];
-				D.f[SW  ] = &DDStart[SW  *size_Mat];
-				D.f[SE  ] = &DDStart[SE  *size_Mat];
-				D.f[NW  ] = &DDStart[NW  *size_Mat];
-				D.f[TE  ] = &DDStart[TE  *size_Mat];
-				D.f[BW  ] = &DDStart[BW  *size_Mat];
-				D.f[BE  ] = &DDStart[BE  *size_Mat];
-				D.f[TW  ] = &DDStart[TW  *size_Mat];
-				D.f[TN  ] = &DDStart[TN  *size_Mat];
-				D.f[BS  ] = &DDStart[BS  *size_Mat];
-				D.f[BN  ] = &DDStart[BN  *size_Mat];
-				D.f[TS  ] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[TNE ] = &DDStart[TNE *size_Mat];
-				D.f[TSW ] = &DDStart[TSW *size_Mat];
-				D.f[TSE ] = &DDStart[TSE *size_Mat];
-				D.f[TNW ] = &DDStart[TNW *size_Mat];
-				D.f[BNE ] = &DDStart[BNE *size_Mat];
-				D.f[BSW ] = &DDStart[BSW *size_Mat];
-				D.f[BSE ] = &DDStart[BSE *size_Mat];
-				D.f[BNW ] = &DDStart[BNW *size_Mat];
+				D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
 			}
 			else
 			{
-				D.f[W   ] = &DDStart[E   *size_Mat];
-				D.f[E   ] = &DDStart[W   *size_Mat];
-				D.f[S   ] = &DDStart[N   *size_Mat];
-				D.f[N   ] = &DDStart[S   *size_Mat];
-				D.f[B   ] = &DDStart[T   *size_Mat];
-				D.f[T   ] = &DDStart[B   *size_Mat];
-				D.f[SW  ] = &DDStart[NE  *size_Mat];
-				D.f[NE  ] = &DDStart[SW  *size_Mat];
-				D.f[NW  ] = &DDStart[SE  *size_Mat];
-				D.f[SE  ] = &DDStart[NW  *size_Mat];
-				D.f[BW  ] = &DDStart[TE  *size_Mat];
-				D.f[TE  ] = &DDStart[BW  *size_Mat];
-				D.f[TW  ] = &DDStart[BE  *size_Mat];
-				D.f[BE  ] = &DDStart[TW  *size_Mat];
-				D.f[BS  ] = &DDStart[TN  *size_Mat];
-				D.f[TN  ] = &DDStart[BS  *size_Mat];
-				D.f[TS  ] = &DDStart[BN  *size_Mat];
-				D.f[BN  ] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[BSW ] = &DDStart[TNE *size_Mat];
-				D.f[BNE ] = &DDStart[TSW *size_Mat];
-				D.f[BNW ] = &DDStart[TSE *size_Mat];
-				D.f[BSE ] = &DDStart[TNW *size_Mat];
-				D.f[TSW ] = &DDStart[BNE *size_Mat];
-				D.f[TNE ] = &DDStart[BSW *size_Mat];
-				D.f[TNW ] = &DDStart[BSE *size_Mat];
-				D.f[TSE ] = &DDStart[BNW *size_Mat];
+				D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -126,33 +126,33 @@ extern "C" __global__ void LB_Kernel_Cascade_Comp_SP_27(real omega,
 			//unsigned int ktne = k;
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[E   ])[k  ];//[ke   ];
-			real mfabb = (D.f[W   ])[kw ];//[kw   ];
-			real mfbcb = (D.f[N   ])[k  ];//[kn   ];
-			real mfbab = (D.f[S   ])[ks ];//[ks   ];
-			real mfbbc = (D.f[T   ])[k  ];//[kt   ];
-			real mfbba = (D.f[B   ])[kb ];//[kb   ];
-			real mfccb = (D.f[NE  ])[k  ];//[kne  ];
-			real mfaab = (D.f[SW  ])[ksw];//[ksw  ];
-			real mfcab = (D.f[SE  ])[ks ];//[kse  ];
-			real mfacb = (D.f[NW  ])[kw ];//[knw  ];
-			real mfcbc = (D.f[TE  ])[k  ];//[kte  ];
-			real mfaba = (D.f[BW  ])[kbw];//[kbw  ];
-			real mfcba = (D.f[BE  ])[kb ];//[kbe  ];
-			real mfabc = (D.f[TW  ])[kw ];//[ktw  ];
-			real mfbcc = (D.f[TN  ])[k  ];//[ktn  ];
-			real mfbaa = (D.f[BS  ])[kbs];//[kbs  ];
-			real mfbca = (D.f[BN  ])[kb ];//[kbn  ];
-			real mfbac = (D.f[TS  ])[ks ];//[kts  ];
-			real mfbbb = (D.f[REST])[k  ];//[kzero];
-			real mfccc = (D.f[TNE ])[k  ];//[ktne ];
-			real mfaac = (D.f[TSW ])[ksw];//[ktsw ];
-			real mfcac = (D.f[TSE ])[ks ];//[ktse ];
-			real mfacc = (D.f[TNW ])[kw ];//[ktnw ];
-			real mfcca = (D.f[BNE ])[kb ];//[kbne ];
-			real mfaaa = (D.f[BSW ])[kbsw];//[kbsw ]
-			real mfcaa = (D.f[BSE ])[kbs];//[kbse ];
-			real mfaca = (D.f[BNW ])[kbw];//[kbnw ];
+			real mfcbb = (D.f[DIR_P00   ])[k  ];//[ke   ];
+			real mfabb = (D.f[DIR_M00   ])[kw ];//[kw   ];
+			real mfbcb = (D.f[DIR_0P0   ])[k  ];//[kn   ];
+			real mfbab = (D.f[DIR_0M0   ])[ks ];//[ks   ];
+			real mfbbc = (D.f[DIR_00P   ])[k  ];//[kt   ];
+			real mfbba = (D.f[DIR_00M   ])[kb ];//[kb   ];
+			real mfccb = (D.f[DIR_PP0  ])[k  ];//[kne  ];
+			real mfaab = (D.f[DIR_MM0  ])[ksw];//[ksw  ];
+			real mfcab = (D.f[DIR_PM0  ])[ks ];//[kse  ];
+			real mfacb = (D.f[DIR_MP0  ])[kw ];//[knw  ];
+			real mfcbc = (D.f[DIR_P0P  ])[k  ];//[kte  ];
+			real mfaba = (D.f[DIR_M0M  ])[kbw];//[kbw  ];
+			real mfcba = (D.f[DIR_P0M  ])[kb ];//[kbe  ];
+			real mfabc = (D.f[DIR_M0P  ])[kw ];//[ktw  ];
+			real mfbcc = (D.f[DIR_0PP  ])[k  ];//[ktn  ];
+			real mfbaa = (D.f[DIR_0MM  ])[kbs];//[kbs  ];
+			real mfbca = (D.f[DIR_0PM  ])[kb ];//[kbn  ];
+			real mfbac = (D.f[DIR_0MP  ])[ks ];//[kts  ];
+			real mfbbb = (D.f[DIR_000])[k  ];//[kzero];
+			real mfccc = (D.f[DIR_PPP ])[k  ];//[ktne ];
+			real mfaac = (D.f[DIR_MMP ])[ksw];//[ktsw ];
+			real mfcac = (D.f[DIR_PMP ])[ks ];//[ktse ];
+			real mfacc = (D.f[DIR_MPP ])[kw ];//[ktnw ];
+			real mfcca = (D.f[DIR_PPM ])[kb ];//[kbne ];
+			real mfaaa = (D.f[DIR_MMM ])[kbsw];//[kbsw ]
+			real mfcaa = (D.f[DIR_PMM ])[kbs];//[kbse ];
+			real mfaca = (D.f[DIR_MPM ])[kbw];//[kbnw ];
 			////////////////////////////////////////////////////////////////////////////////////
 			real rho = (mfccc+mfaaa + mfaca+mfcac + mfacc+mfcaa + mfaac+mfcca + 
 						   mfbac+mfbca + mfbaa+mfbcc + mfabc+mfcba + mfaba+mfcbc + mfacb+mfcab + mfaab+mfccb +
@@ -733,33 +733,33 @@ extern "C" __global__ void LB_Kernel_Cascade_Comp_SP_27(real omega,
 
 
 			////////////////////////////////////////////////////////////////////////////////////
-			(D.f[ E   ])[k   ] = mfabb;                                                                     
-			(D.f[ W   ])[kw  ] = mfcbb;                                                                   
-			(D.f[ N   ])[k   ] = mfbab;
-			(D.f[ S   ])[ks  ] = mfbcb;
-			(D.f[ T   ])[k   ] = mfbba;
-			(D.f[ B   ])[kb  ] = mfbbc;
-			(D.f[ NE  ])[k   ] = mfaab;
-			(D.f[ SW  ])[ksw ] = mfccb;
-			(D.f[ SE  ])[ks  ] = mfacb;
-			(D.f[ NW  ])[kw  ] = mfcab;
-			(D.f[ TE  ])[k   ] = mfaba;
-			(D.f[ BW  ])[kbw ] = mfcbc;
-			(D.f[ BE  ])[kb  ] = mfabc;
-			(D.f[ TW  ])[kw  ] = mfcba;
-			(D.f[ TN  ])[k   ] = mfbaa;
-			(D.f[ BS  ])[kbs ] = mfbcc;
-			(D.f[ BN  ])[kb  ] = mfbac;
-			(D.f[ TS  ])[ks  ] = mfbca;
-			(D.f[ REST])[k   ] = mfbbb;
-			(D.f[ TNE ])[k   ] = mfaaa;
-			(D.f[ TSE ])[ks  ] = mfaca;
-			(D.f[ BNE ])[kb  ] = mfaac;
-			(D.f[ BSE ])[kbs ] = mfacc;
-			(D.f[ TNW ])[kw  ] = mfcaa;
-			(D.f[ TSW ])[ksw ] = mfcca;
-			(D.f[ BNW ])[kbw ] = mfcac;
-			(D.f[ BSW ])[kbsw] = mfccc;
+			(D.f[ DIR_P00   ])[k   ] = mfabb;                                                                     
+			(D.f[ DIR_M00   ])[kw  ] = mfcbb;                                                                   
+			(D.f[ DIR_0P0   ])[k   ] = mfbab;
+			(D.f[ DIR_0M0   ])[ks  ] = mfbcb;
+			(D.f[ DIR_00P   ])[k   ] = mfbba;
+			(D.f[ DIR_00M   ])[kb  ] = mfbbc;
+			(D.f[ DIR_PP0  ])[k   ] = mfaab;
+			(D.f[ DIR_MM0  ])[ksw ] = mfccb;
+			(D.f[ DIR_PM0  ])[ks  ] = mfacb;
+			(D.f[ DIR_MP0  ])[kw  ] = mfcab;
+			(D.f[ DIR_P0P  ])[k   ] = mfaba;
+			(D.f[ DIR_M0M  ])[kbw ] = mfcbc;
+			(D.f[ DIR_P0M  ])[kb  ] = mfabc;
+			(D.f[ DIR_M0P  ])[kw  ] = mfcba;
+			(D.f[ DIR_0PP  ])[k   ] = mfbaa;
+			(D.f[ DIR_0MM  ])[kbs ] = mfbcc;
+			(D.f[ DIR_0PM  ])[kb  ] = mfbac;
+			(D.f[ DIR_0MP  ])[ks  ] = mfbca;
+			(D.f[ DIR_000])[k   ] = mfbbb;
+			(D.f[ DIR_PPP ])[k   ] = mfaaa;
+			(D.f[ DIR_PMP ])[ks  ] = mfaca;
+			(D.f[ DIR_PPM ])[kb  ] = mfaac;
+			(D.f[ DIR_PMM ])[kbs ] = mfacc;
+			(D.f[ DIR_MPP ])[kw  ] = mfcaa;
+			(D.f[ DIR_MMP ])[ksw ] = mfcca;
+			(D.f[ DIR_MPM ])[kbw ] = mfcac;
+			(D.f[ DIR_MMM ])[kbsw] = mfccc;
 			////////////////////////////////////////////////////////////////////////////////////
 		}                                                                                                                    
 	}
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27_Device.cuh
index 6c9f8852ae7b0aab2b82897cb5f1905f780b983d..8a49bd02af3ab420b42bc257e8668dd3ff9eca2c 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cascade/CascadeCompSP27_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_Cascade_Comp_SP_27(real s9,
+__global__ void LB_Kernel_Cascade_Comp_SP_27(real s9,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27_Device.cu
index 422d642def822e6e37dbb7efbcb9a1439af677fd..ad2ffdf4170d98125e6758c0e2f548122093cea6 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_Cum_Comp_SP_27(real omega,
+__global__ void LB_Kernel_Cum_Comp_SP_27(real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
@@ -37,63 +37,63 @@ extern "C" __global__ void LB_Kernel_Cum_Comp_SP_27(real omega,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[E] = &DDStart[E   *size_Mat];
-				D.f[W] = &DDStart[W   *size_Mat];
-				D.f[N] = &DDStart[N   *size_Mat];
-				D.f[S] = &DDStart[S   *size_Mat];
-				D.f[T] = &DDStart[T   *size_Mat];
-				D.f[B] = &DDStart[B   *size_Mat];
-				D.f[NE] = &DDStart[NE  *size_Mat];
-				D.f[SW] = &DDStart[SW  *size_Mat];
-				D.f[SE] = &DDStart[SE  *size_Mat];
-				D.f[NW] = &DDStart[NW  *size_Mat];
-				D.f[TE] = &DDStart[TE  *size_Mat];
-				D.f[BW] = &DDStart[BW  *size_Mat];
-				D.f[BE] = &DDStart[BE  *size_Mat];
-				D.f[TW] = &DDStart[TW  *size_Mat];
-				D.f[TN] = &DDStart[TN  *size_Mat];
-				D.f[BS] = &DDStart[BS  *size_Mat];
-				D.f[BN] = &DDStart[BN  *size_Mat];
-				D.f[TS] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[TNE] = &DDStart[TNE *size_Mat];
-				D.f[TSW] = &DDStart[TSW *size_Mat];
-				D.f[TSE] = &DDStart[TSE *size_Mat];
-				D.f[TNW] = &DDStart[TNW *size_Mat];
-				D.f[BNE] = &DDStart[BNE *size_Mat];
-				D.f[BSW] = &DDStart[BSW *size_Mat];
-				D.f[BSE] = &DDStart[BSE *size_Mat];
-				D.f[BNW] = &DDStart[BNW *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
 			}
 			else
 			{
-				D.f[W] = &DDStart[E   *size_Mat];
-				D.f[E] = &DDStart[W   *size_Mat];
-				D.f[S] = &DDStart[N   *size_Mat];
-				D.f[N] = &DDStart[S   *size_Mat];
-				D.f[B] = &DDStart[T   *size_Mat];
-				D.f[T] = &DDStart[B   *size_Mat];
-				D.f[SW] = &DDStart[NE  *size_Mat];
-				D.f[NE] = &DDStart[SW  *size_Mat];
-				D.f[NW] = &DDStart[SE  *size_Mat];
-				D.f[SE] = &DDStart[NW  *size_Mat];
-				D.f[BW] = &DDStart[TE  *size_Mat];
-				D.f[TE] = &DDStart[BW  *size_Mat];
-				D.f[TW] = &DDStart[BE  *size_Mat];
-				D.f[BE] = &DDStart[TW  *size_Mat];
-				D.f[BS] = &DDStart[TN  *size_Mat];
-				D.f[TN] = &DDStart[BS  *size_Mat];
-				D.f[TS] = &DDStart[BN  *size_Mat];
-				D.f[BN] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[BSW] = &DDStart[TNE *size_Mat];
-				D.f[BNE] = &DDStart[TSW *size_Mat];
-				D.f[BNW] = &DDStart[TSE *size_Mat];
-				D.f[BSE] = &DDStart[TNW *size_Mat];
-				D.f[TSW] = &DDStart[BNE *size_Mat];
-				D.f[TNE] = &DDStart[BSW *size_Mat];
-				D.f[TNW] = &DDStart[BSE *size_Mat];
-				D.f[TSE] = &DDStart[BNW *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -126,33 +126,33 @@ extern "C" __global__ void LB_Kernel_Cum_Comp_SP_27(real omega,
 			unsigned int ktne = k;
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real f_E = (D.f[E])[ke];// +  c2over27 ;
-			real f_W = (D.f[W])[kw];// +  c2over27 ;
-			real f_N = (D.f[N])[kn];// +  c2over27 ;
-			real f_S = (D.f[S])[ks];// +  c2over27 ;
-			real f_T = (D.f[T])[kt];// +  c2over27 ;
-			real f_B = (D.f[B])[kb];// +  c2over27 ;
-			real f_NE = (D.f[NE])[kne];// +  c1over54 ;
-			real f_SS = (D.f[SW])[ksw];// +  c1over54 ;
-			real f_SE = (D.f[SE])[kse];// +  c1over54 ;
-			real f_NW = (D.f[NW])[knw];// +  c1over54 ;
-			real f_TE = (D.f[TE])[kte];// +  c1over54 ;
-			real f_BW = (D.f[BW])[kbw];// +  c1over54 ;
-			real f_EB = (D.f[BE])[kbe];// +  c1over54 ;
-			real f_TW = (D.f[TW])[ktw];// +  c1over54 ;
-			real f_TN = (D.f[TN])[ktn];// +  c1over54 ;
-			real f_BS = (D.f[BS])[kbs];// +  c1over54 ;
-			real f_BN = (D.f[BN])[kbn];// +  c1over54 ;
-			real f_TS = (D.f[TS])[kts];// +  c1over54 ;
-			real f_R = (D.f[REST])[kzero];// +  c8over27 ;
-			real f_TNE = (D.f[TNE])[ktne];// +  c1over216;
-			real f_TSW = (D.f[TSW])[ktsw];// +  c1over216;
-			real f_TSE = (D.f[TSE])[ktse];// +  c1over216;
-			real f_TNW = (D.f[TNW])[ktnw];// +  c1over216;
-			real f_BNE = (D.f[BNE])[kbne];// +  c1over216;
-			real f_BSW = (D.f[BSW])[kbsw];// +  c1over216;
-			real f_BSE = (D.f[BSE])[kbse];// +  c1over216;
-			real f_BNW = (D.f[BNW])[kbnw];// +  c1over216;
+			real f_E = (D.f[DIR_P00])[ke];// +  c2over27 ;
+			real f_W = (D.f[DIR_M00])[kw];// +  c2over27 ;
+			real f_N = (D.f[DIR_0P0])[kn];// +  c2over27 ;
+			real f_S = (D.f[DIR_0M0])[ks];// +  c2over27 ;
+			real f_T = (D.f[DIR_00P])[kt];// +  c2over27 ;
+			real f_B = (D.f[DIR_00M])[kb];// +  c2over27 ;
+			real f_NE = (D.f[DIR_PP0])[kne];// +  c1over54 ;
+			real f_SS = (D.f[DIR_MM0])[ksw];// +  c1over54 ;
+			real f_SE = (D.f[DIR_PM0])[kse];// +  c1over54 ;
+			real f_NW = (D.f[DIR_MP0])[knw];// +  c1over54 ;
+			real f_TE = (D.f[DIR_P0P])[kte];// +  c1over54 ;
+			real f_BW = (D.f[DIR_M0M])[kbw];// +  c1over54 ;
+			real f_EB = (D.f[DIR_P0M])[kbe];// +  c1over54 ;
+			real f_TW = (D.f[DIR_M0P])[ktw];// +  c1over54 ;
+			real f_TN = (D.f[DIR_0PP])[ktn];// +  c1over54 ;
+			real f_BS = (D.f[DIR_0MM])[kbs];// +  c1over54 ;
+			real f_BN = (D.f[DIR_0PM])[kbn];// +  c1over54 ;
+			real f_TS = (D.f[DIR_0MP])[kts];// +  c1over54 ;
+			real f_R = (D.f[DIR_000])[kzero];// +  c8over27 ;
+			real f_TNE = (D.f[DIR_PPP])[ktne];// +  c1over216;
+			real f_TSW = (D.f[DIR_MMP])[ktsw];// +  c1over216;
+			real f_TSE = (D.f[DIR_PMP])[ktse];// +  c1over216;
+			real f_TNW = (D.f[DIR_MPP])[ktnw];// +  c1over216;
+			real f_BNE = (D.f[DIR_PPM])[kbne];// +  c1over216;
+			real f_BSW = (D.f[DIR_MMM])[kbsw];// +  c1over216;
+			real f_BSE = (D.f[DIR_PMM])[kbse];// +  c1over216;
+			real f_BNW = (D.f[DIR_MPM])[kbnw];// +  c1over216;
 										   ////////////////////////////////////////////////////////////////////////////////////
 			real fx = c0o1;
 			real fy = c0o1;
@@ -874,33 +874,33 @@ extern "C" __global__ void LB_Kernel_Cum_Comp_SP_27(real omega,
 
 
 			////////////////////////////////////////////////////////////////////////////////////
-			(D.f[E])[ke] = mfabb;// -  c2over27 ;//                                                                     
-			(D.f[W])[kw] = mfcbb;// -  c2over27 ;                                                                     
-			(D.f[N])[kn] = mfbab;// -  c2over27 ;
-			(D.f[S])[ks] = mfbcb;// -  c2over27 ;
-			(D.f[T])[kt] = mfbba;// -  c2over27 ;
-			(D.f[B])[kb] = mfbbc;// -  c2over27 ;
-			(D.f[NE])[kne] = mfaab;// -  c1over54 ;
-			(D.f[SW])[ksw] = mfccb;// -  c1over54 ;
-			(D.f[SE])[kse] = mfacb;// -  c1over54 ;
-			(D.f[NW])[knw] = mfcab;// -  c1over54 ;
-			(D.f[TE])[kte] = mfaba;// -  c1over54 ;
-			(D.f[BW])[kbw] = mfcbc;// -  c1over54 ;
-			(D.f[BE])[kbe] = mfabc;// -  c1over54 ;
-			(D.f[TW])[ktw] = mfcba;// -  c1over54 ;
-			(D.f[TN])[ktn] = mfbaa;// -  c1over54 ;
-			(D.f[BS])[kbs] = mfbcc;// -  c1over54 ;
-			(D.f[BN])[kbn] = mfbac;// -  c1over54 ;
-			(D.f[TS])[kts] = mfbca;// -  c1over54 ;
-			(D.f[REST])[kzero] = mfbbb;// -  c8over27 ;
-			(D.f[TNE])[ktne] = mfaaa;// -  c1over216;
-			(D.f[TSE])[ktse] = mfaca;// -  c1over216;
-			(D.f[BNE])[kbne] = mfaac;// -  c1over216;
-			(D.f[BSE])[kbse] = mfacc;// -  c1over216;
-			(D.f[TNW])[ktnw] = mfcaa;// -  c1over216;
-			(D.f[TSW])[ktsw] = mfcca;// -  c1over216;
-			(D.f[BNW])[kbnw] = mfcac;// -  c1over216;
-			(D.f[BSW])[kbsw] = mfccc;// -  c1over216;
+			(D.f[DIR_P00])[ke] = mfabb;// -  c2over27 ;//                                                                     
+			(D.f[DIR_M00])[kw] = mfcbb;// -  c2over27 ;                                                                     
+			(D.f[DIR_0P0])[kn] = mfbab;// -  c2over27 ;
+			(D.f[DIR_0M0])[ks] = mfbcb;// -  c2over27 ;
+			(D.f[DIR_00P])[kt] = mfbba;// -  c2over27 ;
+			(D.f[DIR_00M])[kb] = mfbbc;// -  c2over27 ;
+			(D.f[DIR_PP0])[kne] = mfaab;// -  c1over54 ;
+			(D.f[DIR_MM0])[ksw] = mfccb;// -  c1over54 ;
+			(D.f[DIR_PM0])[kse] = mfacb;// -  c1over54 ;
+			(D.f[DIR_MP0])[knw] = mfcab;// -  c1over54 ;
+			(D.f[DIR_P0P])[kte] = mfaba;// -  c1over54 ;
+			(D.f[DIR_M0M])[kbw] = mfcbc;// -  c1over54 ;
+			(D.f[DIR_P0M])[kbe] = mfabc;// -  c1over54 ;
+			(D.f[DIR_M0P])[ktw] = mfcba;// -  c1over54 ;
+			(D.f[DIR_0PP])[ktn] = mfbaa;// -  c1over54 ;
+			(D.f[DIR_0MM])[kbs] = mfbcc;// -  c1over54 ;
+			(D.f[DIR_0PM])[kbn] = mfbac;// -  c1over54 ;
+			(D.f[DIR_0MP])[kts] = mfbca;// -  c1over54 ;
+			(D.f[DIR_000])[kzero] = mfbbb;// -  c8over27 ;
+			(D.f[DIR_PPP])[ktne] = mfaaa;// -  c1over216;
+			(D.f[DIR_PMP])[ktse] = mfaca;// -  c1over216;
+			(D.f[DIR_PPM])[kbne] = mfaac;// -  c1over216;
+			(D.f[DIR_PMM])[kbse] = mfacc;// -  c1over216;
+			(D.f[DIR_MPP])[ktnw] = mfcaa;// -  c1over216;
+			(D.f[DIR_MMP])[ktsw] = mfcca;// -  c1over216;
+			(D.f[DIR_MPM])[kbnw] = mfcac;// -  c1over216;
+			(D.f[DIR_MMM])[kbsw] = mfccc;// -  c1over216;
 										////////////////////////////////////////////////////////////////////////////////////
 		}
 	}
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27_Device.cuh
index 7146787201baf348c84b593772185dfa4d1cd506..cf6a926698e4082383b04c5f1e2d886c6dca6380 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/Cumulant/CumulantCompSP27_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_Cum_Comp_SP_27(real s9,
+__global__ void LB_Kernel_Cum_Comp_SP_27(real s9,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27_Device.cu
index a52e771c2b5294eee72c9c81d29b8f2e8de7e49e..681dbff2ba37a1e0de56341b39cc2dec791f656b 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27_Device.cu
@@ -8,7 +8,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_Cumulant_D3Q27All4(	real omega,
+__global__ void LB_Kernel_Cumulant_D3Q27All4(	real omega,
 															unsigned int* bcMatD,
 															unsigned int* neighborX,
 															unsigned int* neighborY,
@@ -42,63 +42,63 @@ extern "C" __global__ void LB_Kernel_Cumulant_D3Q27All4(	real omega,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[E] = &DDStart[E   *size_Mat];
-				D.f[W] = &DDStart[W   *size_Mat];
-				D.f[N] = &DDStart[N   *size_Mat];
-				D.f[S] = &DDStart[S   *size_Mat];
-				D.f[T] = &DDStart[T   *size_Mat];
-				D.f[B] = &DDStart[B   *size_Mat];
-				D.f[NE] = &DDStart[NE  *size_Mat];
-				D.f[SW] = &DDStart[SW  *size_Mat];
-				D.f[SE] = &DDStart[SE  *size_Mat];
-				D.f[NW] = &DDStart[NW  *size_Mat];
-				D.f[TE] = &DDStart[TE  *size_Mat];
-				D.f[BW] = &DDStart[BW  *size_Mat];
-				D.f[BE] = &DDStart[BE  *size_Mat];
-				D.f[TW] = &DDStart[TW  *size_Mat];
-				D.f[TN] = &DDStart[TN  *size_Mat];
-				D.f[BS] = &DDStart[BS  *size_Mat];
-				D.f[BN] = &DDStart[BN  *size_Mat];
-				D.f[TS] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[TNE] = &DDStart[TNE *size_Mat];
-				D.f[TSW] = &DDStart[TSW *size_Mat];
-				D.f[TSE] = &DDStart[TSE *size_Mat];
-				D.f[TNW] = &DDStart[TNW *size_Mat];
-				D.f[BNE] = &DDStart[BNE *size_Mat];
-				D.f[BSW] = &DDStart[BSW *size_Mat];
-				D.f[BSE] = &DDStart[BSE *size_Mat];
-				D.f[BNW] = &DDStart[BNW *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
 			}
 			else
 			{
-				D.f[W] = &DDStart[E   *size_Mat];
-				D.f[E] = &DDStart[W   *size_Mat];
-				D.f[S] = &DDStart[N   *size_Mat];
-				D.f[N] = &DDStart[S   *size_Mat];
-				D.f[B] = &DDStart[T   *size_Mat];
-				D.f[T] = &DDStart[B   *size_Mat];
-				D.f[SW] = &DDStart[NE  *size_Mat];
-				D.f[NE] = &DDStart[SW  *size_Mat];
-				D.f[NW] = &DDStart[SE  *size_Mat];
-				D.f[SE] = &DDStart[NW  *size_Mat];
-				D.f[BW] = &DDStart[TE  *size_Mat];
-				D.f[TE] = &DDStart[BW  *size_Mat];
-				D.f[TW] = &DDStart[BE  *size_Mat];
-				D.f[BE] = &DDStart[TW  *size_Mat];
-				D.f[BS] = &DDStart[TN  *size_Mat];
-				D.f[TN] = &DDStart[BS  *size_Mat];
-				D.f[TS] = &DDStart[BN  *size_Mat];
-				D.f[BN] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[BSW] = &DDStart[TNE *size_Mat];
-				D.f[BNE] = &DDStart[TSW *size_Mat];
-				D.f[BNW] = &DDStart[TSE *size_Mat];
-				D.f[BSE] = &DDStart[TNW *size_Mat];
-				D.f[TSW] = &DDStart[BNE *size_Mat];
-				D.f[TNE] = &DDStart[BSW *size_Mat];
-				D.f[TNW] = &DDStart[BSE *size_Mat];
-				D.f[TSE] = &DDStart[BNW *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -160,33 +160,33 @@ extern "C" __global__ void LB_Kernel_Cumulant_D3Q27All4(	real omega,
 			//unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[E])[k];//[ke   ];// +  c2over27 ;(D.f[E   ])[k  ];//ke
-			real mfabb = (D.f[W])[kw];//[kw   ];// +  c2over27 ;(D.f[W   ])[kw ];
-			real mfbcb = (D.f[N])[k];//[kn   ];// +  c2over27 ;(D.f[N   ])[k  ];//kn
-			real mfbab = (D.f[S])[ks];//[ks   ];// +  c2over27 ;(D.f[S   ])[ks ];
-			real mfbbc = (D.f[T])[k];//[kt   ];// +  c2over27 ;(D.f[T   ])[k  ];//kt
-			real mfbba = (D.f[B])[kb];//[kb   ];// +  c2over27 ;(D.f[B   ])[kb ];
-			real mfccb = (D.f[NE])[k];//[kne  ];// +  c1over54 ;(D.f[NE  ])[k  ];//kne
-			real mfaab = (D.f[SW])[ksw];//[ksw  ];// +  c1over54 ;(D.f[SW  ])[ksw];
-			real mfcab = (D.f[SE])[ks];//[kse  ];// +  c1over54 ;(D.f[SE  ])[ks ];//kse
-			real mfacb = (D.f[NW])[kw];//[knw  ];// +  c1over54 ;(D.f[NW  ])[kw ];//knw
-			real mfcbc = (D.f[TE])[k];//[kte  ];// +  c1over54 ;(D.f[TE  ])[k  ];//kte
-			real mfaba = (D.f[BW])[kbw];//[kbw  ];// +  c1over54 ;(D.f[BW  ])[kbw];
-			real mfcba = (D.f[BE])[kb];//[kbe  ];// +  c1over54 ;(D.f[BE  ])[kb ];//kbe
-			real mfabc = (D.f[TW])[kw];//[ktw  ];// +  c1over54 ;(D.f[TW  ])[kw ];//ktw
-			real mfbcc = (D.f[TN])[k];//[ktn  ];// +  c1over54 ;(D.f[TN  ])[k  ];//ktn
-			real mfbaa = (D.f[BS])[kbs];//[kbs  ];// +  c1over54 ;(D.f[BS  ])[kbs];
-			real mfbca = (D.f[BN])[kb];//[kbn  ];// +  c1over54 ;(D.f[BN  ])[kb ];//kbn
-			real mfbac = (D.f[TS])[ks];//[kts  ];// +  c1over54 ;(D.f[TS  ])[ks ];//kts
-			real mfbbb = (D.f[REST])[k];//[kzero];// +  c8over27 ;(D.f[REST])[k  ];//kzero
-			real mfccc = (D.f[TNE])[k];//[ktne ];// +  c1over216;(D.f[TNE ])[k  ];//ktne
-			real mfaac = (D.f[TSW])[ksw];//[ktsw ];// +  c1over216;(D.f[TSW ])[ksw];//ktsw
-			real mfcac = (D.f[TSE])[ks];//[ktse ];// +  c1over216;(D.f[TSE ])[ks ];//ktse
-			real mfacc = (D.f[TNW])[kw];//[ktnw ];// +  c1over216;(D.f[TNW ])[kw ];//ktnw
-			real mfcca = (D.f[BNE])[kb];//[kbne ];// +  c1over216;(D.f[BNE ])[kb ];//kbne
-			real mfaaa = (D.f[BSW])[kbsw];//[kbsw ];// +  c1over216;(D.f[BSW ])[kbsw];
-			real mfcaa = (D.f[BSE])[kbs];//[kbse ];// +  c1over216;(D.f[BSE ])[kbs];//kbse
-			real mfaca = (D.f[BNW])[kbw];//[kbnw ];// +  c1over216;(D.f[BNW ])[kbw];//kbnw
+			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
+			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
+			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfbbb = (D.f[DIR_000])[k];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
+			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
 											////////////////////////////////////////////////////////////////////////////////////
 			real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 				(((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
@@ -1061,33 +1061,33 @@ extern "C" __global__ void LB_Kernel_Cumulant_D3Q27All4(	real omega,
 			////////////////////////////////////////////////////////////////////////////////////
 
 			////////////////////////////////////////////////////////////////////////////////////
-			(D.f[E])[k] = mfabb;//(D.f[ E   ])[ke   ] = mfabb;// -  c2over27 ;  (D.f[ E   ])[k   ]                                                                     
-			(D.f[W])[kw] = mfcbb;//(D.f[ W   ])[kw   ] = mfcbb;// -  c2over27 ;  (D.f[ W   ])[kw  ]                                                                   
-			(D.f[N])[k] = mfbab;//(D.f[ N   ])[kn   ] = mfbab;// -  c2over27 ;	 (D.f[ N   ])[k   ]
-			(D.f[S])[ks] = mfbcb;//(D.f[ S   ])[ks   ] = mfbcb;// -  c2over27 ;	 (D.f[ S   ])[ks  ]
-			(D.f[T])[k] = mfbba;//(D.f[ T   ])[kt   ] = mfbba;// -  c2over27 ;	 (D.f[ T   ])[k   ]
-			(D.f[B])[kb] = mfbbc;//(D.f[ B   ])[kb   ] = mfbbc;// -  c2over27 ;	 (D.f[ B   ])[kb  ]
-			(D.f[NE])[k] = mfaab;//(D.f[ NE  ])[kne  ] = mfaab;// -  c1over54 ;	 (D.f[ NE  ])[k   ]
-			(D.f[SW])[ksw] = mfccb;//(D.f[ SW  ])[ksw  ] = mfccb;// -  c1over54 ;	 (D.f[ SW  ])[ksw ]
-			(D.f[SE])[ks] = mfacb;//(D.f[ SE  ])[kse  ] = mfacb;// -  c1over54 ;	 (D.f[ SE  ])[ks  ]
-			(D.f[NW])[kw] = mfcab;//(D.f[ NW  ])[knw  ] = mfcab;// -  c1over54 ;	 (D.f[ NW  ])[kw  ]
-			(D.f[TE])[k] = mfaba;//(D.f[ TE  ])[kte  ] = mfaba;// -  c1over54 ;	 (D.f[ TE  ])[k   ]
-			(D.f[BW])[kbw] = mfcbc;//(D.f[ BW  ])[kbw  ] = mfcbc;// -  c1over54 ;	 (D.f[ BW  ])[kbw ]
-			(D.f[BE])[kb] = mfabc;//(D.f[ BE  ])[kbe  ] = mfabc;// -  c1over54 ;	 (D.f[ BE  ])[kb  ]
-			(D.f[TW])[kw] = mfcba;//(D.f[ TW  ])[ktw  ] = mfcba;// -  c1over54 ;	 (D.f[ TW  ])[kw  ]
-			(D.f[TN])[k] = mfbaa;//(D.f[ TN  ])[ktn  ] = mfbaa;// -  c1over54 ;	 (D.f[ TN  ])[k   ]
-			(D.f[BS])[kbs] = mfbcc;//(D.f[ BS  ])[kbs  ] = mfbcc;// -  c1over54 ;	 (D.f[ BS  ])[kbs ]
-			(D.f[BN])[kb] = mfbac;//(D.f[ BN  ])[kbn  ] = mfbac;// -  c1over54 ;	 (D.f[ BN  ])[kb  ]
-			(D.f[TS])[ks] = mfbca;//(D.f[ TS  ])[kts  ] = mfbca;// -  c1over54 ;	 (D.f[ TS  ])[ks  ]
-			(D.f[REST])[k] = mfbbb;//(D.f[ REST])[kzero] = mfbbb;// -  c8over27 ;	 (D.f[ REST])[k   ]
-			(D.f[TNE])[k] = mfaaa;//(D.f[ TNE ])[ktne ] = mfaaa;// -  c1over216;	 (D.f[ TNE ])[k   ]
-			(D.f[TSE])[ks] = mfaca;//(D.f[ TSE ])[ktse ] = mfaca;// -  c1over216;	 (D.f[ TSE ])[ks  ]
-			(D.f[BNE])[kb] = mfaac;//(D.f[ BNE ])[kbne ] = mfaac;// -  c1over216;	 (D.f[ BNE ])[kb  ]
-			(D.f[BSE])[kbs] = mfacc;//(D.f[ BSE ])[kbse ] = mfacc;// -  c1over216;	 (D.f[ BSE ])[kbs ]
-			(D.f[TNW])[kw] = mfcaa;//(D.f[ TNW ])[ktnw ] = mfcaa;// -  c1over216;	 (D.f[ TNW ])[kw  ]
-			(D.f[TSW])[ksw] = mfcca;//(D.f[ TSW ])[ktsw ] = mfcca;// -  c1over216;	 (D.f[ TSW ])[ksw ]
-			(D.f[BNW])[kbw] = mfcac;//(D.f[ BNW ])[kbnw ] = mfcac;// -  c1over216;	 (D.f[ BNW ])[kbw ]
-			(D.f[BSW])[kbsw] = mfccc;//(D.f[ BSW ])[kbsw ] = mfccc;// -  c1over216;	 (D.f[ BSW ])[kbsw]
+			(D.f[DIR_P00])[k] = mfabb;//(D.f[ DIR_P00   ])[ke   ] = mfabb;// -  c2over27 ;  (D.f[ DIR_P00   ])[k   ]                                                                     
+			(D.f[DIR_M00])[kw] = mfcbb;//(D.f[ DIR_M00   ])[kw   ] = mfcbb;// -  c2over27 ;  (D.f[ DIR_M00   ])[kw  ]                                                                   
+			(D.f[DIR_0P0])[k] = mfbab;//(D.f[ DIR_0P0   ])[kn   ] = mfbab;// -  c2over27 ;	 (D.f[ DIR_0P0   ])[k   ]
+			(D.f[DIR_0M0])[ks] = mfbcb;//(D.f[ DIR_0M0   ])[ks   ] = mfbcb;// -  c2over27 ;	 (D.f[ DIR_0M0   ])[ks  ]
+			(D.f[DIR_00P])[k] = mfbba;//(D.f[ DIR_00P   ])[kt   ] = mfbba;// -  c2over27 ;	 (D.f[ DIR_00P   ])[k   ]
+			(D.f[DIR_00M])[kb] = mfbbc;//(D.f[ DIR_00M   ])[kb   ] = mfbbc;// -  c2over27 ;	 (D.f[ DIR_00M   ])[kb  ]
+			(D.f[DIR_PP0])[k] = mfaab;//(D.f[ DIR_PP0  ])[kne  ] = mfaab;// -  c1over54 ;	 (D.f[ DIR_PP0  ])[k   ]
+			(D.f[DIR_MM0])[ksw] = mfccb;//(D.f[ DIR_MM0  ])[ksw  ] = mfccb;// -  c1over54 ;	 (D.f[ DIR_MM0  ])[ksw ]
+			(D.f[DIR_PM0])[ks] = mfacb;//(D.f[ DIR_PM0  ])[kse  ] = mfacb;// -  c1over54 ;	 (D.f[ DIR_PM0  ])[ks  ]
+			(D.f[DIR_MP0])[kw] = mfcab;//(D.f[ DIR_MP0  ])[knw  ] = mfcab;// -  c1over54 ;	 (D.f[ DIR_MP0  ])[kw  ]
+			(D.f[DIR_P0P])[k] = mfaba;//(D.f[ DIR_P0P  ])[kte  ] = mfaba;// -  c1over54 ;	 (D.f[ DIR_P0P  ])[k   ]
+			(D.f[DIR_M0M])[kbw] = mfcbc;//(D.f[ DIR_M0M  ])[kbw  ] = mfcbc;// -  c1over54 ;	 (D.f[ DIR_M0M  ])[kbw ]
+			(D.f[DIR_P0M])[kb] = mfabc;//(D.f[ DIR_P0M  ])[kbe  ] = mfabc;// -  c1over54 ;	 (D.f[ DIR_P0M  ])[kb  ]
+			(D.f[DIR_M0P])[kw] = mfcba;//(D.f[ DIR_M0P  ])[ktw  ] = mfcba;// -  c1over54 ;	 (D.f[ DIR_M0P  ])[kw  ]
+			(D.f[DIR_0PP])[k] = mfbaa;//(D.f[ DIR_0PP  ])[ktn  ] = mfbaa;// -  c1over54 ;	 (D.f[ DIR_0PP  ])[k   ]
+			(D.f[DIR_0MM])[kbs] = mfbcc;//(D.f[ DIR_0MM  ])[kbs  ] = mfbcc;// -  c1over54 ;	 (D.f[ DIR_0MM  ])[kbs ]
+			(D.f[DIR_0PM])[kb] = mfbac;//(D.f[ DIR_0PM  ])[kbn  ] = mfbac;// -  c1over54 ;	 (D.f[ DIR_0PM  ])[kb  ]
+			(D.f[DIR_0MP])[ks] = mfbca;//(D.f[ DIR_0MP  ])[kts  ] = mfbca;// -  c1over54 ;	 (D.f[ DIR_0MP  ])[ks  ]
+			(D.f[DIR_000])[k] = mfbbb;//(D.f[ DIR_000])[kzero] = mfbbb;// -  c8over27 ;	 (D.f[ DIR_000])[k   ]
+			(D.f[DIR_PPP])[k] = mfaaa;//(D.f[ DIR_PPP ])[ktne ] = mfaaa;// -  c1over216;	 (D.f[ DIR_PPP ])[k   ]
+			(D.f[DIR_PMP])[ks] = mfaca;//(D.f[ DIR_PMP ])[ktse ] = mfaca;// -  c1over216;	 (D.f[ DIR_PMP ])[ks  ]
+			(D.f[DIR_PPM])[kb] = mfaac;//(D.f[ DIR_PPM ])[kbne ] = mfaac;// -  c1over216;	 (D.f[ DIR_PPM ])[kb  ]
+			(D.f[DIR_PMM])[kbs] = mfacc;//(D.f[ DIR_PMM ])[kbse ] = mfacc;// -  c1over216;	 (D.f[ DIR_PMM ])[kbs ]
+			(D.f[DIR_MPP])[kw] = mfcaa;//(D.f[ DIR_MPP ])[ktnw ] = mfcaa;// -  c1over216;	 (D.f[ DIR_MPP ])[kw  ]
+			(D.f[DIR_MMP])[ksw] = mfcca;//(D.f[ DIR_MMP ])[ktsw ] = mfcca;// -  c1over216;	 (D.f[ DIR_MMP ])[ksw ]
+			(D.f[DIR_MPM])[kbw] = mfcac;//(D.f[ DIR_MPM ])[kbnw ] = mfcac;// -  c1over216;	 (D.f[ DIR_MPM ])[kbw ]
+			(D.f[DIR_MMM])[kbsw] = mfccc;//(D.f[ DIR_MMM ])[kbsw ] = mfccc;// -  c1over216;	 (D.f[ DIR_MMM ])[kbsw]
 										////////////////////////////////////////////////////////////////////////////////////
 		}
 	}
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27_Device.cuh
index 27f981228c1e2be8e29d176d0a234534c1e61b5b..5f23194d561d106cf2493c36199444f8da15efd7 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_Cumulant_D3Q27All4(	real omega,
+__global__ void LB_Kernel_Cumulant_D3Q27All4(	real omega,
 															unsigned int* bcMatD,
 															unsigned int* neighborX,
 															unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp_Device.cu
index a8ec8d7e25beb6b0f88c53ec9b2eade11f760b4b..93d57d6c9871d66537f25b9188467d46e3b3d05c 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_CumulantK15Comp(real omega,
+__global__ void LB_Kernel_CumulantK15Comp(real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
@@ -39,63 +39,63 @@ extern "C" __global__ void LB_Kernel_CumulantK15Comp(real omega,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[E] = &DDStart[E   *size_Mat];
-				D.f[W] = &DDStart[W   *size_Mat];
-				D.f[N] = &DDStart[N   *size_Mat];
-				D.f[S] = &DDStart[S   *size_Mat];
-				D.f[T] = &DDStart[T   *size_Mat];
-				D.f[B] = &DDStart[B   *size_Mat];
-				D.f[NE] = &DDStart[NE  *size_Mat];
-				D.f[SW] = &DDStart[SW  *size_Mat];
-				D.f[SE] = &DDStart[SE  *size_Mat];
-				D.f[NW] = &DDStart[NW  *size_Mat];
-				D.f[TE] = &DDStart[TE  *size_Mat];
-				D.f[BW] = &DDStart[BW  *size_Mat];
-				D.f[BE] = &DDStart[BE  *size_Mat];
-				D.f[TW] = &DDStart[TW  *size_Mat];
-				D.f[TN] = &DDStart[TN  *size_Mat];
-				D.f[BS] = &DDStart[BS  *size_Mat];
-				D.f[BN] = &DDStart[BN  *size_Mat];
-				D.f[TS] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[TNE] = &DDStart[TNE *size_Mat];
-				D.f[TSW] = &DDStart[TSW *size_Mat];
-				D.f[TSE] = &DDStart[TSE *size_Mat];
-				D.f[TNW] = &DDStart[TNW *size_Mat];
-				D.f[BNE] = &DDStart[BNE *size_Mat];
-				D.f[BSW] = &DDStart[BSW *size_Mat];
-				D.f[BSE] = &DDStart[BSE *size_Mat];
-				D.f[BNW] = &DDStart[BNW *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
 			}
 			else
 			{
-				D.f[W] = &DDStart[E   *size_Mat];
-				D.f[E] = &DDStart[W   *size_Mat];
-				D.f[S] = &DDStart[N   *size_Mat];
-				D.f[N] = &DDStart[S   *size_Mat];
-				D.f[B] = &DDStart[T   *size_Mat];
-				D.f[T] = &DDStart[B   *size_Mat];
-				D.f[SW] = &DDStart[NE  *size_Mat];
-				D.f[NE] = &DDStart[SW  *size_Mat];
-				D.f[NW] = &DDStart[SE  *size_Mat];
-				D.f[SE] = &DDStart[NW  *size_Mat];
-				D.f[BW] = &DDStart[TE  *size_Mat];
-				D.f[TE] = &DDStart[BW  *size_Mat];
-				D.f[TW] = &DDStart[BE  *size_Mat];
-				D.f[BE] = &DDStart[TW  *size_Mat];
-				D.f[BS] = &DDStart[TN  *size_Mat];
-				D.f[TN] = &DDStart[BS  *size_Mat];
-				D.f[TS] = &DDStart[BN  *size_Mat];
-				D.f[BN] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[BSW] = &DDStart[TNE *size_Mat];
-				D.f[BNE] = &DDStart[TSW *size_Mat];
-				D.f[BNW] = &DDStart[TSE *size_Mat];
-				D.f[BSE] = &DDStart[TNW *size_Mat];
-				D.f[TSW] = &DDStart[BNE *size_Mat];
-				D.f[TNE] = &DDStart[BSW *size_Mat];
-				D.f[TNW] = &DDStart[BSE *size_Mat];
-				D.f[TSE] = &DDStart[BNW *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -156,33 +156,33 @@ extern "C" __global__ void LB_Kernel_CumulantK15Comp(real omega,
 			//unsigned int ktne = k;
 			//unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[E])[k];//[ke   ];// +  c2over27 ;(D.f[E   ])[k  ];//ke
-			real mfabb = (D.f[W])[kw];//[kw   ];// +  c2over27 ;(D.f[W   ])[kw ];
-			real mfbcb = (D.f[N])[k];//[kn   ];// +  c2over27 ;(D.f[N   ])[k  ];//kn
-			real mfbab = (D.f[S])[ks];//[ks   ];// +  c2over27 ;(D.f[S   ])[ks ];
-			real mfbbc = (D.f[T])[k];//[kt   ];// +  c2over27 ;(D.f[T   ])[k  ];//kt
-			real mfbba = (D.f[B])[kb];//[kb   ];// +  c2over27 ;(D.f[B   ])[kb ];
-			real mfccb = (D.f[NE])[k];//[kne  ];// +  c1over54 ;(D.f[NE  ])[k  ];//kne
-			real mfaab = (D.f[SW])[ksw];//[ksw  ];// +  c1over54 ;(D.f[SW  ])[ksw];
-			real mfcab = (D.f[SE])[ks];//[kse  ];// +  c1over54 ;(D.f[SE  ])[ks ];//kse
-			real mfacb = (D.f[NW])[kw];//[knw  ];// +  c1over54 ;(D.f[NW  ])[kw ];//knw
-			real mfcbc = (D.f[TE])[k];//[kte  ];// +  c1over54 ;(D.f[TE  ])[k  ];//kte
-			real mfaba = (D.f[BW])[kbw];//[kbw  ];// +  c1over54 ;(D.f[BW  ])[kbw];
-			real mfcba = (D.f[BE])[kb];//[kbe  ];// +  c1over54 ;(D.f[BE  ])[kb ];//kbe
-			real mfabc = (D.f[TW])[kw];//[ktw  ];// +  c1over54 ;(D.f[TW  ])[kw ];//ktw
-			real mfbcc = (D.f[TN])[k];//[ktn  ];// +  c1over54 ;(D.f[TN  ])[k  ];//ktn
-			real mfbaa = (D.f[BS])[kbs];//[kbs  ];// +  c1over54 ;(D.f[BS  ])[kbs];
-			real mfbca = (D.f[BN])[kb];//[kbn  ];// +  c1over54 ;(D.f[BN  ])[kb ];//kbn
-			real mfbac = (D.f[TS])[ks];//[kts  ];// +  c1over54 ;(D.f[TS  ])[ks ];//kts
-			real mfbbb = (D.f[REST])[k];//[kzero];// +  c8over27 ;(D.f[REST])[k  ];//kzero
-			real mfccc = (D.f[TNE])[k];//[ktne ];// +  c1over216;(D.f[TNE ])[k  ];//ktne
-			real mfaac = (D.f[TSW])[ksw];//[ktsw ];// +  c1over216;(D.f[TSW ])[ksw];//ktsw
-			real mfcac = (D.f[TSE])[ks];//[ktse ];// +  c1over216;(D.f[TSE ])[ks ];//ktse
-			real mfacc = (D.f[TNW])[kw];//[ktnw ];// +  c1over216;(D.f[TNW ])[kw ];//ktnw
-			real mfcca = (D.f[BNE])[kb];//[kbne ];// +  c1over216;(D.f[BNE ])[kb ];//kbne
-			real mfaaa = (D.f[BSW])[kbsw];//[kbsw ];// +  c1over216;(D.f[BSW ])[kbsw];
-			real mfcaa = (D.f[BSE])[kbs];//[kbse ];// +  c1over216;(D.f[BSE ])[kbs];//kbse
-			real mfaca = (D.f[BNW])[kbw];//[kbnw ];// +  c1over216;(D.f[BNW ])[kbw];//kbnw
+			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
+			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
+			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfbbb = (D.f[DIR_000])[k];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
+			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
 											////////////////////////////////////////////////////////////////////////////////////
 			real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 				(((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
@@ -1242,33 +1242,33 @@ extern "C" __global__ void LB_Kernel_CumulantK15Comp(real omega,
 					((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) + mfbbb;
 			mfbbb += drho - drhoPost;
 			////////////////////////////////////////////////////////////////////////////////////
-			(D.f[E])[k] = mfabb;//(D.f[ E   ])[ke   ] = mfabb;// -  c2over27 ;  (D.f[ E   ])[k   ]                                                                     
-			(D.f[W])[kw] = mfcbb;//(D.f[ W   ])[kw   ] = mfcbb;// -  c2over27 ;  (D.f[ W   ])[kw  ]                                                                   
-			(D.f[N])[k] = mfbab;//(D.f[ N   ])[kn   ] = mfbab;// -  c2over27 ;	 (D.f[ N   ])[k   ]
-			(D.f[S])[ks] = mfbcb;//(D.f[ S   ])[ks   ] = mfbcb;// -  c2over27 ;	 (D.f[ S   ])[ks  ]
-			(D.f[T])[k] = mfbba;//(D.f[ T   ])[kt   ] = mfbba;// -  c2over27 ;	 (D.f[ T   ])[k   ]
-			(D.f[B])[kb] = mfbbc;//(D.f[ B   ])[kb   ] = mfbbc;// -  c2over27 ;	 (D.f[ B   ])[kb  ]
-			(D.f[NE])[k] = mfaab;//(D.f[ NE  ])[kne  ] = mfaab;// -  c1over54 ;	 (D.f[ NE  ])[k   ]
-			(D.f[SW])[ksw] = mfccb;//(D.f[ SW  ])[ksw  ] = mfccb;// -  c1over54 ;	 (D.f[ SW  ])[ksw ]
-			(D.f[SE])[ks] = mfacb;//(D.f[ SE  ])[kse  ] = mfacb;// -  c1over54 ;	 (D.f[ SE  ])[ks  ]
-			(D.f[NW])[kw] = mfcab;//(D.f[ NW  ])[knw  ] = mfcab;// -  c1over54 ;	 (D.f[ NW  ])[kw  ]
-			(D.f[TE])[k] = mfaba;//(D.f[ TE  ])[kte  ] = mfaba;// -  c1over54 ;	 (D.f[ TE  ])[k   ]
-			(D.f[BW])[kbw] = mfcbc;//(D.f[ BW  ])[kbw  ] = mfcbc;// -  c1over54 ;	 (D.f[ BW  ])[kbw ]
-			(D.f[BE])[kb] = mfabc;//(D.f[ BE  ])[kbe  ] = mfabc;// -  c1over54 ;	 (D.f[ BE  ])[kb  ]
-			(D.f[TW])[kw] = mfcba;//(D.f[ TW  ])[ktw  ] = mfcba;// -  c1over54 ;	 (D.f[ TW  ])[kw  ]
-			(D.f[TN])[k] = mfbaa;//(D.f[ TN  ])[ktn  ] = mfbaa;// -  c1over54 ;	 (D.f[ TN  ])[k   ]
-			(D.f[BS])[kbs] = mfbcc;//(D.f[ BS  ])[kbs  ] = mfbcc;// -  c1over54 ;	 (D.f[ BS  ])[kbs ]
-			(D.f[BN])[kb] = mfbac;//(D.f[ BN  ])[kbn  ] = mfbac;// -  c1over54 ;	 (D.f[ BN  ])[kb  ]
-			(D.f[TS])[ks] = mfbca;//(D.f[ TS  ])[kts  ] = mfbca;// -  c1over54 ;	 (D.f[ TS  ])[ks  ]
-			(D.f[REST])[k] = mfbbb;//(D.f[ REST])[kzero] = mfbbb;// -  c8over27 ;	 (D.f[ REST])[k   ]
-			(D.f[TNE])[k] = mfaaa;//(D.f[ TNE ])[ktne ] = mfaaa;// -  c1over216;	 (D.f[ TNE ])[k   ]
-			(D.f[TSE])[ks] = mfaca;//(D.f[ TSE ])[ktse ] = mfaca;// -  c1over216;	 (D.f[ TSE ])[ks  ]
-			(D.f[BNE])[kb] = mfaac;//(D.f[ BNE ])[kbne ] = mfaac;// -  c1over216;	 (D.f[ BNE ])[kb  ]
-			(D.f[BSE])[kbs] = mfacc;//(D.f[ BSE ])[kbse ] = mfacc;// -  c1over216;	 (D.f[ BSE ])[kbs ]
-			(D.f[TNW])[kw] = mfcaa;//(D.f[ TNW ])[ktnw ] = mfcaa;// -  c1over216;	 (D.f[ TNW ])[kw  ]
-			(D.f[TSW])[ksw] = mfcca;//(D.f[ TSW ])[ktsw ] = mfcca;// -  c1over216;	 (D.f[ TSW ])[ksw ]
-			(D.f[BNW])[kbw] = mfcac;//(D.f[ BNW ])[kbnw ] = mfcac;// -  c1over216;	 (D.f[ BNW ])[kbw ]
-			(D.f[BSW])[kbsw] = mfccc;//(D.f[ BSW ])[kbsw ] = mfccc;// -  c1over216;	 (D.f[ BSW ])[kbsw]
+			(D.f[DIR_P00])[k] = mfabb;//(D.f[ DIR_P00   ])[ke   ] = mfabb;// -  c2over27 ;  (D.f[ DIR_P00   ])[k   ]                                                                     
+			(D.f[DIR_M00])[kw] = mfcbb;//(D.f[ DIR_M00   ])[kw   ] = mfcbb;// -  c2over27 ;  (D.f[ DIR_M00   ])[kw  ]                                                                   
+			(D.f[DIR_0P0])[k] = mfbab;//(D.f[ DIR_0P0   ])[kn   ] = mfbab;// -  c2over27 ;	 (D.f[ DIR_0P0   ])[k   ]
+			(D.f[DIR_0M0])[ks] = mfbcb;//(D.f[ DIR_0M0   ])[ks   ] = mfbcb;// -  c2over27 ;	 (D.f[ DIR_0M0   ])[ks  ]
+			(D.f[DIR_00P])[k] = mfbba;//(D.f[ DIR_00P   ])[kt   ] = mfbba;// -  c2over27 ;	 (D.f[ DIR_00P   ])[k   ]
+			(D.f[DIR_00M])[kb] = mfbbc;//(D.f[ DIR_00M   ])[kb   ] = mfbbc;// -  c2over27 ;	 (D.f[ DIR_00M   ])[kb  ]
+			(D.f[DIR_PP0])[k] = mfaab;//(D.f[ DIR_PP0  ])[kne  ] = mfaab;// -  c1over54 ;	 (D.f[ DIR_PP0  ])[k   ]
+			(D.f[DIR_MM0])[ksw] = mfccb;//(D.f[ DIR_MM0  ])[ksw  ] = mfccb;// -  c1over54 ;	 (D.f[ DIR_MM0  ])[ksw ]
+			(D.f[DIR_PM0])[ks] = mfacb;//(D.f[ DIR_PM0  ])[kse  ] = mfacb;// -  c1over54 ;	 (D.f[ DIR_PM0  ])[ks  ]
+			(D.f[DIR_MP0])[kw] = mfcab;//(D.f[ DIR_MP0  ])[knw  ] = mfcab;// -  c1over54 ;	 (D.f[ DIR_MP0  ])[kw  ]
+			(D.f[DIR_P0P])[k] = mfaba;//(D.f[ DIR_P0P  ])[kte  ] = mfaba;// -  c1over54 ;	 (D.f[ DIR_P0P  ])[k   ]
+			(D.f[DIR_M0M])[kbw] = mfcbc;//(D.f[ DIR_M0M  ])[kbw  ] = mfcbc;// -  c1over54 ;	 (D.f[ DIR_M0M  ])[kbw ]
+			(D.f[DIR_P0M])[kb] = mfabc;//(D.f[ DIR_P0M  ])[kbe  ] = mfabc;// -  c1over54 ;	 (D.f[ DIR_P0M  ])[kb  ]
+			(D.f[DIR_M0P])[kw] = mfcba;//(D.f[ DIR_M0P  ])[ktw  ] = mfcba;// -  c1over54 ;	 (D.f[ DIR_M0P  ])[kw  ]
+			(D.f[DIR_0PP])[k] = mfbaa;//(D.f[ DIR_0PP  ])[ktn  ] = mfbaa;// -  c1over54 ;	 (D.f[ DIR_0PP  ])[k   ]
+			(D.f[DIR_0MM])[kbs] = mfbcc;//(D.f[ DIR_0MM  ])[kbs  ] = mfbcc;// -  c1over54 ;	 (D.f[ DIR_0MM  ])[kbs ]
+			(D.f[DIR_0PM])[kb] = mfbac;//(D.f[ DIR_0PM  ])[kbn  ] = mfbac;// -  c1over54 ;	 (D.f[ DIR_0PM  ])[kb  ]
+			(D.f[DIR_0MP])[ks] = mfbca;//(D.f[ DIR_0MP  ])[kts  ] = mfbca;// -  c1over54 ;	 (D.f[ DIR_0MP  ])[ks  ]
+			(D.f[DIR_000])[k] = mfbbb;//(D.f[ DIR_000])[kzero] = mfbbb;// -  c8over27 ;	 (D.f[ DIR_000])[k   ]
+			(D.f[DIR_PPP])[k] = mfaaa;//(D.f[ DIR_PPP ])[ktne ] = mfaaa;// -  c1over216;	 (D.f[ DIR_PPP ])[k   ]
+			(D.f[DIR_PMP])[ks] = mfaca;//(D.f[ DIR_PMP ])[ktse ] = mfaca;// -  c1over216;	 (D.f[ DIR_PMP ])[ks  ]
+			(D.f[DIR_PPM])[kb] = mfaac;//(D.f[ DIR_PPM ])[kbne ] = mfaac;// -  c1over216;	 (D.f[ DIR_PPM ])[kb  ]
+			(D.f[DIR_PMM])[kbs] = mfacc;//(D.f[ DIR_PMM ])[kbse ] = mfacc;// -  c1over216;	 (D.f[ DIR_PMM ])[kbs ]
+			(D.f[DIR_MPP])[kw] = mfcaa;//(D.f[ DIR_MPP ])[ktnw ] = mfcaa;// -  c1over216;	 (D.f[ DIR_MPP ])[kw  ]
+			(D.f[DIR_MMP])[ksw] = mfcca;//(D.f[ DIR_MMP ])[ktsw ] = mfcca;// -  c1over216;	 (D.f[ DIR_MMP ])[ksw ]
+			(D.f[DIR_MPM])[kbw] = mfcac;//(D.f[ DIR_MPM ])[kbnw ] = mfcac;// -  c1over216;	 (D.f[ DIR_MPM ])[kbw ]
+			(D.f[DIR_MMM])[kbsw] = mfccc;//(D.f[ DIR_MMM ])[kbsw ] = mfccc;// -  c1over216;	 (D.f[ DIR_MMM ])[kbsw]
 										////////////////////////////////////////////////////////////////////////////////////
 		}
 	}
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp_Device.cuh
index 208fbec553507812bfe4339577734292b248c027..149aab34a16673ddd707427c222a56cf18d127ca 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15/CumulantK15Comp_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_CumulantK15Comp(	real omega,
+__global__ void LB_Kernel_CumulantK15Comp(	real omega,
 														unsigned int* bcMatD,
 														unsigned int* neighborX,
 														unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Bulk/CumulantK15BulkComp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Bulk/CumulantK15BulkComp_Device.cu
index de7f5ce74c4dfd5ea68bb6bdd45fcc3d9fa3ab53..d2a2f61df902cfd7c5ef52b09f8e7738a108615e 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Bulk/CumulantK15BulkComp_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Bulk/CumulantK15BulkComp_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_CumulantK15BulkComp(real omega,
+__global__ void LB_Kernel_CumulantK15BulkComp(real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
@@ -39,63 +39,63 @@ extern "C" __global__ void LB_Kernel_CumulantK15BulkComp(real omega,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[E] = &DDStart[E   *size_Mat];
-				D.f[W] = &DDStart[W   *size_Mat];
-				D.f[N] = &DDStart[N   *size_Mat];
-				D.f[S] = &DDStart[S   *size_Mat];
-				D.f[T] = &DDStart[T   *size_Mat];
-				D.f[B] = &DDStart[B   *size_Mat];
-				D.f[NE] = &DDStart[NE  *size_Mat];
-				D.f[SW] = &DDStart[SW  *size_Mat];
-				D.f[SE] = &DDStart[SE  *size_Mat];
-				D.f[NW] = &DDStart[NW  *size_Mat];
-				D.f[TE] = &DDStart[TE  *size_Mat];
-				D.f[BW] = &DDStart[BW  *size_Mat];
-				D.f[BE] = &DDStart[BE  *size_Mat];
-				D.f[TW] = &DDStart[TW  *size_Mat];
-				D.f[TN] = &DDStart[TN  *size_Mat];
-				D.f[BS] = &DDStart[BS  *size_Mat];
-				D.f[BN] = &DDStart[BN  *size_Mat];
-				D.f[TS] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[TNE] = &DDStart[TNE *size_Mat];
-				D.f[TSW] = &DDStart[TSW *size_Mat];
-				D.f[TSE] = &DDStart[TSE *size_Mat];
-				D.f[TNW] = &DDStart[TNW *size_Mat];
-				D.f[BNE] = &DDStart[BNE *size_Mat];
-				D.f[BSW] = &DDStart[BSW *size_Mat];
-				D.f[BSE] = &DDStart[BSE *size_Mat];
-				D.f[BNW] = &DDStart[BNW *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
 			}
 			else
 			{
-				D.f[W] = &DDStart[E   *size_Mat];
-				D.f[E] = &DDStart[W   *size_Mat];
-				D.f[S] = &DDStart[N   *size_Mat];
-				D.f[N] = &DDStart[S   *size_Mat];
-				D.f[B] = &DDStart[T   *size_Mat];
-				D.f[T] = &DDStart[B   *size_Mat];
-				D.f[SW] = &DDStart[NE  *size_Mat];
-				D.f[NE] = &DDStart[SW  *size_Mat];
-				D.f[NW] = &DDStart[SE  *size_Mat];
-				D.f[SE] = &DDStart[NW  *size_Mat];
-				D.f[BW] = &DDStart[TE  *size_Mat];
-				D.f[TE] = &DDStart[BW  *size_Mat];
-				D.f[TW] = &DDStart[BE  *size_Mat];
-				D.f[BE] = &DDStart[TW  *size_Mat];
-				D.f[BS] = &DDStart[TN  *size_Mat];
-				D.f[TN] = &DDStart[BS  *size_Mat];
-				D.f[TS] = &DDStart[BN  *size_Mat];
-				D.f[BN] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[BSW] = &DDStart[TNE *size_Mat];
-				D.f[BNE] = &DDStart[TSW *size_Mat];
-				D.f[BNW] = &DDStart[TSE *size_Mat];
-				D.f[BSE] = &DDStart[TNW *size_Mat];
-				D.f[TSW] = &DDStart[BNE *size_Mat];
-				D.f[TNE] = &DDStart[BSW *size_Mat];
-				D.f[TNW] = &DDStart[BSE *size_Mat];
-				D.f[TSE] = &DDStart[BNW *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -156,33 +156,33 @@ extern "C" __global__ void LB_Kernel_CumulantK15BulkComp(real omega,
 			//unsigned int ktne = k;
 			//unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[E])[k];//[ke   ];// +  c2over27 ;(D.f[E   ])[k  ];//ke
-			real mfabb = (D.f[W])[kw];//[kw   ];// +  c2over27 ;(D.f[W   ])[kw ];
-			real mfbcb = (D.f[N])[k];//[kn   ];// +  c2over27 ;(D.f[N   ])[k  ];//kn
-			real mfbab = (D.f[S])[ks];//[ks   ];// +  c2over27 ;(D.f[S   ])[ks ];
-			real mfbbc = (D.f[T])[k];//[kt   ];// +  c2over27 ;(D.f[T   ])[k  ];//kt
-			real mfbba = (D.f[B])[kb];//[kb   ];// +  c2over27 ;(D.f[B   ])[kb ];
-			real mfccb = (D.f[NE])[k];//[kne  ];// +  c1over54 ;(D.f[NE  ])[k  ];//kne
-			real mfaab = (D.f[SW])[ksw];//[ksw  ];// +  c1over54 ;(D.f[SW  ])[ksw];
-			real mfcab = (D.f[SE])[ks];//[kse  ];// +  c1over54 ;(D.f[SE  ])[ks ];//kse
-			real mfacb = (D.f[NW])[kw];//[knw  ];// +  c1over54 ;(D.f[NW  ])[kw ];//knw
-			real mfcbc = (D.f[TE])[k];//[kte  ];// +  c1over54 ;(D.f[TE  ])[k  ];//kte
-			real mfaba = (D.f[BW])[kbw];//[kbw  ];// +  c1over54 ;(D.f[BW  ])[kbw];
-			real mfcba = (D.f[BE])[kb];//[kbe  ];// +  c1over54 ;(D.f[BE  ])[kb ];//kbe
-			real mfabc = (D.f[TW])[kw];//[ktw  ];// +  c1over54 ;(D.f[TW  ])[kw ];//ktw
-			real mfbcc = (D.f[TN])[k];//[ktn  ];// +  c1over54 ;(D.f[TN  ])[k  ];//ktn
-			real mfbaa = (D.f[BS])[kbs];//[kbs  ];// +  c1over54 ;(D.f[BS  ])[kbs];
-			real mfbca = (D.f[BN])[kb];//[kbn  ];// +  c1over54 ;(D.f[BN  ])[kb ];//kbn
-			real mfbac = (D.f[TS])[ks];//[kts  ];// +  c1over54 ;(D.f[TS  ])[ks ];//kts
-			real mfbbb = (D.f[REST])[k];//[kzero];// +  c8over27 ;(D.f[REST])[k  ];//kzero
-			real mfccc = (D.f[TNE])[k];//[ktne ];// +  c1over216;(D.f[TNE ])[k  ];//ktne
-			real mfaac = (D.f[TSW])[ksw];//[ktsw ];// +  c1over216;(D.f[TSW ])[ksw];//ktsw
-			real mfcac = (D.f[TSE])[ks];//[ktse ];// +  c1over216;(D.f[TSE ])[ks ];//ktse
-			real mfacc = (D.f[TNW])[kw];//[ktnw ];// +  c1over216;(D.f[TNW ])[kw ];//ktnw
-			real mfcca = (D.f[BNE])[kb];//[kbne ];// +  c1over216;(D.f[BNE ])[kb ];//kbne
-			real mfaaa = (D.f[BSW])[kbsw];//[kbsw ];// +  c1over216;(D.f[BSW ])[kbsw];
-			real mfcaa = (D.f[BSE])[kbs];//[kbse ];// +  c1over216;(D.f[BSE ])[kbs];//kbse
-			real mfaca = (D.f[BNW])[kbw];//[kbnw ];// +  c1over216;(D.f[BNW ])[kbw];//kbnw
+			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
+			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
+			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfbbb = (D.f[DIR_000])[k];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
+			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
 											////////////////////////////////////////////////////////////////////////////////////
 			real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 				(((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
@@ -912,33 +912,33 @@ extern "C" __global__ void LB_Kernel_CumulantK15BulkComp(real omega,
 			////////////////////////////////////////////////////////////////////////////////////
 
 			////////////////////////////////////////////////////////////////////////////////////
-			(D.f[E])[k] = mfabb;//(D.f[ E   ])[ke   ] = mfabb;// -  c2over27 ;  (D.f[ E   ])[k   ]                                                                     
-			(D.f[W])[kw] = mfcbb;//(D.f[ W   ])[kw   ] = mfcbb;// -  c2over27 ;  (D.f[ W   ])[kw  ]                                                                   
-			(D.f[N])[k] = mfbab;//(D.f[ N   ])[kn   ] = mfbab;// -  c2over27 ;	 (D.f[ N   ])[k   ]
-			(D.f[S])[ks] = mfbcb;//(D.f[ S   ])[ks   ] = mfbcb;// -  c2over27 ;	 (D.f[ S   ])[ks  ]
-			(D.f[T])[k] = mfbba;//(D.f[ T   ])[kt   ] = mfbba;// -  c2over27 ;	 (D.f[ T   ])[k   ]
-			(D.f[B])[kb] = mfbbc;//(D.f[ B   ])[kb   ] = mfbbc;// -  c2over27 ;	 (D.f[ B   ])[kb  ]
-			(D.f[NE])[k] = mfaab;//(D.f[ NE  ])[kne  ] = mfaab;// -  c1over54 ;	 (D.f[ NE  ])[k   ]
-			(D.f[SW])[ksw] = mfccb;//(D.f[ SW  ])[ksw  ] = mfccb;// -  c1over54 ;	 (D.f[ SW  ])[ksw ]
-			(D.f[SE])[ks] = mfacb;//(D.f[ SE  ])[kse  ] = mfacb;// -  c1over54 ;	 (D.f[ SE  ])[ks  ]
-			(D.f[NW])[kw] = mfcab;//(D.f[ NW  ])[knw  ] = mfcab;// -  c1over54 ;	 (D.f[ NW  ])[kw  ]
-			(D.f[TE])[k] = mfaba;//(D.f[ TE  ])[kte  ] = mfaba;// -  c1over54 ;	 (D.f[ TE  ])[k   ]
-			(D.f[BW])[kbw] = mfcbc;//(D.f[ BW  ])[kbw  ] = mfcbc;// -  c1over54 ;	 (D.f[ BW  ])[kbw ]
-			(D.f[BE])[kb] = mfabc;//(D.f[ BE  ])[kbe  ] = mfabc;// -  c1over54 ;	 (D.f[ BE  ])[kb  ]
-			(D.f[TW])[kw] = mfcba;//(D.f[ TW  ])[ktw  ] = mfcba;// -  c1over54 ;	 (D.f[ TW  ])[kw  ]
-			(D.f[TN])[k] = mfbaa;//(D.f[ TN  ])[ktn  ] = mfbaa;// -  c1over54 ;	 (D.f[ TN  ])[k   ]
-			(D.f[BS])[kbs] = mfbcc;//(D.f[ BS  ])[kbs  ] = mfbcc;// -  c1over54 ;	 (D.f[ BS  ])[kbs ]
-			(D.f[BN])[kb] = mfbac;//(D.f[ BN  ])[kbn  ] = mfbac;// -  c1over54 ;	 (D.f[ BN  ])[kb  ]
-			(D.f[TS])[ks] = mfbca;//(D.f[ TS  ])[kts  ] = mfbca;// -  c1over54 ;	 (D.f[ TS  ])[ks  ]
-			(D.f[REST])[k] = mfbbb;//(D.f[ REST])[kzero] = mfbbb;// -  c8over27 ;	 (D.f[ REST])[k   ]
-			(D.f[TNE])[k] = mfaaa;//(D.f[ TNE ])[ktne ] = mfaaa;// -  c1over216;	 (D.f[ TNE ])[k   ]
-			(D.f[TSE])[ks] = mfaca;//(D.f[ TSE ])[ktse ] = mfaca;// -  c1over216;	 (D.f[ TSE ])[ks  ]
-			(D.f[BNE])[kb] = mfaac;//(D.f[ BNE ])[kbne ] = mfaac;// -  c1over216;	 (D.f[ BNE ])[kb  ]
-			(D.f[BSE])[kbs] = mfacc;//(D.f[ BSE ])[kbse ] = mfacc;// -  c1over216;	 (D.f[ BSE ])[kbs ]
-			(D.f[TNW])[kw] = mfcaa;//(D.f[ TNW ])[ktnw ] = mfcaa;// -  c1over216;	 (D.f[ TNW ])[kw  ]
-			(D.f[TSW])[ksw] = mfcca;//(D.f[ TSW ])[ktsw ] = mfcca;// -  c1over216;	 (D.f[ TSW ])[ksw ]
-			(D.f[BNW])[kbw] = mfcac;//(D.f[ BNW ])[kbnw ] = mfcac;// -  c1over216;	 (D.f[ BNW ])[kbw ]
-			(D.f[BSW])[kbsw] = mfccc;//(D.f[ BSW ])[kbsw ] = mfccc;// -  c1over216;	 (D.f[ BSW ])[kbsw]
+			(D.f[DIR_P00])[k] = mfabb;//(D.f[ DIR_P00   ])[ke   ] = mfabb;// -  c2over27 ;  (D.f[ DIR_P00   ])[k   ]                                                                     
+			(D.f[DIR_M00])[kw] = mfcbb;//(D.f[ DIR_M00   ])[kw   ] = mfcbb;// -  c2over27 ;  (D.f[ DIR_M00   ])[kw  ]                                                                   
+			(D.f[DIR_0P0])[k] = mfbab;//(D.f[ DIR_0P0   ])[kn   ] = mfbab;// -  c2over27 ;	 (D.f[ DIR_0P0   ])[k   ]
+			(D.f[DIR_0M0])[ks] = mfbcb;//(D.f[ DIR_0M0   ])[ks   ] = mfbcb;// -  c2over27 ;	 (D.f[ DIR_0M0   ])[ks  ]
+			(D.f[DIR_00P])[k] = mfbba;//(D.f[ DIR_00P   ])[kt   ] = mfbba;// -  c2over27 ;	 (D.f[ DIR_00P   ])[k   ]
+			(D.f[DIR_00M])[kb] = mfbbc;//(D.f[ DIR_00M   ])[kb   ] = mfbbc;// -  c2over27 ;	 (D.f[ DIR_00M   ])[kb  ]
+			(D.f[DIR_PP0])[k] = mfaab;//(D.f[ DIR_PP0  ])[kne  ] = mfaab;// -  c1over54 ;	 (D.f[ DIR_PP0  ])[k   ]
+			(D.f[DIR_MM0])[ksw] = mfccb;//(D.f[ DIR_MM0  ])[ksw  ] = mfccb;// -  c1over54 ;	 (D.f[ DIR_MM0  ])[ksw ]
+			(D.f[DIR_PM0])[ks] = mfacb;//(D.f[ DIR_PM0  ])[kse  ] = mfacb;// -  c1over54 ;	 (D.f[ DIR_PM0  ])[ks  ]
+			(D.f[DIR_MP0])[kw] = mfcab;//(D.f[ DIR_MP0  ])[knw  ] = mfcab;// -  c1over54 ;	 (D.f[ DIR_MP0  ])[kw  ]
+			(D.f[DIR_P0P])[k] = mfaba;//(D.f[ DIR_P0P  ])[kte  ] = mfaba;// -  c1over54 ;	 (D.f[ DIR_P0P  ])[k   ]
+			(D.f[DIR_M0M])[kbw] = mfcbc;//(D.f[ DIR_M0M  ])[kbw  ] = mfcbc;// -  c1over54 ;	 (D.f[ DIR_M0M  ])[kbw ]
+			(D.f[DIR_P0M])[kb] = mfabc;//(D.f[ DIR_P0M  ])[kbe  ] = mfabc;// -  c1over54 ;	 (D.f[ DIR_P0M  ])[kb  ]
+			(D.f[DIR_M0P])[kw] = mfcba;//(D.f[ DIR_M0P  ])[ktw  ] = mfcba;// -  c1over54 ;	 (D.f[ DIR_M0P  ])[kw  ]
+			(D.f[DIR_0PP])[k] = mfbaa;//(D.f[ DIR_0PP  ])[ktn  ] = mfbaa;// -  c1over54 ;	 (D.f[ DIR_0PP  ])[k   ]
+			(D.f[DIR_0MM])[kbs] = mfbcc;//(D.f[ DIR_0MM  ])[kbs  ] = mfbcc;// -  c1over54 ;	 (D.f[ DIR_0MM  ])[kbs ]
+			(D.f[DIR_0PM])[kb] = mfbac;//(D.f[ DIR_0PM  ])[kbn  ] = mfbac;// -  c1over54 ;	 (D.f[ DIR_0PM  ])[kb  ]
+			(D.f[DIR_0MP])[ks] = mfbca;//(D.f[ DIR_0MP  ])[kts  ] = mfbca;// -  c1over54 ;	 (D.f[ DIR_0MP  ])[ks  ]
+			(D.f[DIR_000])[k] = mfbbb;//(D.f[ DIR_000])[kzero] = mfbbb;// -  c8over27 ;	 (D.f[ DIR_000])[k   ]
+			(D.f[DIR_PPP])[k] = mfaaa;//(D.f[ DIR_PPP ])[ktne ] = mfaaa;// -  c1over216;	 (D.f[ DIR_PPP ])[k   ]
+			(D.f[DIR_PMP])[ks] = mfaca;//(D.f[ DIR_PMP ])[ktse ] = mfaca;// -  c1over216;	 (D.f[ DIR_PMP ])[ks  ]
+			(D.f[DIR_PPM])[kb] = mfaac;//(D.f[ DIR_PPM ])[kbne ] = mfaac;// -  c1over216;	 (D.f[ DIR_PPM ])[kb  ]
+			(D.f[DIR_PMM])[kbs] = mfacc;//(D.f[ DIR_PMM ])[kbse ] = mfacc;// -  c1over216;	 (D.f[ DIR_PMM ])[kbs ]
+			(D.f[DIR_MPP])[kw] = mfcaa;//(D.f[ DIR_MPP ])[ktnw ] = mfcaa;// -  c1over216;	 (D.f[ DIR_MPP ])[kw  ]
+			(D.f[DIR_MMP])[ksw] = mfcca;//(D.f[ DIR_MMP ])[ktsw ] = mfcca;// -  c1over216;	 (D.f[ DIR_MMP ])[ksw ]
+			(D.f[DIR_MPM])[kbw] = mfcac;//(D.f[ DIR_MPM ])[kbnw ] = mfcac;// -  c1over216;	 (D.f[ DIR_MPM ])[kbw ]
+			(D.f[DIR_MMM])[kbsw] = mfccc;//(D.f[ DIR_MMM ])[kbsw ] = mfccc;// -  c1over216;	 (D.f[ DIR_MMM ])[kbsw]
 										////////////////////////////////////////////////////////////////////////////////////
 		}
 	}
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Bulk/CumulantK15BulkComp_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Bulk/CumulantK15BulkComp_Device.cuh
index caeb68e3077393bc786a022eeb3c60e6d0a3c059..e1d60ab0a181b88fba81bdb5ff649d7971c235a7 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Bulk/CumulantK15BulkComp_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Bulk/CumulantK15BulkComp_Device.cuh
@@ -5,7 +5,7 @@
 #include <curand.h>
 
 
-extern "C" __global__ void LB_Kernel_CumulantK15BulkComp(real omega,
+__global__ void LB_Kernel_CumulantK15BulkComp(real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Sponge/CumulantK15SpongeComp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Sponge/CumulantK15SpongeComp_Device.cu
index a65aa95802971e40af9f50c93fda578d4be24ebb..c2144d324aa3378e8fc9fc5b511bbed385b48a84 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Sponge/CumulantK15SpongeComp_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Sponge/CumulantK15SpongeComp_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_CumulantK15SpongeComp(real omegaIn,
+__global__ void LB_Kernel_CumulantK15SpongeComp(real omegaIn,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
@@ -40,63 +40,63 @@ extern "C" __global__ void LB_Kernel_CumulantK15SpongeComp(real omegaIn,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[E] = &DDStart[E   *size_Mat];
-				D.f[W] = &DDStart[W   *size_Mat];
-				D.f[N] = &DDStart[N   *size_Mat];
-				D.f[S] = &DDStart[S   *size_Mat];
-				D.f[T] = &DDStart[T   *size_Mat];
-				D.f[B] = &DDStart[B   *size_Mat];
-				D.f[NE] = &DDStart[NE  *size_Mat];
-				D.f[SW] = &DDStart[SW  *size_Mat];
-				D.f[SE] = &DDStart[SE  *size_Mat];
-				D.f[NW] = &DDStart[NW  *size_Mat];
-				D.f[TE] = &DDStart[TE  *size_Mat];
-				D.f[BW] = &DDStart[BW  *size_Mat];
-				D.f[BE] = &DDStart[BE  *size_Mat];
-				D.f[TW] = &DDStart[TW  *size_Mat];
-				D.f[TN] = &DDStart[TN  *size_Mat];
-				D.f[BS] = &DDStart[BS  *size_Mat];
-				D.f[BN] = &DDStart[BN  *size_Mat];
-				D.f[TS] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[TNE] = &DDStart[TNE *size_Mat];
-				D.f[TSW] = &DDStart[TSW *size_Mat];
-				D.f[TSE] = &DDStart[TSE *size_Mat];
-				D.f[TNW] = &DDStart[TNW *size_Mat];
-				D.f[BNE] = &DDStart[BNE *size_Mat];
-				D.f[BSW] = &DDStart[BSW *size_Mat];
-				D.f[BSE] = &DDStart[BSE *size_Mat];
-				D.f[BNW] = &DDStart[BNW *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
 			}
 			else
 			{
-				D.f[W] = &DDStart[E   *size_Mat];
-				D.f[E] = &DDStart[W   *size_Mat];
-				D.f[S] = &DDStart[N   *size_Mat];
-				D.f[N] = &DDStart[S   *size_Mat];
-				D.f[B] = &DDStart[T   *size_Mat];
-				D.f[T] = &DDStart[B   *size_Mat];
-				D.f[SW] = &DDStart[NE  *size_Mat];
-				D.f[NE] = &DDStart[SW  *size_Mat];
-				D.f[NW] = &DDStart[SE  *size_Mat];
-				D.f[SE] = &DDStart[NW  *size_Mat];
-				D.f[BW] = &DDStart[TE  *size_Mat];
-				D.f[TE] = &DDStart[BW  *size_Mat];
-				D.f[TW] = &DDStart[BE  *size_Mat];
-				D.f[BE] = &DDStart[TW  *size_Mat];
-				D.f[BS] = &DDStart[TN  *size_Mat];
-				D.f[TN] = &DDStart[BS  *size_Mat];
-				D.f[TS] = &DDStart[BN  *size_Mat];
-				D.f[BN] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[BSW] = &DDStart[TNE *size_Mat];
-				D.f[BNE] = &DDStart[TSW *size_Mat];
-				D.f[BNW] = &DDStart[TSE *size_Mat];
-				D.f[BSE] = &DDStart[TNW *size_Mat];
-				D.f[TSW] = &DDStart[BNE *size_Mat];
-				D.f[TNE] = &DDStart[BSW *size_Mat];
-				D.f[TNW] = &DDStart[BSE *size_Mat];
-				D.f[TSE] = &DDStart[BNW *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -129,33 +129,33 @@ extern "C" __global__ void LB_Kernel_CumulantK15SpongeComp(real omegaIn,
 			//unsigned int ktne = k;
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[E])[k];
-			real mfabb = (D.f[W])[kw];
-			real mfbcb = (D.f[N])[k];
-			real mfbab = (D.f[S])[ks];
-			real mfbbc = (D.f[T])[k];
-			real mfbba = (D.f[B])[kb];
-			real mfccb = (D.f[NE])[k];
-			real mfaab = (D.f[SW])[ksw];
-			real mfcab = (D.f[SE])[ks];
-			real mfacb = (D.f[NW])[kw];
-			real mfcbc = (D.f[TE])[k];
-			real mfaba = (D.f[BW])[kbw];
-			real mfcba = (D.f[BE])[kb];
-			real mfabc = (D.f[TW])[kw];
-			real mfbcc = (D.f[TN])[k];
-			real mfbaa = (D.f[BS])[kbs];
-			real mfbca = (D.f[BN])[kb];
-			real mfbac = (D.f[TS])[ks];
-			real mfbbb = (D.f[REST])[k];
-			real mfccc = (D.f[TNE])[k];
-			real mfaac = (D.f[TSW])[ksw];
-			real mfcac = (D.f[TSE])[ks];
-			real mfacc = (D.f[TNW])[kw];
-			real mfcca = (D.f[BNE])[kb];
-			real mfaaa = (D.f[BSW])[kbsw];
-			real mfcaa = (D.f[BSE])[kbs];
-			real mfaca = (D.f[BNW])[kbw];
+			real mfcbb = (D.f[DIR_P00])[k];
+			real mfabb = (D.f[DIR_M00])[kw];
+			real mfbcb = (D.f[DIR_0P0])[k];
+			real mfbab = (D.f[DIR_0M0])[ks];
+			real mfbbc = (D.f[DIR_00P])[k];
+			real mfbba = (D.f[DIR_00M])[kb];
+			real mfccb = (D.f[DIR_PP0])[k];
+			real mfaab = (D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks];
+			real mfacb = (D.f[DIR_MP0])[kw];
+			real mfcbc = (D.f[DIR_P0P])[k];
+			real mfaba = (D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb];
+			real mfabc = (D.f[DIR_M0P])[kw];
+			real mfbcc = (D.f[DIR_0PP])[k];
+			real mfbaa = (D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb];
+			real mfbac = (D.f[DIR_0MP])[ks];
+			real mfbbb = (D.f[DIR_000])[k];
+			real mfccc = (D.f[DIR_PPP])[k];
+			real mfaac = (D.f[DIR_MMP])[ksw];
+			real mfcac = (D.f[DIR_PMP])[ks];
+			real mfacc = (D.f[DIR_MPP])[kw];
+			real mfcca = (D.f[DIR_PPM])[kb];
+			real mfaaa = (D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];
+			real mfaca = (D.f[DIR_MPM])[kbw];
 			////////////////////////////////////////////////////////////////////////////////////
 			real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 				(((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
@@ -893,33 +893,33 @@ extern "C" __global__ void LB_Kernel_CumulantK15SpongeComp(real omegaIn,
 			////////////////////////////////////////////////////////////////////////////////////
 
 			////////////////////////////////////////////////////////////////////////////////////
-			(D.f[E])[k] = mfabb;
-			(D.f[W])[kw] = mfcbb;
-			(D.f[N])[k] = mfbab;
-			(D.f[S])[ks] = mfbcb;
-			(D.f[T])[k] = mfbba;
-			(D.f[B])[kb] = mfbbc;
-			(D.f[NE])[k] = mfaab;
-			(D.f[SW])[ksw] = mfccb;
-			(D.f[SE])[ks] = mfacb;
-			(D.f[NW])[kw] = mfcab;
-			(D.f[TE])[k] = mfaba;
-			(D.f[BW])[kbw] = mfcbc;
-			(D.f[BE])[kb] = mfabc;
-			(D.f[TW])[kw] = mfcba;
-			(D.f[TN])[k] = mfbaa;
-			(D.f[BS])[kbs] = mfbcc;
-			(D.f[BN])[kb] = mfbac;
-			(D.f[TS])[ks] = mfbca;
-			(D.f[REST])[k] = mfbbb;
-			(D.f[TNE])[k] = mfaaa;
-			(D.f[TSE])[ks] = mfaca;
-			(D.f[BNE])[kb] = mfaac;
-			(D.f[BSE])[kbs] = mfacc;
-			(D.f[TNW])[kw] = mfcaa;
-			(D.f[TSW])[ksw] = mfcca;
-			(D.f[BNW])[kbw] = mfcac;
-			(D.f[BSW])[kbsw] = mfccc;
+			(D.f[DIR_P00])[k] = mfabb;
+			(D.f[DIR_M00])[kw] = mfcbb;
+			(D.f[DIR_0P0])[k] = mfbab;
+			(D.f[DIR_0M0])[ks] = mfbcb;
+			(D.f[DIR_00P])[k] = mfbba;
+			(D.f[DIR_00M])[kb] = mfbbc;
+			(D.f[DIR_PP0])[k] = mfaab;
+			(D.f[DIR_MM0])[ksw] = mfccb;
+			(D.f[DIR_PM0])[ks] = mfacb;
+			(D.f[DIR_MP0])[kw] = mfcab;
+			(D.f[DIR_P0P])[k] = mfaba;
+			(D.f[DIR_M0M])[kbw] = mfcbc;
+			(D.f[DIR_P0M])[kb] = mfabc;
+			(D.f[DIR_M0P])[kw] = mfcba;
+			(D.f[DIR_0PP])[k] = mfbaa;
+			(D.f[DIR_0MM])[kbs] = mfbcc;
+			(D.f[DIR_0PM])[kb] = mfbac;
+			(D.f[DIR_0MP])[ks] = mfbca;
+			(D.f[DIR_000])[k] = mfbbb;
+			(D.f[DIR_PPP])[k] = mfaaa;
+			(D.f[DIR_PMP])[ks] = mfaca;
+			(D.f[DIR_PPM])[kb] = mfaac;
+			(D.f[DIR_PMM])[kbs] = mfacc;
+			(D.f[DIR_MPP])[kw] = mfcaa;
+			(D.f[DIR_MMP])[ksw] = mfcca;
+			(D.f[DIR_MPM])[kbw] = mfcac;
+			(D.f[DIR_MMM])[kbsw] = mfccc;
 			////////////////////////////////////////////////////////////////////////////////////
 		}
 	}
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Sponge/CumulantK15SpongeComp_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Sponge/CumulantK15SpongeComp_Device.cuh
index a55ec0ada7e0a05d05285bc29406d1a497467a54..3a526112e56947a77d5ac337052d62e95dd4e578 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Sponge/CumulantK15SpongeComp_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK15Sponge/CumulantK15SpongeComp_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_CumulantK15SpongeComp(real omega,
+__global__ void LB_Kernel_CumulantK15SpongeComp(real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp_Device.cu
index 9bc2bb5877197ab21c2cd82b4b672c8660a12211..7cf27aa883cbfd3a0e4a0a36fa61649a62d06eeb 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp_Device.cu
@@ -7,7 +7,7 @@ using namespace vf::lbm::dir;
 #include "math.h"
 
 
-extern "C" __global__ void LB_Kernel_CumulantK17Comp(real omega,
+__global__ void LB_Kernel_CumulantK17Comp(real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
@@ -41,63 +41,63 @@ extern "C" __global__ void LB_Kernel_CumulantK17Comp(real omega,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[E] = &DDStart[E   *size_Mat];
-				D.f[W] = &DDStart[W   *size_Mat];
-				D.f[N] = &DDStart[N   *size_Mat];
-				D.f[S] = &DDStart[S   *size_Mat];
-				D.f[T] = &DDStart[T   *size_Mat];
-				D.f[B] = &DDStart[B   *size_Mat];
-				D.f[NE] = &DDStart[NE  *size_Mat];
-				D.f[SW] = &DDStart[SW  *size_Mat];
-				D.f[SE] = &DDStart[SE  *size_Mat];
-				D.f[NW] = &DDStart[NW  *size_Mat];
-				D.f[TE] = &DDStart[TE  *size_Mat];
-				D.f[BW] = &DDStart[BW  *size_Mat];
-				D.f[BE] = &DDStart[BE  *size_Mat];
-				D.f[TW] = &DDStart[TW  *size_Mat];
-				D.f[TN] = &DDStart[TN  *size_Mat];
-				D.f[BS] = &DDStart[BS  *size_Mat];
-				D.f[BN] = &DDStart[BN  *size_Mat];
-				D.f[TS] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[TNE] = &DDStart[TNE *size_Mat];
-				D.f[TSW] = &DDStart[TSW *size_Mat];
-				D.f[TSE] = &DDStart[TSE *size_Mat];
-				D.f[TNW] = &DDStart[TNW *size_Mat];
-				D.f[BNE] = &DDStart[BNE *size_Mat];
-				D.f[BSW] = &DDStart[BSW *size_Mat];
-				D.f[BSE] = &DDStart[BSE *size_Mat];
-				D.f[BNW] = &DDStart[BNW *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
 			}
 			else
 			{
-				D.f[W] = &DDStart[E   *size_Mat];
-				D.f[E] = &DDStart[W   *size_Mat];
-				D.f[S] = &DDStart[N   *size_Mat];
-				D.f[N] = &DDStart[S   *size_Mat];
-				D.f[B] = &DDStart[T   *size_Mat];
-				D.f[T] = &DDStart[B   *size_Mat];
-				D.f[SW] = &DDStart[NE  *size_Mat];
-				D.f[NE] = &DDStart[SW  *size_Mat];
-				D.f[NW] = &DDStart[SE  *size_Mat];
-				D.f[SE] = &DDStart[NW  *size_Mat];
-				D.f[BW] = &DDStart[TE  *size_Mat];
-				D.f[TE] = &DDStart[BW  *size_Mat];
-				D.f[TW] = &DDStart[BE  *size_Mat];
-				D.f[BE] = &DDStart[TW  *size_Mat];
-				D.f[BS] = &DDStart[TN  *size_Mat];
-				D.f[TN] = &DDStart[BS  *size_Mat];
-				D.f[TS] = &DDStart[BN  *size_Mat];
-				D.f[BN] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[BSW] = &DDStart[TNE *size_Mat];
-				D.f[BNE] = &DDStart[TSW *size_Mat];
-				D.f[BNW] = &DDStart[TSE *size_Mat];
-				D.f[BSE] = &DDStart[TNW *size_Mat];
-				D.f[TSW] = &DDStart[BNE *size_Mat];
-				D.f[TNE] = &DDStart[BSW *size_Mat];
-				D.f[TNW] = &DDStart[BSE *size_Mat];
-				D.f[TSE] = &DDStart[BNW *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -158,33 +158,33 @@ extern "C" __global__ void LB_Kernel_CumulantK17Comp(real omega,
 			//unsigned int ktne = k;
 			//unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[E])[k];//[ke   ];// +  c2over27 ;(D.f[E   ])[k  ];//ke
-			real mfabb = (D.f[W])[kw];//[kw   ];// +  c2over27 ;(D.f[W   ])[kw ];
-			real mfbcb = (D.f[N])[k];//[kn   ];// +  c2over27 ;(D.f[N   ])[k  ];//kn
-			real mfbab = (D.f[S])[ks];//[ks   ];// +  c2over27 ;(D.f[S   ])[ks ];
-			real mfbbc = (D.f[T])[k];//[kt   ];// +  c2over27 ;(D.f[T   ])[k  ];//kt
-			real mfbba = (D.f[B])[kb];//[kb   ];// +  c2over27 ;(D.f[B   ])[kb ];
-			real mfccb = (D.f[NE])[k];//[kne  ];// +  c1over54 ;(D.f[NE  ])[k  ];//kne
-			real mfaab = (D.f[SW])[ksw];//[ksw  ];// +  c1over54 ;(D.f[SW  ])[ksw];
-			real mfcab = (D.f[SE])[ks];//[kse  ];// +  c1over54 ;(D.f[SE  ])[ks ];//kse
-			real mfacb = (D.f[NW])[kw];//[knw  ];// +  c1over54 ;(D.f[NW  ])[kw ];//knw
-			real mfcbc = (D.f[TE])[k];//[kte  ];// +  c1over54 ;(D.f[TE  ])[k  ];//kte
-			real mfaba = (D.f[BW])[kbw];//[kbw  ];// +  c1over54 ;(D.f[BW  ])[kbw];
-			real mfcba = (D.f[BE])[kb];//[kbe  ];// +  c1over54 ;(D.f[BE  ])[kb ];//kbe
-			real mfabc = (D.f[TW])[kw];//[ktw  ];// +  c1over54 ;(D.f[TW  ])[kw ];//ktw
-			real mfbcc = (D.f[TN])[k];//[ktn  ];// +  c1over54 ;(D.f[TN  ])[k  ];//ktn
-			real mfbaa = (D.f[BS])[kbs];//[kbs  ];// +  c1over54 ;(D.f[BS  ])[kbs];
-			real mfbca = (D.f[BN])[kb];//[kbn  ];// +  c1over54 ;(D.f[BN  ])[kb ];//kbn
-			real mfbac = (D.f[TS])[ks];//[kts  ];// +  c1over54 ;(D.f[TS  ])[ks ];//kts
-			real mfbbb = (D.f[REST])[k];//[kzero];// +  c8over27 ;(D.f[REST])[k  ];//kzero
-			real mfccc = (D.f[TNE])[k];//[ktne ];// +  c1over216;(D.f[TNE ])[k  ];//ktne
-			real mfaac = (D.f[TSW])[ksw];//[ktsw ];// +  c1over216;(D.f[TSW ])[ksw];//ktsw
-			real mfcac = (D.f[TSE])[ks];//[ktse ];// +  c1over216;(D.f[TSE ])[ks ];//ktse
-			real mfacc = (D.f[TNW])[kw];//[ktnw ];// +  c1over216;(D.f[TNW ])[kw ];//ktnw
-			real mfcca = (D.f[BNE])[kb];//[kbne ];// +  c1over216;(D.f[BNE ])[kb ];//kbne
-			real mfaaa = (D.f[BSW])[kbsw];//[kbsw ];// +  c1over216;(D.f[BSW ])[kbsw];
-			real mfcaa = (D.f[BSE])[kbs];//[kbse ];// +  c1over216;(D.f[BSE ])[kbs];//kbse
-			real mfaca = (D.f[BNW])[kbw];//[kbnw ];// +  c1over216;(D.f[BNW ])[kbw];//kbnw
+			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
+			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
+			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfbbb = (D.f[DIR_000])[k];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
+			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
 											////////////////////////////////////////////////////////////////////////////////////
 			real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 				(((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
@@ -1007,33 +1007,33 @@ extern "C" __global__ void LB_Kernel_CumulantK17Comp(real omega,
 			////////////////////////////////////////////////////////////////////////////////////
 
 			////////////////////////////////////////////////////////////////////////////////////
-			(D.f[E])[k] = mfabb;//(D.f[ E   ])[ke   ] = mfabb;// -  c2over27 ;  (D.f[ E   ])[k   ]                                                                     
-			(D.f[W])[kw] = mfcbb;//(D.f[ W   ])[kw   ] = mfcbb;// -  c2over27 ;  (D.f[ W   ])[kw  ]                                                                   
-			(D.f[N])[k] = mfbab;//(D.f[ N   ])[kn   ] = mfbab;// -  c2over27 ;	 (D.f[ N   ])[k   ]
-			(D.f[S])[ks] = mfbcb;//(D.f[ S   ])[ks   ] = mfbcb;// -  c2over27 ;	 (D.f[ S   ])[ks  ]
-			(D.f[T])[k] = mfbba;//(D.f[ T   ])[kt   ] = mfbba;// -  c2over27 ;	 (D.f[ T   ])[k   ]
-			(D.f[B])[kb] = mfbbc;//(D.f[ B   ])[kb   ] = mfbbc;// -  c2over27 ;	 (D.f[ B   ])[kb  ]
-			(D.f[NE])[k] = mfaab;//(D.f[ NE  ])[kne  ] = mfaab;// -  c1over54 ;	 (D.f[ NE  ])[k   ]
-			(D.f[SW])[ksw] = mfccb;//(D.f[ SW  ])[ksw  ] = mfccb;// -  c1over54 ;	 (D.f[ SW  ])[ksw ]
-			(D.f[SE])[ks] = mfacb;//(D.f[ SE  ])[kse  ] = mfacb;// -  c1over54 ;	 (D.f[ SE  ])[ks  ]
-			(D.f[NW])[kw] = mfcab;//(D.f[ NW  ])[knw  ] = mfcab;// -  c1over54 ;	 (D.f[ NW  ])[kw  ]
-			(D.f[TE])[k] = mfaba;//(D.f[ TE  ])[kte  ] = mfaba;// -  c1over54 ;	 (D.f[ TE  ])[k   ]
-			(D.f[BW])[kbw] = mfcbc;//(D.f[ BW  ])[kbw  ] = mfcbc;// -  c1over54 ;	 (D.f[ BW  ])[kbw ]
-			(D.f[BE])[kb] = mfabc;//(D.f[ BE  ])[kbe  ] = mfabc;// -  c1over54 ;	 (D.f[ BE  ])[kb  ]
-			(D.f[TW])[kw] = mfcba;//(D.f[ TW  ])[ktw  ] = mfcba;// -  c1over54 ;	 (D.f[ TW  ])[kw  ]
-			(D.f[TN])[k] = mfbaa;//(D.f[ TN  ])[ktn  ] = mfbaa;// -  c1over54 ;	 (D.f[ TN  ])[k   ]
-			(D.f[BS])[kbs] = mfbcc;//(D.f[ BS  ])[kbs  ] = mfbcc;// -  c1over54 ;	 (D.f[ BS  ])[kbs ]
-			(D.f[BN])[kb] = mfbac;//(D.f[ BN  ])[kbn  ] = mfbac;// -  c1over54 ;	 (D.f[ BN  ])[kb  ]
-			(D.f[TS])[ks] = mfbca;//(D.f[ TS  ])[kts  ] = mfbca;// -  c1over54 ;	 (D.f[ TS  ])[ks  ]
-			(D.f[REST])[k] = mfbbb;//(D.f[ REST])[kzero] = mfbbb;// -  c8over27 ;	 (D.f[ REST])[k   ]
-			(D.f[TNE])[k] = mfaaa;//(D.f[ TNE ])[ktne ] = mfaaa;// -  c1over216;	 (D.f[ TNE ])[k   ]
-			(D.f[TSE])[ks] = mfaca;//(D.f[ TSE ])[ktse ] = mfaca;// -  c1over216;	 (D.f[ TSE ])[ks  ]
-			(D.f[BNE])[kb] = mfaac;//(D.f[ BNE ])[kbne ] = mfaac;// -  c1over216;	 (D.f[ BNE ])[kb  ]
-			(D.f[BSE])[kbs] = mfacc;//(D.f[ BSE ])[kbse ] = mfacc;// -  c1over216;	 (D.f[ BSE ])[kbs ]
-			(D.f[TNW])[kw] = mfcaa;//(D.f[ TNW ])[ktnw ] = mfcaa;// -  c1over216;	 (D.f[ TNW ])[kw  ]
-			(D.f[TSW])[ksw] = mfcca;//(D.f[ TSW ])[ktsw ] = mfcca;// -  c1over216;	 (D.f[ TSW ])[ksw ]
-			(D.f[BNW])[kbw] = mfcac;//(D.f[ BNW ])[kbnw ] = mfcac;// -  c1over216;	 (D.f[ BNW ])[kbw ]
-			(D.f[BSW])[kbsw] = mfccc;//(D.f[ BSW ])[kbsw ] = mfccc;// -  c1over216;	 (D.f[ BSW ])[kbsw]
+			(D.f[DIR_P00])[k] = mfabb;//(D.f[ DIR_P00   ])[ke   ] = mfabb;// -  c2over27 ;  (D.f[ DIR_P00   ])[k   ]                                                                     
+			(D.f[DIR_M00])[kw] = mfcbb;//(D.f[ DIR_M00   ])[kw   ] = mfcbb;// -  c2over27 ;  (D.f[ DIR_M00   ])[kw  ]                                                                   
+			(D.f[DIR_0P0])[k] = mfbab;//(D.f[ DIR_0P0   ])[kn   ] = mfbab;// -  c2over27 ;	 (D.f[ DIR_0P0   ])[k   ]
+			(D.f[DIR_0M0])[ks] = mfbcb;//(D.f[ DIR_0M0   ])[ks   ] = mfbcb;// -  c2over27 ;	 (D.f[ DIR_0M0   ])[ks  ]
+			(D.f[DIR_00P])[k] = mfbba;//(D.f[ DIR_00P   ])[kt   ] = mfbba;// -  c2over27 ;	 (D.f[ DIR_00P   ])[k   ]
+			(D.f[DIR_00M])[kb] = mfbbc;//(D.f[ DIR_00M   ])[kb   ] = mfbbc;// -  c2over27 ;	 (D.f[ DIR_00M   ])[kb  ]
+			(D.f[DIR_PP0])[k] = mfaab;//(D.f[ DIR_PP0  ])[kne  ] = mfaab;// -  c1over54 ;	 (D.f[ DIR_PP0  ])[k   ]
+			(D.f[DIR_MM0])[ksw] = mfccb;//(D.f[ DIR_MM0  ])[ksw  ] = mfccb;// -  c1over54 ;	 (D.f[ DIR_MM0  ])[ksw ]
+			(D.f[DIR_PM0])[ks] = mfacb;//(D.f[ DIR_PM0  ])[kse  ] = mfacb;// -  c1over54 ;	 (D.f[ DIR_PM0  ])[ks  ]
+			(D.f[DIR_MP0])[kw] = mfcab;//(D.f[ DIR_MP0  ])[knw  ] = mfcab;// -  c1over54 ;	 (D.f[ DIR_MP0  ])[kw  ]
+			(D.f[DIR_P0P])[k] = mfaba;//(D.f[ DIR_P0P  ])[kte  ] = mfaba;// -  c1over54 ;	 (D.f[ DIR_P0P  ])[k   ]
+			(D.f[DIR_M0M])[kbw] = mfcbc;//(D.f[ DIR_M0M  ])[kbw  ] = mfcbc;// -  c1over54 ;	 (D.f[ DIR_M0M  ])[kbw ]
+			(D.f[DIR_P0M])[kb] = mfabc;//(D.f[ DIR_P0M  ])[kbe  ] = mfabc;// -  c1over54 ;	 (D.f[ DIR_P0M  ])[kb  ]
+			(D.f[DIR_M0P])[kw] = mfcba;//(D.f[ DIR_M0P  ])[ktw  ] = mfcba;// -  c1over54 ;	 (D.f[ DIR_M0P  ])[kw  ]
+			(D.f[DIR_0PP])[k] = mfbaa;//(D.f[ DIR_0PP  ])[ktn  ] = mfbaa;// -  c1over54 ;	 (D.f[ DIR_0PP  ])[k   ]
+			(D.f[DIR_0MM])[kbs] = mfbcc;//(D.f[ DIR_0MM  ])[kbs  ] = mfbcc;// -  c1over54 ;	 (D.f[ DIR_0MM  ])[kbs ]
+			(D.f[DIR_0PM])[kb] = mfbac;//(D.f[ DIR_0PM  ])[kbn  ] = mfbac;// -  c1over54 ;	 (D.f[ DIR_0PM  ])[kb  ]
+			(D.f[DIR_0MP])[ks] = mfbca;//(D.f[ DIR_0MP  ])[kts  ] = mfbca;// -  c1over54 ;	 (D.f[ DIR_0MP  ])[ks  ]
+			(D.f[DIR_000])[k] = mfbbb;//(D.f[ DIR_000])[kzero] = mfbbb;// -  c8over27 ;	 (D.f[ DIR_000])[k   ]
+			(D.f[DIR_PPP])[k] = mfaaa;//(D.f[ DIR_PPP ])[ktne ] = mfaaa;// -  c1over216;	 (D.f[ DIR_PPP ])[k   ]
+			(D.f[DIR_PMP])[ks] = mfaca;//(D.f[ DIR_PMP ])[ktse ] = mfaca;// -  c1over216;	 (D.f[ DIR_PMP ])[ks  ]
+			(D.f[DIR_PPM])[kb] = mfaac;//(D.f[ DIR_PPM ])[kbne ] = mfaac;// -  c1over216;	 (D.f[ DIR_PPM ])[kb  ]
+			(D.f[DIR_PMM])[kbs] = mfacc;//(D.f[ DIR_PMM ])[kbse ] = mfacc;// -  c1over216;	 (D.f[ DIR_PMM ])[kbs ]
+			(D.f[DIR_MPP])[kw] = mfcaa;//(D.f[ DIR_MPP ])[ktnw ] = mfcaa;// -  c1over216;	 (D.f[ DIR_MPP ])[kw  ]
+			(D.f[DIR_MMP])[ksw] = mfcca;//(D.f[ DIR_MMP ])[ktsw ] = mfcca;// -  c1over216;	 (D.f[ DIR_MMP ])[ksw ]
+			(D.f[DIR_MPM])[kbw] = mfcac;//(D.f[ DIR_MPM ])[kbnw ] = mfcac;// -  c1over216;	 (D.f[ DIR_MPM ])[kbw ]
+			(D.f[DIR_MMM])[kbsw] = mfccc;//(D.f[ DIR_MMM ])[kbsw ] = mfccc;// -  c1over216;	 (D.f[ DIR_MMM ])[kbsw]
 										////////////////////////////////////////////////////////////////////////////////////
 		}
 	}
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp_Device.cuh
index 4e6fea77b1e0364b5f56d2fd560d245f60343fe1..f44842057d554498b0b5d4c733e2425e524a3b75 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17/CumulantK17Comp_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_CumulantK17Comp(	real omega,
+__global__ void LB_Kernel_CumulantK17Comp(	real omega,
 														unsigned int* bcMatD,
 														unsigned int* neighborX,
 														unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp_Device.cu
index dce86433b943a54192a41d5ea1831b4e3fe76ce1..cec04116ae4b411b1b3816ff4a8cab606c92491e 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_CumulantK17BulkComp(real omega,
+__global__ void LB_Kernel_CumulantK17BulkComp(real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
@@ -40,63 +40,63 @@ extern "C" __global__ void LB_Kernel_CumulantK17BulkComp(real omega,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[E] = &DDStart[E   *size_Mat];
-				D.f[W] = &DDStart[W   *size_Mat];
-				D.f[N] = &DDStart[N   *size_Mat];
-				D.f[S] = &DDStart[S   *size_Mat];
-				D.f[T] = &DDStart[T   *size_Mat];
-				D.f[B] = &DDStart[B   *size_Mat];
-				D.f[NE] = &DDStart[NE  *size_Mat];
-				D.f[SW] = &DDStart[SW  *size_Mat];
-				D.f[SE] = &DDStart[SE  *size_Mat];
-				D.f[NW] = &DDStart[NW  *size_Mat];
-				D.f[TE] = &DDStart[TE  *size_Mat];
-				D.f[BW] = &DDStart[BW  *size_Mat];
-				D.f[BE] = &DDStart[BE  *size_Mat];
-				D.f[TW] = &DDStart[TW  *size_Mat];
-				D.f[TN] = &DDStart[TN  *size_Mat];
-				D.f[BS] = &DDStart[BS  *size_Mat];
-				D.f[BN] = &DDStart[BN  *size_Mat];
-				D.f[TS] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[TNE] = &DDStart[TNE *size_Mat];
-				D.f[TSW] = &DDStart[TSW *size_Mat];
-				D.f[TSE] = &DDStart[TSE *size_Mat];
-				D.f[TNW] = &DDStart[TNW *size_Mat];
-				D.f[BNE] = &DDStart[BNE *size_Mat];
-				D.f[BSW] = &DDStart[BSW *size_Mat];
-				D.f[BSE] = &DDStart[BSE *size_Mat];
-				D.f[BNW] = &DDStart[BNW *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
 			}
 			else
 			{
-				D.f[W] = &DDStart[E   *size_Mat];
-				D.f[E] = &DDStart[W   *size_Mat];
-				D.f[S] = &DDStart[N   *size_Mat];
-				D.f[N] = &DDStart[S   *size_Mat];
-				D.f[B] = &DDStart[T   *size_Mat];
-				D.f[T] = &DDStart[B   *size_Mat];
-				D.f[SW] = &DDStart[NE  *size_Mat];
-				D.f[NE] = &DDStart[SW  *size_Mat];
-				D.f[NW] = &DDStart[SE  *size_Mat];
-				D.f[SE] = &DDStart[NW  *size_Mat];
-				D.f[BW] = &DDStart[TE  *size_Mat];
-				D.f[TE] = &DDStart[BW  *size_Mat];
-				D.f[TW] = &DDStart[BE  *size_Mat];
-				D.f[BE] = &DDStart[TW  *size_Mat];
-				D.f[BS] = &DDStart[TN  *size_Mat];
-				D.f[TN] = &DDStart[BS  *size_Mat];
-				D.f[TS] = &DDStart[BN  *size_Mat];
-				D.f[BN] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[BSW] = &DDStart[TNE *size_Mat];
-				D.f[BNE] = &DDStart[TSW *size_Mat];
-				D.f[BNW] = &DDStart[TSE *size_Mat];
-				D.f[BSE] = &DDStart[TNW *size_Mat];
-				D.f[TSW] = &DDStart[BNE *size_Mat];
-				D.f[TNE] = &DDStart[BSW *size_Mat];
-				D.f[TNW] = &DDStart[BSE *size_Mat];
-				D.f[TSE] = &DDStart[BNW *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -129,33 +129,33 @@ extern "C" __global__ void LB_Kernel_CumulantK17BulkComp(real omega,
 			//unsigned int ktne = k;
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[E])[k];//[ke   ];// +  c2over27 ;(D.f[E   ])[k  ];//ke
-			real mfabb = (D.f[W])[kw];//[kw   ];// +  c2over27 ;(D.f[W   ])[kw ];
-			real mfbcb = (D.f[N])[k];//[kn   ];// +  c2over27 ;(D.f[N   ])[k  ];//kn
-			real mfbab = (D.f[S])[ks];//[ks   ];// +  c2over27 ;(D.f[S   ])[ks ];
-			real mfbbc = (D.f[T])[k];//[kt   ];// +  c2over27 ;(D.f[T   ])[k  ];//kt
-			real mfbba = (D.f[B])[kb];//[kb   ];// +  c2over27 ;(D.f[B   ])[kb ];
-			real mfccb = (D.f[NE])[k];//[kne  ];// +  c1over54 ;(D.f[NE  ])[k  ];//kne
-			real mfaab = (D.f[SW])[ksw];//[ksw  ];// +  c1over54 ;(D.f[SW  ])[ksw];
-			real mfcab = (D.f[SE])[ks];//[kse  ];// +  c1over54 ;(D.f[SE  ])[ks ];//kse
-			real mfacb = (D.f[NW])[kw];//[knw  ];// +  c1over54 ;(D.f[NW  ])[kw ];//knw
-			real mfcbc = (D.f[TE])[k];//[kte  ];// +  c1over54 ;(D.f[TE  ])[k  ];//kte
-			real mfaba = (D.f[BW])[kbw];//[kbw  ];// +  c1over54 ;(D.f[BW  ])[kbw];
-			real mfcba = (D.f[BE])[kb];//[kbe  ];// +  c1over54 ;(D.f[BE  ])[kb ];//kbe
-			real mfabc = (D.f[TW])[kw];//[ktw  ];// +  c1over54 ;(D.f[TW  ])[kw ];//ktw
-			real mfbcc = (D.f[TN])[k];//[ktn  ];// +  c1over54 ;(D.f[TN  ])[k  ];//ktn
-			real mfbaa = (D.f[BS])[kbs];//[kbs  ];// +  c1over54 ;(D.f[BS  ])[kbs];
-			real mfbca = (D.f[BN])[kb];//[kbn  ];// +  c1over54 ;(D.f[BN  ])[kb ];//kbn
-			real mfbac = (D.f[TS])[ks];//[kts  ];// +  c1over54 ;(D.f[TS  ])[ks ];//kts
-			real mfbbb = (D.f[REST])[k];//[kzero];// +  c8over27 ;(D.f[REST])[k  ];//kzero
-			real mfccc = (D.f[TNE])[k];//[ktne ];// +  c1over216;(D.f[TNE ])[k  ];//ktne
-			real mfaac = (D.f[TSW])[ksw];//[ktsw ];// +  c1over216;(D.f[TSW ])[ksw];//ktsw
-			real mfcac = (D.f[TSE])[ks];//[ktse ];// +  c1over216;(D.f[TSE ])[ks ];//ktse
-			real mfacc = (D.f[TNW])[kw];//[ktnw ];// +  c1over216;(D.f[TNW ])[kw ];//ktnw
-			real mfcca = (D.f[BNE])[kb];//[kbne ];// +  c1over216;(D.f[BNE ])[kb ];//kbne
-			real mfaaa = (D.f[BSW])[kbsw];//[kbsw ];// +  c1over216;(D.f[BSW ])[kbsw];
-			real mfcaa = (D.f[BSE])[kbs];//[kbse ];// +  c1over216;(D.f[BSE ])[kbs];//kbse
-			real mfaca = (D.f[BNW])[kbw];//[kbnw ];// +  c1over216;(D.f[BNW ])[kbw];//kbnw
+			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
+			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
+			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfbbb = (D.f[DIR_000])[k];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
+			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
 											////////////////////////////////////////////////////////////////////////////////////
 			real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 				(((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
@@ -886,33 +886,33 @@ extern "C" __global__ void LB_Kernel_CumulantK17BulkComp(real omega,
 			////////////////////////////////////////////////////////////////////////////////////
 
 			////////////////////////////////////////////////////////////////////////////////////
-			(D.f[E])[k] = mfabb;//(D.f[ E   ])[ke   ] = mfabb;// -  c2over27 ;  (D.f[ E   ])[k   ]                                                                     
-			(D.f[W])[kw] = mfcbb;//(D.f[ W   ])[kw   ] = mfcbb;// -  c2over27 ;  (D.f[ W   ])[kw  ]                                                                   
-			(D.f[N])[k] = mfbab;//(D.f[ N   ])[kn   ] = mfbab;// -  c2over27 ;	 (D.f[ N   ])[k   ]
-			(D.f[S])[ks] = mfbcb;//(D.f[ S   ])[ks   ] = mfbcb;// -  c2over27 ;	 (D.f[ S   ])[ks  ]
-			(D.f[T])[k] = mfbba;//(D.f[ T   ])[kt   ] = mfbba;// -  c2over27 ;	 (D.f[ T   ])[k   ]
-			(D.f[B])[kb] = mfbbc;//(D.f[ B   ])[kb   ] = mfbbc;// -  c2over27 ;	 (D.f[ B   ])[kb  ]
-			(D.f[NE])[k] = mfaab;//(D.f[ NE  ])[kne  ] = mfaab;// -  c1over54 ;	 (D.f[ NE  ])[k   ]
-			(D.f[SW])[ksw] = mfccb;//(D.f[ SW  ])[ksw  ] = mfccb;// -  c1over54 ;	 (D.f[ SW  ])[ksw ]
-			(D.f[SE])[ks] = mfacb;//(D.f[ SE  ])[kse  ] = mfacb;// -  c1over54 ;	 (D.f[ SE  ])[ks  ]
-			(D.f[NW])[kw] = mfcab;//(D.f[ NW  ])[knw  ] = mfcab;// -  c1over54 ;	 (D.f[ NW  ])[kw  ]
-			(D.f[TE])[k] = mfaba;//(D.f[ TE  ])[kte  ] = mfaba;// -  c1over54 ;	 (D.f[ TE  ])[k   ]
-			(D.f[BW])[kbw] = mfcbc;//(D.f[ BW  ])[kbw  ] = mfcbc;// -  c1over54 ;	 (D.f[ BW  ])[kbw ]
-			(D.f[BE])[kb] = mfabc;//(D.f[ BE  ])[kbe  ] = mfabc;// -  c1over54 ;	 (D.f[ BE  ])[kb  ]
-			(D.f[TW])[kw] = mfcba;//(D.f[ TW  ])[ktw  ] = mfcba;// -  c1over54 ;	 (D.f[ TW  ])[kw  ]
-			(D.f[TN])[k] = mfbaa;//(D.f[ TN  ])[ktn  ] = mfbaa;// -  c1over54 ;	 (D.f[ TN  ])[k   ]
-			(D.f[BS])[kbs] = mfbcc;//(D.f[ BS  ])[kbs  ] = mfbcc;// -  c1over54 ;	 (D.f[ BS  ])[kbs ]
-			(D.f[BN])[kb] = mfbac;//(D.f[ BN  ])[kbn  ] = mfbac;// -  c1over54 ;	 (D.f[ BN  ])[kb  ]
-			(D.f[TS])[ks] = mfbca;//(D.f[ TS  ])[kts  ] = mfbca;// -  c1over54 ;	 (D.f[ TS  ])[ks  ]
-			(D.f[REST])[k] = mfbbb;//(D.f[ REST])[kzero] = mfbbb;// -  c8over27 ;	 (D.f[ REST])[k   ]
-			(D.f[TNE])[k] = mfaaa;//(D.f[ TNE ])[ktne ] = mfaaa;// -  c1over216;	 (D.f[ TNE ])[k   ]
-			(D.f[TSE])[ks] = mfaca;//(D.f[ TSE ])[ktse ] = mfaca;// -  c1over216;	 (D.f[ TSE ])[ks  ]
-			(D.f[BNE])[kb] = mfaac;//(D.f[ BNE ])[kbne ] = mfaac;// -  c1over216;	 (D.f[ BNE ])[kb  ]
-			(D.f[BSE])[kbs] = mfacc;//(D.f[ BSE ])[kbse ] = mfacc;// -  c1over216;	 (D.f[ BSE ])[kbs ]
-			(D.f[TNW])[kw] = mfcaa;//(D.f[ TNW ])[ktnw ] = mfcaa;// -  c1over216;	 (D.f[ TNW ])[kw  ]
-			(D.f[TSW])[ksw] = mfcca;//(D.f[ TSW ])[ktsw ] = mfcca;// -  c1over216;	 (D.f[ TSW ])[ksw ]
-			(D.f[BNW])[kbw] = mfcac;//(D.f[ BNW ])[kbnw ] = mfcac;// -  c1over216;	 (D.f[ BNW ])[kbw ]
-			(D.f[BSW])[kbsw] = mfccc;//(D.f[ BSW ])[kbsw ] = mfccc;// -  c1over216;	 (D.f[ BSW ])[kbsw]
+			(D.f[DIR_P00])[k] = mfabb;//(D.f[ DIR_P00   ])[ke   ] = mfabb;// -  c2over27 ;  (D.f[ DIR_P00   ])[k   ]                                                                     
+			(D.f[DIR_M00])[kw] = mfcbb;//(D.f[ DIR_M00   ])[kw   ] = mfcbb;// -  c2over27 ;  (D.f[ DIR_M00   ])[kw  ]                                                                   
+			(D.f[DIR_0P0])[k] = mfbab;//(D.f[ DIR_0P0   ])[kn   ] = mfbab;// -  c2over27 ;	 (D.f[ DIR_0P0   ])[k   ]
+			(D.f[DIR_0M0])[ks] = mfbcb;//(D.f[ DIR_0M0   ])[ks   ] = mfbcb;// -  c2over27 ;	 (D.f[ DIR_0M0   ])[ks  ]
+			(D.f[DIR_00P])[k] = mfbba;//(D.f[ DIR_00P   ])[kt   ] = mfbba;// -  c2over27 ;	 (D.f[ DIR_00P   ])[k   ]
+			(D.f[DIR_00M])[kb] = mfbbc;//(D.f[ DIR_00M   ])[kb   ] = mfbbc;// -  c2over27 ;	 (D.f[ DIR_00M   ])[kb  ]
+			(D.f[DIR_PP0])[k] = mfaab;//(D.f[ DIR_PP0  ])[kne  ] = mfaab;// -  c1over54 ;	 (D.f[ DIR_PP0  ])[k   ]
+			(D.f[DIR_MM0])[ksw] = mfccb;//(D.f[ DIR_MM0  ])[ksw  ] = mfccb;// -  c1over54 ;	 (D.f[ DIR_MM0  ])[ksw ]
+			(D.f[DIR_PM0])[ks] = mfacb;//(D.f[ DIR_PM0  ])[kse  ] = mfacb;// -  c1over54 ;	 (D.f[ DIR_PM0  ])[ks  ]
+			(D.f[DIR_MP0])[kw] = mfcab;//(D.f[ DIR_MP0  ])[knw  ] = mfcab;// -  c1over54 ;	 (D.f[ DIR_MP0  ])[kw  ]
+			(D.f[DIR_P0P])[k] = mfaba;//(D.f[ DIR_P0P  ])[kte  ] = mfaba;// -  c1over54 ;	 (D.f[ DIR_P0P  ])[k   ]
+			(D.f[DIR_M0M])[kbw] = mfcbc;//(D.f[ DIR_M0M  ])[kbw  ] = mfcbc;// -  c1over54 ;	 (D.f[ DIR_M0M  ])[kbw ]
+			(D.f[DIR_P0M])[kb] = mfabc;//(D.f[ DIR_P0M  ])[kbe  ] = mfabc;// -  c1over54 ;	 (D.f[ DIR_P0M  ])[kb  ]
+			(D.f[DIR_M0P])[kw] = mfcba;//(D.f[ DIR_M0P  ])[ktw  ] = mfcba;// -  c1over54 ;	 (D.f[ DIR_M0P  ])[kw  ]
+			(D.f[DIR_0PP])[k] = mfbaa;//(D.f[ DIR_0PP  ])[ktn  ] = mfbaa;// -  c1over54 ;	 (D.f[ DIR_0PP  ])[k   ]
+			(D.f[DIR_0MM])[kbs] = mfbcc;//(D.f[ DIR_0MM  ])[kbs  ] = mfbcc;// -  c1over54 ;	 (D.f[ DIR_0MM  ])[kbs ]
+			(D.f[DIR_0PM])[kb] = mfbac;//(D.f[ DIR_0PM  ])[kbn  ] = mfbac;// -  c1over54 ;	 (D.f[ DIR_0PM  ])[kb  ]
+			(D.f[DIR_0MP])[ks] = mfbca;//(D.f[ DIR_0MP  ])[kts  ] = mfbca;// -  c1over54 ;	 (D.f[ DIR_0MP  ])[ks  ]
+			(D.f[DIR_000])[k] = mfbbb;//(D.f[ DIR_000])[kzero] = mfbbb;// -  c8over27 ;	 (D.f[ DIR_000])[k   ]
+			(D.f[DIR_PPP])[k] = mfaaa;//(D.f[ DIR_PPP ])[ktne ] = mfaaa;// -  c1over216;	 (D.f[ DIR_PPP ])[k   ]
+			(D.f[DIR_PMP])[ks] = mfaca;//(D.f[ DIR_PMP ])[ktse ] = mfaca;// -  c1over216;	 (D.f[ DIR_PMP ])[ks  ]
+			(D.f[DIR_PPM])[kb] = mfaac;//(D.f[ DIR_PPM ])[kbne ] = mfaac;// -  c1over216;	 (D.f[ DIR_PPM ])[kb  ]
+			(D.f[DIR_PMM])[kbs] = mfacc;//(D.f[ DIR_PMM ])[kbse ] = mfacc;// -  c1over216;	 (D.f[ DIR_PMM ])[kbs ]
+			(D.f[DIR_MPP])[kw] = mfcaa;//(D.f[ DIR_MPP ])[ktnw ] = mfcaa;// -  c1over216;	 (D.f[ DIR_MPP ])[kw  ]
+			(D.f[DIR_MMP])[ksw] = mfcca;//(D.f[ DIR_MMP ])[ktsw ] = mfcca;// -  c1over216;	 (D.f[ DIR_MMP ])[ksw ]
+			(D.f[DIR_MPM])[kbw] = mfcac;//(D.f[ DIR_MPM ])[kbnw ] = mfcac;// -  c1over216;	 (D.f[ DIR_MPM ])[kbw ]
+			(D.f[DIR_MMM])[kbsw] = mfccc;//(D.f[ DIR_MMM ])[kbsw ] = mfccc;// -  c1over216;	 (D.f[ DIR_MMM ])[kbsw]
 										////////////////////////////////////////////////////////////////////////////////////
 		}
 	}
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp_Device.cuh
index d7f1f263be6664a6d39c57c98ba63699da662c2b..04448787256cb1cfeef46c5d9b7146918e6a4c38 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_CumulantK17BulkComp(real omega,
+__global__ void LB_Kernel_CumulantK17BulkComp(real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim_Device.cu
index 6aab2076860fd2ea22cec8c3948ae2639f5c1780..e90d1b34c4b0b4fce65ff9edf1632f98202337ee 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim_Device.cu
@@ -40,7 +40,7 @@ using namespace vf::lbm::dir;
 #include "Kernel/ChimeraTransformation.h"
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LB_Kernel_CumulantK17CompChim(
+__global__ void LB_Kernel_CumulantK17CompChim(
 	real omega,
 	uint* typeOfGridNode,
 	uint* neighborX,
@@ -88,61 +88,61 @@ extern "C" __global__ void LB_Kernel_CumulantK17CompChim(
         //!
         Distributions27 dist;
         if (isEvenTimestep) {
-            dist.f[E]    = &distributions[E * size_Mat];
-            dist.f[W]    = &distributions[W * size_Mat];
-            dist.f[N]    = &distributions[N * size_Mat];
-            dist.f[S]    = &distributions[S * size_Mat];
-            dist.f[T]    = &distributions[T * size_Mat];
-            dist.f[B]    = &distributions[B * size_Mat];
-            dist.f[NE]   = &distributions[NE * size_Mat];
-            dist.f[SW]   = &distributions[SW * size_Mat];
-            dist.f[SE]   = &distributions[SE * size_Mat];
-            dist.f[NW]   = &distributions[NW * size_Mat];
-            dist.f[TE]   = &distributions[TE * size_Mat];
-            dist.f[BW]   = &distributions[BW * size_Mat];
-            dist.f[BE]   = &distributions[BE * size_Mat];
-            dist.f[TW]   = &distributions[TW * size_Mat];
-            dist.f[TN]   = &distributions[TN * size_Mat];
-            dist.f[BS]   = &distributions[BS * size_Mat];
-            dist.f[BN]   = &distributions[BN * size_Mat];
-            dist.f[TS]   = &distributions[TS * size_Mat];
-            dist.f[REST] = &distributions[REST * size_Mat];
-            dist.f[TNE]  = &distributions[TNE * size_Mat];
-            dist.f[TSW]  = &distributions[TSW * size_Mat];
-            dist.f[TSE]  = &distributions[TSE * size_Mat];
-            dist.f[TNW]  = &distributions[TNW * size_Mat];
-            dist.f[BNE]  = &distributions[BNE * size_Mat];
-            dist.f[BSW]  = &distributions[BSW * size_Mat];
-            dist.f[BSE]  = &distributions[BSE * size_Mat];
-            dist.f[BNW]  = &distributions[BNW * size_Mat];
+            dist.f[DIR_P00]    = &distributions[DIR_P00 * size_Mat];
+            dist.f[DIR_M00]    = &distributions[DIR_M00 * size_Mat];
+            dist.f[DIR_0P0]    = &distributions[DIR_0P0 * size_Mat];
+            dist.f[DIR_0M0]    = &distributions[DIR_0M0 * size_Mat];
+            dist.f[DIR_00P]    = &distributions[DIR_00P * size_Mat];
+            dist.f[DIR_00M]    = &distributions[DIR_00M * size_Mat];
+            dist.f[DIR_PP0]   = &distributions[DIR_PP0 * size_Mat];
+            dist.f[DIR_MM0]   = &distributions[DIR_MM0 * size_Mat];
+            dist.f[DIR_PM0]   = &distributions[DIR_PM0 * size_Mat];
+            dist.f[DIR_MP0]   = &distributions[DIR_MP0 * size_Mat];
+            dist.f[DIR_P0P]   = &distributions[DIR_P0P * size_Mat];
+            dist.f[DIR_M0M]   = &distributions[DIR_M0M * size_Mat];
+            dist.f[DIR_P0M]   = &distributions[DIR_P0M * size_Mat];
+            dist.f[DIR_M0P]   = &distributions[DIR_M0P * size_Mat];
+            dist.f[DIR_0PP]   = &distributions[DIR_0PP * size_Mat];
+            dist.f[DIR_0MM]   = &distributions[DIR_0MM * size_Mat];
+            dist.f[DIR_0PM]   = &distributions[DIR_0PM * size_Mat];
+            dist.f[DIR_0MP]   = &distributions[DIR_0MP * size_Mat];
+            dist.f[DIR_000] = &distributions[DIR_000 * size_Mat];
+            dist.f[DIR_PPP]  = &distributions[DIR_PPP * size_Mat];
+            dist.f[DIR_MMP]  = &distributions[DIR_MMP * size_Mat];
+            dist.f[DIR_PMP]  = &distributions[DIR_PMP * size_Mat];
+            dist.f[DIR_MPP]  = &distributions[DIR_MPP * size_Mat];
+            dist.f[DIR_PPM]  = &distributions[DIR_PPM * size_Mat];
+            dist.f[DIR_MMM]  = &distributions[DIR_MMM * size_Mat];
+            dist.f[DIR_PMM]  = &distributions[DIR_PMM * size_Mat];
+            dist.f[DIR_MPM]  = &distributions[DIR_MPM * size_Mat];
         } else {
-            dist.f[W]    = &distributions[E * size_Mat];
-            dist.f[E]    = &distributions[W * size_Mat];
-            dist.f[S]    = &distributions[N * size_Mat];
-            dist.f[N]    = &distributions[S * size_Mat];
-            dist.f[B]    = &distributions[T * size_Mat];
-            dist.f[T]    = &distributions[B * size_Mat];
-            dist.f[SW]   = &distributions[NE * size_Mat];
-            dist.f[NE]   = &distributions[SW * size_Mat];
-            dist.f[NW]   = &distributions[SE * size_Mat];
-            dist.f[SE]   = &distributions[NW * size_Mat];
-            dist.f[BW]   = &distributions[TE * size_Mat];
-            dist.f[TE]   = &distributions[BW * size_Mat];
-            dist.f[TW]   = &distributions[BE * size_Mat];
-            dist.f[BE]   = &distributions[TW * size_Mat];
-            dist.f[BS]   = &distributions[TN * size_Mat];
-            dist.f[TN]   = &distributions[BS * size_Mat];
-            dist.f[TS]   = &distributions[BN * size_Mat];
-            dist.f[BN]   = &distributions[TS * size_Mat];
-            dist.f[REST] = &distributions[REST * size_Mat];
-            dist.f[BSW]  = &distributions[TNE * size_Mat];
-            dist.f[BNE]  = &distributions[TSW * size_Mat];
-            dist.f[BNW]  = &distributions[TSE * size_Mat];
-            dist.f[BSE]  = &distributions[TNW * size_Mat];
-            dist.f[TSW]  = &distributions[BNE * size_Mat];
-            dist.f[TNE]  = &distributions[BSW * size_Mat];
-            dist.f[TNW]  = &distributions[BSE * size_Mat];
-            dist.f[TSE]  = &distributions[BNW * size_Mat];
+            dist.f[DIR_M00]    = &distributions[DIR_P00 * size_Mat];
+            dist.f[DIR_P00]    = &distributions[DIR_M00 * size_Mat];
+            dist.f[DIR_0M0]    = &distributions[DIR_0P0 * size_Mat];
+            dist.f[DIR_0P0]    = &distributions[DIR_0M0 * size_Mat];
+            dist.f[DIR_00M]    = &distributions[DIR_00P * size_Mat];
+            dist.f[DIR_00P]    = &distributions[DIR_00M * size_Mat];
+            dist.f[DIR_MM0]   = &distributions[DIR_PP0 * size_Mat];
+            dist.f[DIR_PP0]   = &distributions[DIR_MM0 * size_Mat];
+            dist.f[DIR_MP0]   = &distributions[DIR_PM0 * size_Mat];
+            dist.f[DIR_PM0]   = &distributions[DIR_MP0 * size_Mat];
+            dist.f[DIR_M0M]   = &distributions[DIR_P0P * size_Mat];
+            dist.f[DIR_P0P]   = &distributions[DIR_M0M * size_Mat];
+            dist.f[DIR_M0P]   = &distributions[DIR_P0M * size_Mat];
+            dist.f[DIR_P0M]   = &distributions[DIR_M0P * size_Mat];
+            dist.f[DIR_0MM]   = &distributions[DIR_0PP * size_Mat];
+            dist.f[DIR_0PP]   = &distributions[DIR_0MM * size_Mat];
+            dist.f[DIR_0MP]   = &distributions[DIR_0PM * size_Mat];
+            dist.f[DIR_0PM]   = &distributions[DIR_0MP * size_Mat];
+            dist.f[DIR_000] = &distributions[DIR_000 * size_Mat];
+            dist.f[DIR_MMM]  = &distributions[DIR_PPP * size_Mat];
+            dist.f[DIR_PPM]  = &distributions[DIR_MMP * size_Mat];
+            dist.f[DIR_MPM]  = &distributions[DIR_PMP * size_Mat];
+            dist.f[DIR_PMM]  = &distributions[DIR_MPP * size_Mat];
+            dist.f[DIR_MMP]  = &distributions[DIR_PPM * size_Mat];
+            dist.f[DIR_PPP]  = &distributions[DIR_MMM * size_Mat];
+            dist.f[DIR_MPP]  = &distributions[DIR_PMM * size_Mat];
+            dist.f[DIR_PMP]  = &distributions[DIR_MPM * size_Mat];
         }
         ////////////////////////////////////////////////////////////////////////////////
         //! - Set neighbor indices (necessary for indirect addressing)
@@ -156,33 +156,33 @@ extern "C" __global__ void LB_Kernel_CumulantK17CompChim(
         ////////////////////////////////////////////////////////////////////////////////////
         //! - Set local distributions
         //!
-        real mfcbb = (dist.f[E])[k];
-        real mfabb = (dist.f[W])[kw];
-        real mfbcb = (dist.f[N])[k];
-        real mfbab = (dist.f[S])[ks];
-        real mfbbc = (dist.f[T])[k];
-        real mfbba = (dist.f[B])[kb];
-        real mfccb = (dist.f[NE])[k];
-        real mfaab = (dist.f[SW])[ksw];
-        real mfcab = (dist.f[SE])[ks];
-        real mfacb = (dist.f[NW])[kw];
-        real mfcbc = (dist.f[TE])[k];
-        real mfaba = (dist.f[BW])[kbw];
-        real mfcba = (dist.f[BE])[kb];
-        real mfabc = (dist.f[TW])[kw];
-        real mfbcc = (dist.f[TN])[k];
-        real mfbaa = (dist.f[BS])[kbs];
-        real mfbca = (dist.f[BN])[kb];
-        real mfbac = (dist.f[TS])[ks];
-        real mfbbb = (dist.f[REST])[k];
-        real mfccc = (dist.f[TNE])[k];
-        real mfaac = (dist.f[TSW])[ksw];
-        real mfcac = (dist.f[TSE])[ks];
-        real mfacc = (dist.f[TNW])[kw];
-        real mfcca = (dist.f[BNE])[kb];
-        real mfaaa = (dist.f[BSW])[kbsw];
-        real mfcaa = (dist.f[BSE])[kbs];
-        real mfaca = (dist.f[BNW])[kbw];
+        real mfcbb = (dist.f[DIR_P00])[k];
+        real mfabb = (dist.f[DIR_M00])[kw];
+        real mfbcb = (dist.f[DIR_0P0])[k];
+        real mfbab = (dist.f[DIR_0M0])[ks];
+        real mfbbc = (dist.f[DIR_00P])[k];
+        real mfbba = (dist.f[DIR_00M])[kb];
+        real mfccb = (dist.f[DIR_PP0])[k];
+        real mfaab = (dist.f[DIR_MM0])[ksw];
+        real mfcab = (dist.f[DIR_PM0])[ks];
+        real mfacb = (dist.f[DIR_MP0])[kw];
+        real mfcbc = (dist.f[DIR_P0P])[k];
+        real mfaba = (dist.f[DIR_M0M])[kbw];
+        real mfcba = (dist.f[DIR_P0M])[kb];
+        real mfabc = (dist.f[DIR_M0P])[kw];
+        real mfbcc = (dist.f[DIR_0PP])[k];
+        real mfbaa = (dist.f[DIR_0MM])[kbs];
+        real mfbca = (dist.f[DIR_0PM])[kb];
+        real mfbac = (dist.f[DIR_0MP])[ks];
+        real mfbbb = (dist.f[DIR_000])[k];
+        real mfccc = (dist.f[DIR_PPP])[k];
+        real mfaac = (dist.f[DIR_MMP])[ksw];
+        real mfcac = (dist.f[DIR_PMP])[ks];
+        real mfacc = (dist.f[DIR_MPP])[kw];
+        real mfcca = (dist.f[DIR_PPM])[kb];
+        real mfaaa = (dist.f[DIR_MMM])[kbsw];
+        real mfcaa = (dist.f[DIR_PMM])[kbs];
+        real mfaca = (dist.f[DIR_MPM])[kbw];
         ////////////////////////////////////////////////////////////////////////////////////
         //! - Calculate density and velocity using pyramid summation for low round-off errors as in Eq. (J1)-(J3) \ref
         //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015),
@@ -330,7 +330,7 @@ extern "C" __global__ void LB_Kernel_CumulantK17CompChim(
         real O6 = c1o1;
 
         ////////////////////////////////////////////////////////////////////////////////////
-        //! - A and B: parameters for fourth order convergence of the diffusion term according to Eq. (115) and (116)
+        //! - A and B: parameters for fourth order convergence of the diffusion term according to Eq. (115) and (116)
         //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
         //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a> with simplifications assuming \f$ \omega_2 = 1.0 \f$ (modify for
         //! different bulk viscosity).
@@ -623,32 +623,32 @@ extern "C" __global__ void LB_Kernel_CumulantK17CompChim(
         //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017),
         //! DOI:10.3390/computation5020019 ]</b></a>
         //!
-        (dist.f[E])[k]      = mfabb;
-        (dist.f[W])[kw]     = mfcbb;
-        (dist.f[N])[k]      = mfbab;
-        (dist.f[S])[ks]     = mfbcb;
-        (dist.f[T])[k]      = mfbba;
-        (dist.f[B])[kb]     = mfbbc;
-        (dist.f[NE])[k]     = mfaab;
-        (dist.f[SW])[ksw]   = mfccb;
-        (dist.f[SE])[ks]    = mfacb;
-        (dist.f[NW])[kw]    = mfcab;
-        (dist.f[TE])[k]     = mfaba;
-        (dist.f[BW])[kbw]   = mfcbc;
-        (dist.f[BE])[kb]    = mfabc;
-        (dist.f[TW])[kw]    = mfcba;
-        (dist.f[TN])[k]     = mfbaa;
-        (dist.f[BS])[kbs]   = mfbcc;
-        (dist.f[BN])[kb]    = mfbac;
-        (dist.f[TS])[ks]    = mfbca;
-        (dist.f[REST])[k]   = mfbbb;
-        (dist.f[TNE])[k]    = mfaaa;
-        (dist.f[TSE])[ks]   = mfaca;
-        (dist.f[BNE])[kb]   = mfaac;
-        (dist.f[BSE])[kbs]  = mfacc;
-        (dist.f[TNW])[kw]   = mfcaa;
-        (dist.f[TSW])[ksw]  = mfcca;
-        (dist.f[BNW])[kbw]  = mfcac;
-        (dist.f[BSW])[kbsw] = mfccc;
+        (dist.f[DIR_P00])[k]      = mfabb;
+        (dist.f[DIR_M00])[kw]     = mfcbb;
+        (dist.f[DIR_0P0])[k]      = mfbab;
+        (dist.f[DIR_0M0])[ks]     = mfbcb;
+        (dist.f[DIR_00P])[k]      = mfbba;
+        (dist.f[DIR_00M])[kb]     = mfbbc;
+        (dist.f[DIR_PP0])[k]     = mfaab;
+        (dist.f[DIR_MM0])[ksw]   = mfccb;
+        (dist.f[DIR_PM0])[ks]    = mfacb;
+        (dist.f[DIR_MP0])[kw]    = mfcab;
+        (dist.f[DIR_P0P])[k]     = mfaba;
+        (dist.f[DIR_M0M])[kbw]   = mfcbc;
+        (dist.f[DIR_P0M])[kb]    = mfabc;
+        (dist.f[DIR_M0P])[kw]    = mfcba;
+        (dist.f[DIR_0PP])[k]     = mfbaa;
+        (dist.f[DIR_0MM])[kbs]   = mfbcc;
+        (dist.f[DIR_0PM])[kb]    = mfbac;
+        (dist.f[DIR_0MP])[ks]    = mfbca;
+        (dist.f[DIR_000])[k]   = mfbbb;
+        (dist.f[DIR_PPP])[k]    = mfaaa;
+        (dist.f[DIR_PMP])[ks]   = mfaca;
+        (dist.f[DIR_PPM])[kb]   = mfaac;
+        (dist.f[DIR_PMM])[kbs]  = mfacc;
+        (dist.f[DIR_MPP])[kw]   = mfcaa;
+        (dist.f[DIR_MMP])[ksw]  = mfcca;
+        (dist.f[DIR_MPM])[kbw]  = mfcac;
+        (dist.f[DIR_MMM])[kbsw] = mfccc;
     }
 }
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim_Device.cuh
index 1d42d65f020dd7393321498666a298630883f6ad..a480278652ca3bae0122d33b2655e2210d203727 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_CumulantK17CompChim(
+__global__ void LB_Kernel_CumulantK17CompChim(
 	real omega,
 	uint* typeOfGridNode,
 	uint* neighborX,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimRedesigned/CumulantK17CompChimRedesigned.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimRedesigned/CumulantK17CompChimRedesigned.cu
new file mode 100644
index 0000000000000000000000000000000000000000..8c06b7117c8b1ef62b932a76bf5de0be2ae99b1c
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimRedesigned/CumulantK17CompChimRedesigned.cu
@@ -0,0 +1,61 @@
+#include "CumulantK17CompChimRedesigned.h"
+
+#include "Parameter/Parameter.h"
+#include "Parameter/CudaStreamManager.h"
+#include "CumulantK17CompChimRedesigned_Device.cuh"
+
+#include <cuda.h>
+
+// Factory function: builds the kernel object with new (its constructors are private) and returns it as a shared_ptr.
+std::shared_ptr<CumulantK17CompChimRedesigned> CumulantK17CompChimRedesigned::getNewInstance(std::shared_ptr<Parameter> para, int level)
+{
+    return std::shared_ptr<CumulantK17CompChimRedesigned>{ new CumulantK17CompChimRedesigned(para, level) };
+}
+
+// Launches LB_Kernel_CumulantK17CompChimRedesigned for this level, using the level's fluidNodeIndices as index list.
+void CumulantK17CompChimRedesigned::run()
+{
+    LB_Kernel_CumulantK17CompChimRedesigned <<< cudaGrid.grid, cudaGrid.threads >>>(
+        para->getParD(level)->omega,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->distributions.f[0],
+        para->getParD(level)->numberOfNodes,
+        level,
+        para->getForcesDev(),
+        para->getQuadricLimitersDev(),
+        para->getParD(level)->isEvenTimestep,
+        para->getParD(level)->fluidNodeIndices, para->getParD(level)->numberOfFluidNodes); // index list and its length
+    getLastCudaError("LB_Kernel_CumulantK17CompChimRedesigned execution failed"); // message names the kernel launched above
+}
+
+// Launches the kernel for a caller-supplied index list; streamIndex == -1 selects CU_STREAM_LEGACY, otherwise the stream manager's stream.
+void CumulantK17CompChimRedesigned::runOnIndices(const unsigned int *indices, unsigned int size_indices, int streamIndex)
+{
+    cudaStream_t stream = (streamIndex == -1) ? CU_STREAM_LEGACY : para->getStreamManager()->getStream(streamIndex);
+    // NOTE(review): cudaGrid is sized from numberOfNodes in the constructor, not from size_indices - confirm the kernel guards its index range.
+    LB_Kernel_CumulantK17CompChimRedesigned<<< cudaGrid.grid, cudaGrid.threads, 0, stream>>>(
+        para->getParD(level)->omega,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->distributions.f[0],
+        para->getParD(level)->numberOfNodes,
+        level,
+        para->getForcesDev(),
+        para->getQuadricLimitersDev(),
+        para->getParD(level)->isEvenTimestep,
+        indices,
+        size_indices);
+    getLastCudaError("LB_Kernel_CumulantK17CompChimRedesigned execution failed"); // message names the kernel launched above
+}
+
+CumulantK17CompChimRedesigned::CumulantK17CompChimRedesigned(std::shared_ptr<Parameter> para, int level): KernelImp(para, level)
+{
+    kernelUsesFluidNodeIndices = true; // run() hands the level's fluidNodeIndices to the kernel
+    cudaGrid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes); // launch configuration reused by run() and runOnIndices()
+    myKernelGroup = BasicKernel;
+    myPreProcessorTypes.push_back(InitCompSP27);
+}
+
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimRedesigned/CumulantK17CompChimRedesigned.h b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimRedesigned/CumulantK17CompChimRedesigned.h
new file mode 100644
index 0000000000000000000000000000000000000000..4658075de330665fdba88a5ec8149a9b476d5ac7
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimRedesigned/CumulantK17CompChimRedesigned.h
@@ -0,0 +1,18 @@
+#ifndef CUMULANT_K17_COMP_CHIM_REDESIGN_H
+#define CUMULANT_K17_COMP_CHIM_REDESIGN_H
+
+#include "Kernel/KernelImp.h"
+
+class CumulantK17CompChimRedesigned : public KernelImp
+{
+    // Both constructors are private (class default access); instances are obtained through getNewInstance().
+    CumulantK17CompChimRedesigned();
+    CumulantK17CompChimRedesigned(std::shared_ptr<Parameter> para, int level);
+
+public:
+    static std::shared_ptr<CumulantK17CompChimRedesigned> getNewInstance(std::shared_ptr<Parameter> para, int level);
+    void run() override;
+    void runOnIndices(const unsigned int *indices, unsigned int size_indices, int stream = -1) override;
+};
+
+#endif 
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimRedesigned/CumulantK17CompChimRedesignedDevice.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimRedesigned/CumulantK17CompChimRedesignedDevice.cu
new file mode 100644
index 0000000000000000000000000000000000000000..2636df9614ea1c195d77f3366a7815951f64776a
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimRedesigned/CumulantK17CompChimRedesignedDevice.cu
@@ -0,0 +1,603 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __         
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |        
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |        
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |        
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____    
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|   
+//      \    \  |    |   ________________________________________________________________    
+//       \    \ |    |  |  ______________________________________________________________|   
+//        \    \|    |  |  |         __          __     __     __     ______      _______    
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)   
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______    
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/   
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can 
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of 
+//  the License, or (at your option) any later version.
+//  
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT 
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
+//  for more details.
+//  
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file CumulantK17CompChimRedesignedDevice.cu
+//! \ingroup GPU
+//! \author Martin Schoenherr, Anna Wellmann
+//=======================================================================================
+/* Device code */
+#include "LBM/LB.h" 
+#include "lbm/constants/D3Q27.h"
+#include <lbm/constants/NumericConstants.h>
+#include "Kernel/Utilities/DistributionHelper.cuh"
+
+using namespace vf::lbm::constant;
+using namespace vf::lbm::dir;
+#include "Kernel/ChimeraTransformation.h"
+
+////////////////////////////////////////////////////////////////////////////////
+__global__ void LB_Kernel_CumulantK17CompChimRedesigned(
+    real omega,                    //!< relaxation rate applied to the second order cumulants
+    uint* neighborX,               //!< neighbor index lookup in x, used to obtain k_M00
+    uint* neighborY,               //!< neighbor index lookup in y, used to obtain k_0M0
+    uint* neighborZ,               //!< neighbor index lookup in z, used to obtain k_00M
+    real* distributions,           //!< distribution storage, resolved via getDistributionReferences27
+    unsigned long numberOfLBnodes, //!< node count handed to getDistributionReferences27
+    int level,                     //!< grid level; the body force is divided by 2^level
+    real* forces,                  //!< body force, components [0], [1], [2] are used as x, y, z
+    real* quadricLimiters,         //!< limiters for third order cumulants: [0]=P, [1]=M, [2]=D
+    bool isEvenTimestep,           //!< selects the read/write layout in getDistributionReferences27
+    const uint *fluidNodeIndices,  //!< list of node indices, one entry is processed per thread
+    uint numberOfFluidNodes)       //!< number of entries in fluidNodeIndices
+{
+    //////////////////////////////////////////////////////////////////////////
+    //! Cumulant K17 Kernel is based on \ref
+    //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040
+    //! ]</b></a> and \ref <a href="https://doi.org/10.1016/j.jcp.2017.07.004"><b>[ M. Geier et al. (2017),
+    //! DOI:10.1016/j.jcp.2017.07.004 ]</b></a>
+    //!
+    //! The cumulant kernel is executed in the following steps
+    //!
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Get the thread index coordinates from threadIdx, blockIdx, blockDim and gridDim.
+    //!
+    const unsigned kThread = vf::gpu::getNodeIndex();
+
+    //////////////////////////////////////////////////////////////////////////
+    //! - Return for threads that lie beyond the end of the list of fluid node indices
+    if (kThread >= numberOfFluidNodes)
+        return;
+
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Get the node index from the array containing all indices of fluid nodes
+    //!
+    const unsigned k_000 = fluidNodeIndices[kThread];
+
+    //////////////////////////////////////////////////////////////////////////
+    //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on
+    //! timestep is based on the esoteric twist algorithm \ref <a
+    //! href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017),
+    //! DOI:10.3390/computation5020019 ]</b></a>
+    //!
+    Distributions27 dist = vf::gpu::getDistributionReferences27(distributions, numberOfLBnodes, isEvenTimestep);
+
+    ////////////////////////////////////////////////////////////////////////////////
+    //! - Set neighbor indices (necessary for indirect addressing)
+    //!
+    uint k_M00 = neighborX[k_000];
+    uint k_0M0 = neighborY[k_000];
+    uint k_00M = neighborZ[k_000];
+    uint k_MM0 = neighborY[k_M00];
+    uint k_M0M = neighborZ[k_M00];
+    uint k_0MM = neighborZ[k_0M0];
+    uint k_MMM = neighborZ[k_MM0];
+
+    ////////////////////////////////////////////////////////////////////////////////////
+    //! - Set local distributions (f's):
+    //!
+    real f_000 = (dist.f[DIR_000])[k_000];
+    real f_P00 = (dist.f[DIR_P00])[k_000];
+    real f_M00 = (dist.f[DIR_M00])[k_M00];
+    real f_0P0 = (dist.f[DIR_0P0])[k_000];
+    real f_0M0 = (dist.f[DIR_0M0])[k_0M0];
+    real f_00P = (dist.f[DIR_00P])[k_000];
+    real f_00M = (dist.f[DIR_00M])[k_00M];
+    real f_PP0 = (dist.f[DIR_PP0])[k_000];
+    real f_MM0 = (dist.f[DIR_MM0])[k_MM0];
+    real f_PM0 = (dist.f[DIR_PM0])[k_0M0];
+    real f_MP0 = (dist.f[DIR_MP0])[k_M00];
+    real f_P0P = (dist.f[DIR_P0P])[k_000];
+    real f_M0M = (dist.f[DIR_M0M])[k_M0M];
+    real f_P0M = (dist.f[DIR_P0M])[k_00M];
+    real f_M0P = (dist.f[DIR_M0P])[k_M00];
+    real f_0PP = (dist.f[DIR_0PP])[k_000];
+    real f_0MM = (dist.f[DIR_0MM])[k_0MM];
+    real f_0PM = (dist.f[DIR_0PM])[k_00M];
+    real f_0MP = (dist.f[DIR_0MP])[k_0M0];
+    real f_PPP = (dist.f[DIR_PPP])[k_000];
+    real f_MPP = (dist.f[DIR_MPP])[k_M00];
+    real f_PMP = (dist.f[DIR_PMP])[k_0M0];
+    real f_MMP = (dist.f[DIR_MMP])[k_MM0];
+    real f_PPM = (dist.f[DIR_PPM])[k_00M];
+    real f_MPM = (dist.f[DIR_MPM])[k_M0M];
+    real f_PMM = (dist.f[DIR_PMM])[k_0MM];
+    real f_MMM = (dist.f[DIR_MMM])[k_MMM];
+
+    ////////////////////////////////////////////////////////////////////////////////////
+    //! - Define aliases to use the same variable for the moments (m's):
+    //!
+    real& m_111 = f_000;
+    real& m_211 = f_P00;
+    real& m_011 = f_M00;
+    real& m_121 = f_0P0;
+    real& m_101 = f_0M0;
+    real& m_112 = f_00P;
+    real& m_110 = f_00M;
+    real& m_221 = f_PP0;
+    real& m_001 = f_MM0;
+    real& m_201 = f_PM0;
+    real& m_021 = f_MP0;
+    real& m_212 = f_P0P;
+    real& m_010 = f_M0M;
+    real& m_210 = f_P0M;
+    real& m_012 = f_M0P;
+    real& m_122 = f_0PP;
+    real& m_100 = f_0MM;
+    real& m_120 = f_0PM;
+    real& m_102 = f_0MP;
+    real& m_222 = f_PPP;
+    real& m_022 = f_MPP;
+    real& m_202 = f_PMP;
+    real& m_002 = f_MMP;
+    real& m_220 = f_PPM;
+    real& m_020 = f_MPM;
+    real& m_200 = f_PMM;
+    real& m_000 = f_MMM;
+
+    ////////////////////////////////////////////////////////////////////////////////////
+    //! - Calculate density and velocity using pyramid summation for low round-off errors as in Eq. (J1)-(J3) \ref
+    //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015),
+    //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
+    //!
+    real drho = ((((f_PPP + f_MMM) + (f_MPM + f_PMP)) + ((f_MPP + f_PMM) + (f_MMP + f_PPM))) +
+                 (((f_0MP + f_0PM) + (f_0MM + f_0PP)) + ((f_M0P + f_P0M) + (f_M0M + f_P0P)) +
+                  ((f_MP0 + f_PM0) + (f_MM0 + f_PP0))) +
+                  ((f_M00 + f_P00) + (f_0M0 + f_0P0) + (f_00M + f_00P))) +
+                    f_000;
+
+    real oneOverRho = c1o1 / (c1o1 + drho);
+
+    real vvx = ((((f_PPP - f_MMM) + (f_PMP - f_MPM)) + ((f_PMM - f_MPP) + (f_PPM - f_MMP))) +
+                (((f_P0M - f_M0P) + (f_P0P - f_M0M)) + ((f_PM0 - f_MP0) + (f_PP0 - f_MM0))) + (f_P00 - f_M00)) *
+               oneOverRho;
+    real vvy = ((((f_PPP - f_MMM) + (f_MPM - f_PMP)) + ((f_MPP - f_PMM) + (f_PPM - f_MMP))) +
+                (((f_0PM - f_0MP) + (f_0PP - f_0MM)) + ((f_MP0 - f_PM0) + (f_PP0 - f_MM0))) + (f_0P0 - f_0M0)) *
+               oneOverRho;
+    real vvz = ((((f_PPP - f_MMM) + (f_PMP - f_MPM)) + ((f_MPP - f_PMM) + (f_MMP - f_PPM))) +
+                (((f_0MP - f_0PM) + (f_0PP - f_0MM)) + ((f_M0P - f_P0M) + (f_P0P - f_M0M))) + (f_00P - f_00M)) *
+               oneOverRho;
+
+    ////////////////////////////////////////////////////////////////////////////////////
+    //! - Add half of the acceleration (body force) to the velocity as in Eq. (42) \ref
+    //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015),
+    //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
+    //!
+    real factor = c1o1;
+    // The factor has to be scaled for each level to get the correct acceleration (signed counter: level is an int).
+    for (int i = 1; i <= level; i++) {
+        factor *= c2o1;
+    }
+    real fx = forces[0] / factor;
+    real fy = forces[1] / factor;
+    real fz = forces[2] / factor;
+    vvx += fx * c1o2;
+    vvy += fy * c1o2;
+    vvz += fz * c1o2;
+    ////////////////////////////////////////////////////////////////////////////////////
+    // calculate the square of velocities for this lattice node
+    real vx2 = vvx * vvx;
+    real vy2 = vvy * vvy;
+    real vz2 = vvz * vvz;
+    ////////////////////////////////////////////////////////////////////////////////////
+    //! - Load the relaxation limiters for third order cumulants from quadricLimiters (default value \f$ \lambda=0.001 \f$
+    //! according to section 6 in \ref <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
+    //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a>)
+    //!
+    real quadricLimitP = quadricLimiters[0];
+    real quadricLimitM = quadricLimiters[1];
+    real quadricLimitD = quadricLimiters[2];
+    ////////////////////////////////////////////////////////////////////////////////////
+    //! - Chimera transform from well conditioned distributions to central moments as defined in Appendix J in \ref
+    //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015),
+    //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a> see also Eq. (6)-(14) in \ref <a
+    //! href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040
+    //! ]</b></a>
+    //!
+    ////////////////////////////////////////////////////////////////////////////////////
+    // Z - Dir
+    forwardInverseChimeraWithK(f_MMM, f_MM0, f_MMP, vvz, vz2, c36o1, c1o36);
+    forwardInverseChimeraWithK(f_M0M, f_M00, f_M0P, vvz, vz2, c9o1,  c1o9);
+    forwardInverseChimeraWithK(f_MPM, f_MP0, f_MPP, vvz, vz2, c36o1, c1o36);
+    forwardInverseChimeraWithK(f_0MM, f_0M0, f_0MP, vvz, vz2, c9o1,  c1o9);
+    forwardInverseChimeraWithK(f_00M, f_000, f_00P, vvz, vz2, c9o4,  c4o9);
+    forwardInverseChimeraWithK(f_0PM, f_0P0, f_0PP, vvz, vz2, c9o1,  c1o9);
+    forwardInverseChimeraWithK(f_PMM, f_PM0, f_PMP, vvz, vz2, c36o1, c1o36);
+    forwardInverseChimeraWithK(f_P0M, f_P00, f_P0P, vvz, vz2, c9o1,  c1o9);
+    forwardInverseChimeraWithK(f_PPM, f_PP0, f_PPP, vvz, vz2, c36o1, c1o36);
+
+    ////////////////////////////////////////////////////////////////////////////////////
+    // Y - Dir
+    forwardInverseChimeraWithK(f_MMM, f_M0M, f_MPM, vvy, vy2, c6o1,  c1o6);
+    forwardChimera(            f_MM0, f_M00, f_MP0, vvy, vy2);
+    forwardInverseChimeraWithK(f_MMP, f_M0P, f_MPP, vvy, vy2, c18o1, c1o18);
+    forwardInverseChimeraWithK(f_0MM, f_00M, f_0PM, vvy, vy2, c3o2,  c2o3);
+    forwardChimera(            f_0M0, f_000, f_0P0, vvy, vy2);
+    forwardInverseChimeraWithK(f_0MP, f_00P, f_0PP, vvy, vy2, c9o2,  c2o9);
+    forwardInverseChimeraWithK(f_PMM, f_P0M, f_PPM, vvy, vy2, c6o1,  c1o6);
+    forwardChimera(            f_PM0, f_P00, f_PP0, vvy, vy2);
+    forwardInverseChimeraWithK(f_PMP, f_P0P, f_PPP, vvy, vy2, c18o1, c1o18);
+
+    ////////////////////////////////////////////////////////////////////////////////////
+    // X - Dir
+    forwardInverseChimeraWithK(f_MMM, f_0MM, f_PMM, vvx, vx2, c1o1, c1o1);
+    forwardChimera(            f_M0M, f_00M, f_P0M, vvx, vx2);
+    forwardInverseChimeraWithK(f_MPM, f_0PM, f_PPM, vvx, vx2, c3o1, c1o3);
+    forwardChimera(            f_MM0, f_0M0, f_PM0, vvx, vx2);
+    forwardChimera(            f_M00, f_000, f_P00, vvx, vx2);
+    forwardChimera(            f_MP0, f_0P0, f_PP0, vvx, vx2);
+    forwardInverseChimeraWithK(f_MMP, f_0MP, f_PMP, vvx, vx2, c3o1, c1o3);
+    forwardChimera(            f_M0P, f_00P, f_P0P, vvx, vx2);
+    forwardInverseChimeraWithK(f_MPP, f_0PP, f_PPP, vvx, vx2, c9o1, c1o9); // reciprocal pair, mirrors the backward X transform
+
+    ////////////////////////////////////////////////////////////////////////////////////
+    //! - Setting relaxation rates for non-hydrodynamic cumulants (default values). Variable names and equations
+    //! according to <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
+    //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
+    //!  => [NAME IN PAPER]=[NAME IN CODE]=[DEFAULT VALUE].
+    //!  - Trace of second order cumulants \f$ C_{200}+C_{020}+C_{002} \f$ used to adjust bulk
+    //!  viscosity:\f$\omega_2=OxxPyyPzz=1.0 \f$.
+    //!  - Third order cumulants \f$ C_{120}+C_{102}, C_{210}+C_{012}, C_{201}+C_{021} \f$: \f$ \omega_3=OxyyPxzz
+    //!  \f$ set according to Eq. (111) with simplifications assuming \f$ \omega_2=1.0\f$.
+    //!  - Third order cumulants \f$ C_{120}-C_{102}, C_{210}-C_{012}, C_{201}-C_{021} \f$: \f$ \omega_4 = OxyyMxzz
+    //!  \f$ set according to Eq. (112) with simplifications assuming \f$ \omega_2 = 1.0\f$.
+    //!  - Third order cumulants \f$ C_{111} \f$: \f$ \omega_5 = Oxyz \f$ set according to Eq. (113) with
+    //!  simplifications assuming \f$ \omega_2 = 1.0\f$  (modify for different bulk viscosity).
+    //!  - Fourth order cumulants \f$ C_{220}, C_{202}, C_{022}, C_{211}, C_{121}, C_{112} \f$: for simplification
+    //!  all set to the same default value \f$ \omega_6=\omega_7=\omega_8=O4=1.0 \f$.
+    //!  - Fifth order cumulants \f$ C_{221}, C_{212}, C_{122}\f$: \f$\omega_9=O5=1.0\f$.
+    //!  - Sixth order cumulant \f$ C_{222}\f$: \f$\omega_{10}=O6=1.0\f$.
+    //!
+    ////////////////////////////////////////////////////////////
+    // 2.
+    real OxxPyyPzz = c1o1;
+    ////////////////////////////////////////////////////////////
+    // 3.
+    real OxyyPxzz = c8o1 * (-c2o1 + omega) * (c1o1 + c2o1 * omega)  / (-c8o1 - c14o1 * omega + c7o1 * omega * omega);
+    real OxyyMxzz = c8o1 * (-c2o1 + omega) * (-c7o1 + c4o1 * omega) / (c56o1 - c50o1 * omega + c9o1 * omega * omega);
+    real Oxyz     = c24o1 * (-c2o1 + omega) * (-c2o1 - c7o1 * omega + c3o1 * omega * omega) /
+                    (c48o1 + c152o1 * omega - c130o1 * omega * omega + c29o1 * omega * omega * omega);
+    ////////////////////////////////////////////////////////////
+    // 4.
+    real O4 = c1o1;
+    ////////////////////////////////////////////////////////////
+    // 5.
+    real O5 = c1o1;
+    ////////////////////////////////////////////////////////////
+    // 6.
+    real O6 = c1o1;
+
+    ////////////////////////////////////////////////////////////////////////////////////
+    //! - A and B: parameters for fourth order convergence of the diffusion term according to Eq. (115) and (116)
+    //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
+    //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a> with simplifications assuming \f$ \omega_2 = 1.0 \f$ (modify for
+    //! different bulk viscosity).
+    //!
+    real factorA = (c4o1 + c2o1  * omega - c3o1  * omega * omega) / (c2o1 - c7o1  * omega + c5o1  * omega * omega);
+    real factorB = (c4o1 + c28o1 * omega - c14o1 * omega * omega) / (c6o1 - c21o1 * omega + c15o1 * omega * omega);
+
+    ////////////////////////////////////////////////////////////////////////////////////
+    //! - Compute cumulants (c's) from central moments according to Eq. (20)-(23) in
+    //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
+    //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
+    //!
+    ////////////////////////////////////////////////////////////
+    // 4.
+    real c_211 = m_211 - ((m_200 + c1o3) * m_011 + c2o1 * m_110 * m_101) * oneOverRho;
+    real c_121 = m_121 - ((m_020 + c1o3) * m_101 + c2o1 * m_110 * m_011) * oneOverRho;
+    real c_112 = m_112 - ((m_002 + c1o3) * m_110 + c2o1 * m_101 * m_011) * oneOverRho;
+
+    real c_220 = m_220 - (((m_200 * m_020 + c2o1 * m_110 * m_110) + c1o3 * (m_200 + m_020)) * oneOverRho - c1o9 * (drho * oneOverRho));
+    real c_202 = m_202 - (((m_200 * m_002 + c2o1 * m_101 * m_101) + c1o3 * (m_200 + m_002)) * oneOverRho - c1o9 * (drho * oneOverRho));
+    real c_022 = m_022 - (((m_002 * m_020 + c2o1 * m_011 * m_011) + c1o3 * (m_002 + m_020)) * oneOverRho - c1o9 * (drho * oneOverRho));
+    ////////////////////////////////////////////////////////////
+    // 5.
+    real c_122 =
+        m_122 - ((m_002 * m_120 + m_020 * m_102 + c4o1 * m_011 * m_111 + c2o1 * (m_101 * m_021 + m_110 * m_012)) +
+                 c1o3 * (m_120 + m_102)) *
+                 oneOverRho;
+    real c_212 =
+        m_212 - ((m_002 * m_210 + m_200 * m_012 + c4o1 * m_101 * m_111 + c2o1 * (m_011 * m_201 + m_110 * m_102)) +
+                 c1o3 * (m_210 + m_012)) *
+                 oneOverRho;
+    real c_221 =
+        m_221 - ((m_200 * m_021 + m_020 * m_201 + c4o1 * m_110 * m_111 + c2o1 * (m_101 * m_120 + m_011 * m_210)) +
+                 c1o3 * (m_021 + m_201)) *
+                 oneOverRho;
+    ////////////////////////////////////////////////////////////
+    // 6.
+    real c_222 = m_222 + ((-c4o1 * m_111 * m_111 - (m_200 * m_022 + m_020 * m_202 + m_002 * m_220) -
+                            c4o1 * (m_011 * m_211 + m_101 * m_121 + m_110 * m_112) -
+                            c2o1 * (m_120 * m_102 + m_210 * m_012 + m_201 * m_021)) *
+                            oneOverRho +
+                           (c4o1 * (m_101 * m_101 * m_020 + m_011 * m_011 * m_200 + m_110 * m_110 * m_002) +
+                            c2o1 * (m_200 * m_020 * m_002) + c16o1 * m_110 * m_101 * m_011) *
+                            oneOverRho * oneOverRho -
+                            c1o3 * (m_022 + m_202 + m_220) * oneOverRho - c1o9 * (m_200 + m_020 + m_002) * oneOverRho +
+                           (c2o1 * (m_101 * m_101 + m_011 * m_011 + m_110 * m_110) +
+                            (m_002 * m_020 + m_002 * m_200 + m_020 * m_200) + c1o3 * (m_002 + m_020 + m_200)) *
+                            oneOverRho * oneOverRho * c2o3 +
+                            c1o27 * ((drho * drho - drho) * oneOverRho * oneOverRho));
+
+    ////////////////////////////////////////////////////////////////////////////////////
+    //! - Compute linear combinations of second and third order cumulants
+    //!
+    ////////////////////////////////////////////////////////////
+    // 2.
+    real mxxPyyPzz = m_200 + m_020 + m_002;
+    real mxxMyy    = m_200 - m_020;
+    real mxxMzz    = m_200 - m_002;
+    ////////////////////////////////////////////////////////////
+    // 3.
+    real mxxyPyzz = m_210 + m_012;
+    real mxxyMyzz = m_210 - m_012;
+
+    real mxxzPyyz = m_201 + m_021;
+    real mxxzMyyz = m_201 - m_021;
+
+    real mxyyPxzz = m_120 + m_102;
+    real mxyyMxzz = m_120 - m_102;
+
+    ////////////////////////////////////////////////////////////////////////////////////
+    // incl. correction
+    ////////////////////////////////////////////////////////////
+    //! - Compute velocity  gradients from second order cumulants according to Eq. (27)-(32)
+    //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
+    //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a> Further explanations of the correction in viscosity in Appendix H of
+    //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015),
+    //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a> Note that the division by rho is omitted here as we need rho times
+    //! the gradients later.
+    //!
+    real Dxy  = -c3o1 * omega * m_110;
+    real Dxz  = -c3o1 * omega * m_101;
+    real Dyz  = -c3o1 * omega * m_011;
+    real dxux = c1o2 * (-omega) * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (m_000 - mxxPyyPzz);
+    real dyuy = dxux + omega * c3o2 * mxxMyy;
+    real dzuz = dxux + omega * c3o2 * mxxMzz;
+    ////////////////////////////////////////////////////////////
+    //! - Relaxation of second order cumulants with correction terms according to Eq. (33)-(35) in
+    //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
+    //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
+    //!
+    mxxPyyPzz += OxxPyyPzz * (m_000 - mxxPyyPzz) - c3o1 * (c1o1 - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz);
+    mxxMyy += omega * (-mxxMyy) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vy2 * dyuy);
+    mxxMzz += omega * (-mxxMzz) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vz2 * dzuz);
+    //////////////////////////////////////////////////////////////////////////
+
+    ////////////////////////////////////////////////////////////////////////////////////
+    ////no correction (alternative to the block above, kept for reference)
+    // mxxPyyPzz += OxxPyyPzz*(m_000 - mxxPyyPzz);
+    // mxxMyy += -(-omega) * (-mxxMyy);
+    // mxxMzz += -(-omega) * (-mxxMzz);
+    //////////////////////////////////////////////////////////////////////////
+
+    m_011 += omega * (-m_011);
+    m_101 += omega * (-m_101);
+    m_110 += omega * (-m_110);
+
+    //////////////////////////////////////////////////////////////////////////
+    //! - Relaxation of third order cumulants including limiter according to Eq. (116)-(123)
+    //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
+    //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
+    //!
+    //////////////////////////////////////////////////////////////////////////
+    // incl. limiter
+    real wadjust = Oxyz + (c1o1 - Oxyz) * abs(m_111) / (abs(m_111) + quadricLimitD);
+    m_111 += wadjust * (-m_111);
+    wadjust = OxyyPxzz + (c1o1 - OxyyPxzz) * abs(mxxyPyzz) / (abs(mxxyPyzz) + quadricLimitP);
+    mxxyPyzz += wadjust * (-mxxyPyzz);
+    wadjust = OxyyMxzz + (c1o1 - OxyyMxzz) * abs(mxxyMyzz) / (abs(mxxyMyzz) + quadricLimitM);
+    mxxyMyzz += wadjust * (-mxxyMyzz);
+    wadjust = OxyyPxzz + (c1o1 - OxyyPxzz) * abs(mxxzPyyz) / (abs(mxxzPyyz) + quadricLimitP);
+    mxxzPyyz += wadjust * (-mxxzPyyz);
+    wadjust = OxyyMxzz + (c1o1 - OxyyMxzz) * abs(mxxzMyyz) / (abs(mxxzMyyz) + quadricLimitM);
+    mxxzMyyz += wadjust * (-mxxzMyyz);
+    wadjust = OxyyPxzz + (c1o1 - OxyyPxzz) * abs(mxyyPxzz) / (abs(mxyyPxzz) + quadricLimitP);
+    mxyyPxzz += wadjust * (-mxyyPxzz);
+    wadjust = OxyyMxzz + (c1o1 - OxyyMxzz) * abs(mxyyMxzz) / (abs(mxyyMxzz) + quadricLimitM);
+    mxyyMxzz += wadjust * (-mxyyMxzz);
+    //////////////////////////////////////////////////////////////////////////
+    // no limiter (alternative to the block above, kept for reference)
+    // m_111 += Oxyz * (-m_111);
+    // mxxyPyzz += OxyyPxzz * (-mxxyPyzz);
+    // mxxyMyzz += OxyyMxzz * (-mxxyMyzz);
+    // mxxzPyyz += OxyyPxzz * (-mxxzPyyz);
+    // mxxzMyyz += OxyyMxzz * (-mxxzMyyz);
+    // mxyyPxzz += OxyyPxzz * (-mxyyPxzz);
+    // mxyyMxzz += OxyyMxzz * (-mxyyMxzz);
+
+    ////////////////////////////////////////////////////////////////////////////////////
+    //! - Compute inverse linear combinations of second and third order cumulants
+    //!
+    m_200 = c1o3 * (mxxMyy + mxxMzz + mxxPyyPzz);
+    m_020 = c1o3 * (-c2o1 * mxxMyy + mxxMzz + mxxPyyPzz);
+    m_002 = c1o3 * (mxxMyy - c2o1 * mxxMzz + mxxPyyPzz);
+
+    m_210 = ( mxxyMyzz + mxxyPyzz) * c1o2;
+    m_012 = (-mxxyMyzz + mxxyPyzz) * c1o2;
+    m_201 = ( mxxzMyyz + mxxzPyyz) * c1o2;
+    m_021 = (-mxxzMyyz + mxxzPyyz) * c1o2;
+    m_120 = ( mxyyMxzz + mxyyPxzz) * c1o2;
+    m_102 = (-mxyyMxzz + mxyyPxzz) * c1o2;
+    //////////////////////////////////////////////////////////////////////////
+
+    //////////////////////////////////////////////////////////////////////////
+    // 4.
+    // no limiter
+    //! - Relax fourth order cumulants to modified equilibrium for fourth order convergence of diffusion according
+    //! to Eq. (43)-(48) <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
+    //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
+    //!
+    c_022 = -O4 * (c1o1 / omega - c1o2) * (dyuy + dzuz) * c2o3 * factorA + (c1o1 - O4) * (c_022);
+    c_202 = -O4 * (c1o1 / omega - c1o2) * (dxux + dzuz) * c2o3 * factorA + (c1o1 - O4) * (c_202);
+    c_220 = -O4 * (c1o1 / omega - c1o2) * (dyuy + dxux) * c2o3 * factorA + (c1o1 - O4) * (c_220);
+    c_112 = -O4 * (c1o1 / omega - c1o2) * Dxy           * c1o3 * factorB + (c1o1 - O4) * (c_112);
+    c_121 = -O4 * (c1o1 / omega - c1o2) * Dxz           * c1o3 * factorB + (c1o1 - O4) * (c_121);
+    c_211 = -O4 * (c1o1 / omega - c1o2) * Dyz           * c1o3 * factorB + (c1o1 - O4) * (c_211);
+
+    //////////////////////////////////////////////////////////////////////////
+    // 5.
+    c_122 += O5 * (-c_122);
+    c_212 += O5 * (-c_212);
+    c_221 += O5 * (-c_221);
+
+    //////////////////////////////////////////////////////////////////////////
+    // 6.
+    c_222 += O6 * (-c_222);
+
+    ////////////////////////////////////////////////////////////////////////////////////
+    //! - Compute central moments from post collision cumulants according to Eq. (53)-(56) in
+    //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
+    //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
+    //!
+
+    //////////////////////////////////////////////////////////////////////////
+    // 4.
+    m_211 = c_211 + c1o3 * ((c3o1 * m_200 + c1o1) * m_011 + c6o1 * m_110 * m_101) * oneOverRho;
+    m_121 = c_121 + c1o3 * ((c3o1 * m_020 + c1o1) * m_101 + c6o1 * m_110 * m_011) * oneOverRho;
+    m_112 = c_112 + c1o3 * ((c3o1 * m_002 + c1o1) * m_110 + c6o1 * m_101 * m_011) * oneOverRho;
+
+    m_220 =
+        c_220 + (((m_200 * m_020 + c2o1 * m_110 * m_110) * c9o1 + c3o1 * (m_200 + m_020)) * oneOverRho - (drho * oneOverRho)) * c1o9;
+    m_202 =
+        c_202 + (((m_200 * m_002 + c2o1 * m_101 * m_101) * c9o1 + c3o1 * (m_200 + m_002)) * oneOverRho - (drho * oneOverRho)) * c1o9;
+    m_022 =
+        c_022 + (((m_002 * m_020 + c2o1 * m_011 * m_011) * c9o1 + c3o1 * (m_002 + m_020)) * oneOverRho - (drho * oneOverRho)) * c1o9;
+
+    //////////////////////////////////////////////////////////////////////////
+    // 5.
+    m_122 = c_122 + c1o3 *
+            (c3o1 * (m_002 * m_120 + m_020 * m_102 + c4o1 * m_011 * m_111 + c2o1 * (m_101 * m_021 + m_110 * m_012)) +
+            (m_120 + m_102)) * oneOverRho;
+    m_212 = c_212 + c1o3 *
+            (c3o1 * (m_002 * m_210 + m_200 * m_012 + c4o1 * m_101 * m_111 + c2o1 * (m_011 * m_201 + m_110 * m_102)) +
+            (m_210 + m_012)) * oneOverRho;
+    m_221 = c_221 + c1o3 *
+            (c3o1 * (m_200 * m_021 + m_020 * m_201 + c4o1 * m_110 * m_111 + c2o1 * (m_101 * m_120 + m_011 * m_210)) +
+            (m_021 + m_201)) * oneOverRho;
+
+    //////////////////////////////////////////////////////////////////////////
+    // 6.
+    m_222 = c_222 - ((-c4o1 * m_111 * m_111 - (m_200 * m_022 + m_020 * m_202 + m_002 * m_220) -
+                       c4o1 * (m_011 * m_211 + m_101 * m_121 + m_110 * m_112) -
+                       c2o1 * (m_120 * m_102 + m_210 * m_012 + m_201 * m_021)) *
+                       oneOverRho +
+                      (c4o1 * (m_101 * m_101 * m_020 + m_011 * m_011 * m_200 + m_110 * m_110 * m_002) +
+                       c2o1 * (m_200 * m_020 * m_002) + c16o1 * m_110 * m_101 * m_011) *
+                       oneOverRho * oneOverRho -
+                       c1o3 * (m_022 + m_202 + m_220) * oneOverRho - c1o9 * (m_200 + m_020 + m_002) * oneOverRho +
+                      (c2o1 * (m_101 * m_101 + m_011 * m_011 + m_110 * m_110) +
+                       (m_002 * m_020 + m_002 * m_200 + m_020 * m_200) + c1o3 * (m_002 + m_020 + m_200)) *
+                       oneOverRho * oneOverRho * c2o3 +
+                       c1o27 * ((drho * drho - drho) * oneOverRho * oneOverRho));
+
+    ////////////////////////////////////////////////////////////////////////////////////
+    //! -  Add acceleration (body force) to first order cumulants according to Eq. (85)-(87) in
+    //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015),
+    //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
+    //!
+    m_100 = -m_100;
+    m_010 = -m_010;
+    m_001 = -m_001;
+
+    ////////////////////////////////////////////////////////////////////////////////////
+    //! - Chimera transform from central moments to well conditioned distributions as defined in Appendix J in
+    //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015),
+    //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a> see also Eq. (88)-(96) in <a
+    //! href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040
+    //! ]</b></a>
+    //!
+    ////////////////////////////////////////////////////////////////////////////////////
+    // X - Dir
+    backwardInverseChimeraWithK(m_000, m_100, m_200, vvx, vx2, c1o1, c1o1);
+    backwardChimera(            m_010, m_110, m_210, vvx, vx2);
+    backwardInverseChimeraWithK(m_020, m_120, m_220, vvx, vx2, c3o1, c1o3);
+    backwardChimera(            m_001, m_101, m_201, vvx, vx2);
+    backwardChimera(            m_011, m_111, m_211, vvx, vx2);
+    backwardChimera(            m_021, m_121, m_221, vvx, vx2);
+    backwardInverseChimeraWithK(m_002, m_102, m_202, vvx, vx2, c3o1, c1o3);
+    backwardChimera(            m_012, m_112, m_212, vvx, vx2);
+    backwardInverseChimeraWithK(m_022, m_122, m_222, vvx, vx2, c9o1, c1o9);
+
+    ////////////////////////////////////////////////////////////////////////////////////
+    // Y - Dir
+    backwardInverseChimeraWithK(m_000, m_010, m_020, vvy, vy2, c6o1, c1o6);
+    backwardChimera(            m_001, m_011, m_021, vvy, vy2);
+    backwardInverseChimeraWithK(m_002, m_012, m_022, vvy, vy2, c18o1, c1o18);
+    backwardInverseChimeraWithK(m_100, m_110, m_120, vvy, vy2, c3o2, c2o3);
+    backwardChimera(            m_101, m_111, m_121, vvy, vy2);
+    backwardInverseChimeraWithK(m_102, m_112, m_122, vvy, vy2, c9o2, c2o9);
+    backwardInverseChimeraWithK(m_200, m_210, m_220, vvy, vy2, c6o1, c1o6);
+    backwardChimera(            m_201, m_211, m_221, vvy, vy2);
+    backwardInverseChimeraWithK(m_202, m_212, m_222, vvy, vy2, c18o1, c1o18);
+
+    ////////////////////////////////////////////////////////////////////////////////////
+    // Z - Dir
+    backwardInverseChimeraWithK(m_000, m_001, m_002, vvz, vz2, c36o1, c1o36);
+    backwardInverseChimeraWithK(m_010, m_011, m_012, vvz, vz2, c9o1, c1o9);
+    backwardInverseChimeraWithK(m_020, m_021, m_022, vvz, vz2, c36o1, c1o36);
+    backwardInverseChimeraWithK(m_100, m_101, m_102, vvz, vz2, c9o1, c1o9);
+    backwardInverseChimeraWithK(m_110, m_111, m_112, vvz, vz2, c9o4, c4o9);
+    backwardInverseChimeraWithK(m_120, m_121, m_122, vvz, vz2, c9o1, c1o9);
+    backwardInverseChimeraWithK(m_200, m_201, m_202, vvz, vz2, c36o1, c1o36);
+    backwardInverseChimeraWithK(m_210, m_211, m_212, vvz, vz2, c9o1, c1o9);
+    backwardInverseChimeraWithK(m_220, m_221, m_222, vvz, vz2, c36o1, c1o36);
+
+    ////////////////////////////////////////////////////////////////////////////////////
+    //! - Write distributions: style of reading and writing the distributions from/to
+    //! stored arrays dependent on timestep is based on the esoteric twist algorithm
+    //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017),
+    //! DOI:10.3390/computation5020019 ]</b></a>
+    //!
+    (dist.f[DIR_P00])[k_000] = f_M00;
+    (dist.f[DIR_M00])[k_M00] = f_P00;
+    (dist.f[DIR_0P0])[k_000] = f_0M0;
+    (dist.f[DIR_0M0])[k_0M0] = f_0P0;
+    (dist.f[DIR_00P])[k_000] = f_00M;
+    (dist.f[DIR_00M])[k_00M] = f_00P;
+    (dist.f[DIR_PP0])[k_000] = f_MM0;
+    (dist.f[DIR_MM0])[k_MM0] = f_PP0;
+    (dist.f[DIR_PM0])[k_0M0] = f_MP0;
+    (dist.f[DIR_MP0])[k_M00] = f_PM0;
+    (dist.f[DIR_P0P])[k_000] = f_M0M;
+    (dist.f[DIR_M0M])[k_M0M] = f_P0P;
+    (dist.f[DIR_P0M])[k_00M] = f_M0P;
+    (dist.f[DIR_M0P])[k_M00] = f_P0M;
+    (dist.f[DIR_0PP])[k_000] = f_0MM;
+    (dist.f[DIR_0MM])[k_0MM] = f_0PP;
+    (dist.f[DIR_0PM])[k_00M] = f_0MP;
+    (dist.f[DIR_0MP])[k_0M0] = f_0PM;
+    (dist.f[DIR_000])[k_000] = f_000;
+    (dist.f[DIR_PPP])[k_000] = f_MMM;
+    (dist.f[DIR_PMP])[k_0M0] = f_MPM;
+    (dist.f[DIR_PPM])[k_00M] = f_MMP;
+    (dist.f[DIR_PMM])[k_0MM] = f_MPP;
+    (dist.f[DIR_MPP])[k_M00] = f_PMM;
+    (dist.f[DIR_MMP])[k_MM0] = f_PPM;
+    (dist.f[DIR_MPM])[k_M0M] = f_PMP;
+    (dist.f[DIR_MMM])[k_MMM] = f_PPP;
+}
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimRedesigned/CumulantK17CompChimRedesigned_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimRedesigned/CumulantK17CompChimRedesigned_Device.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..00628efc76447a09504d2fd32a26a63a4d611c66
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimRedesigned/CumulantK17CompChimRedesigned_Device.cuh
@@ -0,0 +1,20 @@
+#ifndef LB_Kernel_CUMULANT_K17_COMP_CHIM_REDESIGN_H
+#define LB_Kernel_CUMULANT_K17_COMP_CHIM_REDESIGN_H
+
+#include <DataTypes.h>
+#include <curand.h>
+
+__global__ void LB_Kernel_CumulantK17CompChimRedesigned(
+    real omega,
+    uint* neighborX,
+    uint* neighborY,
+    uint* neighborZ,
+    real* distributions,
+    unsigned long numberOfLBnodes,
+    int level,
+    real* forces,
+    real* quadricLimiters,
+    bool isEvenTimestep,
+    const uint* fluidNodeIndices,
+    uint numberOfFluidNodes);
+#endif
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStream.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStream.cu
index 7628d2a8ecd733355136149c6adad0f260023d3f..6fae9f6d4845019afd363790eea0ee17c69a060f 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStream.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStream.cu
@@ -14,20 +14,20 @@ std::shared_ptr<CumulantK17CompChimStream> CumulantK17CompChimStream::getNewInst
 
 void CumulantK17CompChimStream::run()
 {
-	LB_Kernel_CumulantK17CompChimStream <<< cudaGrid.grid, cudaGrid.threads >>>(
-		para->getParD(level)->omega,
-		para->getParD(level)->neighborX,
-		para->getParD(level)->neighborY,
-		para->getParD(level)->neighborZ,
-		para->getParD(level)->distributions.f[0],
-		para->getParD(level)->numberOfNodes,
-		level,
-		para->getForcesDev(),
+    LB_Kernel_CumulantK17CompChimStream <<< cudaGrid.grid, cudaGrid.threads >>>(
+        para->getParD(level)->omega,
+        para->getParD(level)->neighborX,
+        para->getParD(level)->neighborY,
+        para->getParD(level)->neighborZ,
+        para->getParD(level)->distributions.f[0],
+        para->getParD(level)->numberOfNodes,
+        level,
+        para->getForcesDev(),
         para->getQuadricLimitersDev(),
-		para->getParD(level)->isEvenTimestep,
+        para->getParD(level)->isEvenTimestep,
         para->getParD(level)->fluidNodeIndices,
-		para->getParD(level)->numberOfFluidNodes);
-	getLastCudaError("LB_Kernel_CumulantK17CompChim execution failed");
+        para->getParD(level)->numberOfFluidNodes);
+    getLastCudaError("LB_Kernel_CumulantK17CompChim execution failed");
 }
 
 void CumulantK17CompChimStream::runOnIndices(const unsigned int *indices, unsigned int size_indices, int streamIndex)
@@ -36,25 +36,26 @@ void CumulantK17CompChimStream::runOnIndices(const unsigned int *indices, unsign
 
     LB_Kernel_CumulantK17CompChimStream<<< cudaGrid.grid, cudaGrid.threads, 0, stream>>>(
         para->getParD(level)->omega, 
-	    para->getParD(level)->neighborX, 
-	    para->getParD(level)->neighborY,
+        para->getParD(level)->neighborX, 
+        para->getParD(level)->neighborY,
         para->getParD(level)->neighborZ, 
-	    para->getParD(level)->distributions.f[0], 
-	    para->getParD(level)->numberOfNodes, 
-	    level,
+        para->getParD(level)->distributions.f[0], 
+        para->getParD(level)->numberOfNodes, 
+        level,
         para->getForcesDev(), 
-	    para->getQuadricLimitersDev(), 
-	    para->getParD(level)->isEvenTimestep,
+        para->getQuadricLimitersDev(), 
+        para->getParD(level)->isEvenTimestep,
         indices,
-	    size_indices);
+        size_indices);
     getLastCudaError("LB_Kernel_CumulantK17CompChim execution failed");
     
 }
 
 CumulantK17CompChimStream::CumulantK17CompChimStream(std::shared_ptr<Parameter> para, int level): KernelImp(para, level)
 {
-	myPreProcessorTypes.push_back(InitCompSP27);
-	myKernelGroup = BasicKernel;
-	this->cudaGrid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
+    myPreProcessorTypes.push_back(InitCompSP27);
+    myKernelGroup = BasicKernel;
+    this->cudaGrid = vf::cuda::CudaGrid(para->getParD(level)->numberofthreads, para->getParD(level)->numberOfNodes);
+    this->kernelUsesFluidNodeIndices = true;
 }
 
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStreamDevice.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStreamDevice.cu
index aab6c580396efcac8056c355d5047a3062c750d1..d973cad5100b5b26871adab48db60b03dd05e487 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStreamDevice.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStreamDevice.cu
@@ -40,7 +40,7 @@ using namespace vf::lbm::dir;
 #include "Kernel/ChimeraTransformation.h"
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LB_Kernel_CumulantK17CompChimStream(
+__global__ void LB_Kernel_CumulantK17CompChimStream(
 	real omega,
 	uint* neighborX,
 	uint* neighborY,
@@ -87,61 +87,61 @@ extern "C" __global__ void LB_Kernel_CumulantK17CompChimStream(
 
         Distributions27 dist;
         if (isEvenTimestep) {
-            dist.f[E]    = &distributions[E * size_Mat];
-            dist.f[W]    = &distributions[W * size_Mat];
-            dist.f[N]    = &distributions[N * size_Mat];
-            dist.f[S]    = &distributions[S * size_Mat];
-            dist.f[T]    = &distributions[T * size_Mat];
-            dist.f[B]    = &distributions[B * size_Mat];
-            dist.f[NE]   = &distributions[NE * size_Mat];
-            dist.f[SW]   = &distributions[SW * size_Mat];
-            dist.f[SE]   = &distributions[SE * size_Mat];
-            dist.f[NW]   = &distributions[NW * size_Mat];
-            dist.f[TE]   = &distributions[TE * size_Mat];
-            dist.f[BW]   = &distributions[BW * size_Mat];
-            dist.f[BE]   = &distributions[BE * size_Mat];
-            dist.f[TW]   = &distributions[TW * size_Mat];
-            dist.f[TN]   = &distributions[TN * size_Mat];
-            dist.f[BS]   = &distributions[BS * size_Mat];
-            dist.f[BN]   = &distributions[BN * size_Mat];
-            dist.f[TS]   = &distributions[TS * size_Mat];
-            dist.f[REST] = &distributions[REST * size_Mat];
-            dist.f[TNE]  = &distributions[TNE * size_Mat];
-            dist.f[TSW]  = &distributions[TSW * size_Mat];
-            dist.f[TSE]  = &distributions[TSE * size_Mat];
-            dist.f[TNW]  = &distributions[TNW * size_Mat];
-            dist.f[BNE]  = &distributions[BNE * size_Mat];
-            dist.f[BSW]  = &distributions[BSW * size_Mat];
-            dist.f[BSE]  = &distributions[BSE * size_Mat];
-            dist.f[BNW]  = &distributions[BNW * size_Mat];
+            dist.f[DIR_P00]    = &distributions[DIR_P00 * size_Mat];
+            dist.f[DIR_M00]    = &distributions[DIR_M00 * size_Mat];
+            dist.f[DIR_0P0]    = &distributions[DIR_0P0 * size_Mat];
+            dist.f[DIR_0M0]    = &distributions[DIR_0M0 * size_Mat];
+            dist.f[DIR_00P]    = &distributions[DIR_00P * size_Mat];
+            dist.f[DIR_00M]    = &distributions[DIR_00M * size_Mat];
+            dist.f[DIR_PP0]   = &distributions[DIR_PP0 * size_Mat];
+            dist.f[DIR_MM0]   = &distributions[DIR_MM0 * size_Mat];
+            dist.f[DIR_PM0]   = &distributions[DIR_PM0 * size_Mat];
+            dist.f[DIR_MP0]   = &distributions[DIR_MP0 * size_Mat];
+            dist.f[DIR_P0P]   = &distributions[DIR_P0P * size_Mat];
+            dist.f[DIR_M0M]   = &distributions[DIR_M0M * size_Mat];
+            dist.f[DIR_P0M]   = &distributions[DIR_P0M * size_Mat];
+            dist.f[DIR_M0P]   = &distributions[DIR_M0P * size_Mat];
+            dist.f[DIR_0PP]   = &distributions[DIR_0PP * size_Mat];
+            dist.f[DIR_0MM]   = &distributions[DIR_0MM * size_Mat];
+            dist.f[DIR_0PM]   = &distributions[DIR_0PM * size_Mat];
+            dist.f[DIR_0MP]   = &distributions[DIR_0MP * size_Mat];
+            dist.f[DIR_000] = &distributions[DIR_000 * size_Mat];
+            dist.f[DIR_PPP]  = &distributions[DIR_PPP * size_Mat];
+            dist.f[DIR_MMP]  = &distributions[DIR_MMP * size_Mat];
+            dist.f[DIR_PMP]  = &distributions[DIR_PMP * size_Mat];
+            dist.f[DIR_MPP]  = &distributions[DIR_MPP * size_Mat];
+            dist.f[DIR_PPM]  = &distributions[DIR_PPM * size_Mat];
+            dist.f[DIR_MMM]  = &distributions[DIR_MMM * size_Mat];
+            dist.f[DIR_PMM]  = &distributions[DIR_PMM * size_Mat];
+            dist.f[DIR_MPM]  = &distributions[DIR_MPM * size_Mat];
         } else {
-            dist.f[W]    = &distributions[E * size_Mat];
-            dist.f[E]    = &distributions[W * size_Mat];
-            dist.f[S]    = &distributions[N * size_Mat];
-            dist.f[N]    = &distributions[S * size_Mat];
-            dist.f[B]    = &distributions[T * size_Mat];
-            dist.f[T]    = &distributions[B * size_Mat];
-            dist.f[SW]   = &distributions[NE * size_Mat];
-            dist.f[NE]   = &distributions[SW * size_Mat];
-            dist.f[NW]   = &distributions[SE * size_Mat];
-            dist.f[SE]   = &distributions[NW * size_Mat];
-            dist.f[BW]   = &distributions[TE * size_Mat];
-            dist.f[TE]   = &distributions[BW * size_Mat];
-            dist.f[TW]   = &distributions[BE * size_Mat];
-            dist.f[BE]   = &distributions[TW * size_Mat];
-            dist.f[BS]   = &distributions[TN * size_Mat];
-            dist.f[TN]   = &distributions[BS * size_Mat];
-            dist.f[TS]   = &distributions[BN * size_Mat];
-            dist.f[BN]   = &distributions[TS * size_Mat];
-            dist.f[REST] = &distributions[REST * size_Mat];
-            dist.f[BSW]  = &distributions[TNE * size_Mat];
-            dist.f[BNE]  = &distributions[TSW * size_Mat];
-            dist.f[BNW]  = &distributions[TSE * size_Mat];
-            dist.f[BSE]  = &distributions[TNW * size_Mat];
-            dist.f[TSW]  = &distributions[BNE * size_Mat];
-            dist.f[TNE]  = &distributions[BSW * size_Mat];
-            dist.f[TNW]  = &distributions[BSE * size_Mat];
-            dist.f[TSE]  = &distributions[BNW * size_Mat];
+            dist.f[DIR_M00]    = &distributions[DIR_P00 * size_Mat];
+            dist.f[DIR_P00]    = &distributions[DIR_M00 * size_Mat];
+            dist.f[DIR_0M0]    = &distributions[DIR_0P0 * size_Mat];
+            dist.f[DIR_0P0]    = &distributions[DIR_0M0 * size_Mat];
+            dist.f[DIR_00M]    = &distributions[DIR_00P * size_Mat];
+            dist.f[DIR_00P]    = &distributions[DIR_00M * size_Mat];
+            dist.f[DIR_MM0]   = &distributions[DIR_PP0 * size_Mat];
+            dist.f[DIR_PP0]   = &distributions[DIR_MM0 * size_Mat];
+            dist.f[DIR_MP0]   = &distributions[DIR_PM0 * size_Mat];
+            dist.f[DIR_PM0]   = &distributions[DIR_MP0 * size_Mat];
+            dist.f[DIR_M0M]   = &distributions[DIR_P0P * size_Mat];
+            dist.f[DIR_P0P]   = &distributions[DIR_M0M * size_Mat];
+            dist.f[DIR_M0P]   = &distributions[DIR_P0M * size_Mat];
+            dist.f[DIR_P0M]   = &distributions[DIR_M0P * size_Mat];
+            dist.f[DIR_0MM]   = &distributions[DIR_0PP * size_Mat];
+            dist.f[DIR_0PP]   = &distributions[DIR_0MM * size_Mat];
+            dist.f[DIR_0MP]   = &distributions[DIR_0PM * size_Mat];
+            dist.f[DIR_0PM]   = &distributions[DIR_0MP * size_Mat];
+            dist.f[DIR_000] = &distributions[DIR_000 * size_Mat];
+            dist.f[DIR_MMM]  = &distributions[DIR_PPP * size_Mat];
+            dist.f[DIR_PPM]  = &distributions[DIR_MMP * size_Mat];
+            dist.f[DIR_MPM]  = &distributions[DIR_PMP * size_Mat];
+            dist.f[DIR_PMM]  = &distributions[DIR_MPP * size_Mat];
+            dist.f[DIR_MMP]  = &distributions[DIR_PPM * size_Mat];
+            dist.f[DIR_PPP]  = &distributions[DIR_MMM * size_Mat];
+            dist.f[DIR_MPP]  = &distributions[DIR_PMM * size_Mat];
+            dist.f[DIR_PMP]  = &distributions[DIR_MPM * size_Mat];
         }
         ////////////////////////////////////////////////////////////////////////////////
         //! - Set neighbor indices (necessary for indirect addressing)
@@ -155,33 +155,33 @@ extern "C" __global__ void LB_Kernel_CumulantK17CompChimStream(
         ////////////////////////////////////////////////////////////////////////////////////
         //! - Set local distributions
         //!
-        real mfcbb = (dist.f[E])[k];
-        real mfabb = (dist.f[W])[kw];
-        real mfbcb = (dist.f[N])[k];
-        real mfbab = (dist.f[S])[ks];
-        real mfbbc = (dist.f[T])[k];
-        real mfbba = (dist.f[B])[kb];
-        real mfccb = (dist.f[NE])[k];
-        real mfaab = (dist.f[SW])[ksw];
-        real mfcab = (dist.f[SE])[ks];
-        real mfacb = (dist.f[NW])[kw];
-        real mfcbc = (dist.f[TE])[k];
-        real mfaba = (dist.f[BW])[kbw];
-        real mfcba = (dist.f[BE])[kb];
-        real mfabc = (dist.f[TW])[kw];
-        real mfbcc = (dist.f[TN])[k];
-        real mfbaa = (dist.f[BS])[kbs];
-        real mfbca = (dist.f[BN])[kb];
-        real mfbac = (dist.f[TS])[ks];
-        real mfbbb = (dist.f[REST])[k];
-        real mfccc = (dist.f[TNE])[k];
-        real mfaac = (dist.f[TSW])[ksw];
-        real mfcac = (dist.f[TSE])[ks];
-        real mfacc = (dist.f[TNW])[kw];
-        real mfcca = (dist.f[BNE])[kb];
-        real mfaaa = (dist.f[BSW])[kbsw];
-        real mfcaa = (dist.f[BSE])[kbs];
-        real mfaca = (dist.f[BNW])[kbw];
+        real mfcbb = (dist.f[DIR_P00])[k];
+        real mfabb = (dist.f[DIR_M00])[kw];
+        real mfbcb = (dist.f[DIR_0P0])[k];
+        real mfbab = (dist.f[DIR_0M0])[ks];
+        real mfbbc = (dist.f[DIR_00P])[k];
+        real mfbba = (dist.f[DIR_00M])[kb];
+        real mfccb = (dist.f[DIR_PP0])[k];
+        real mfaab = (dist.f[DIR_MM0])[ksw];
+        real mfcab = (dist.f[DIR_PM0])[ks];
+        real mfacb = (dist.f[DIR_MP0])[kw];
+        real mfcbc = (dist.f[DIR_P0P])[k];
+        real mfaba = (dist.f[DIR_M0M])[kbw];
+        real mfcba = (dist.f[DIR_P0M])[kb];
+        real mfabc = (dist.f[DIR_M0P])[kw];
+        real mfbcc = (dist.f[DIR_0PP])[k];
+        real mfbaa = (dist.f[DIR_0MM])[kbs];
+        real mfbca = (dist.f[DIR_0PM])[kb];
+        real mfbac = (dist.f[DIR_0MP])[ks];
+        real mfbbb = (dist.f[DIR_000])[k];
+        real mfccc = (dist.f[DIR_PPP])[k];
+        real mfaac = (dist.f[DIR_MMP])[ksw];
+        real mfcac = (dist.f[DIR_PMP])[ks];
+        real mfacc = (dist.f[DIR_MPP])[kw];
+        real mfcca = (dist.f[DIR_PPM])[kb];
+        real mfaaa = (dist.f[DIR_MMM])[kbsw];
+        real mfcaa = (dist.f[DIR_PMM])[kbs];
+        real mfaca = (dist.f[DIR_MPM])[kbw];
         ////////////////////////////////////////////////////////////////////////////////////
         //! - Calculate density and velocity using pyramid summation for low round-off errors as in Eq. (J1)-(J3) \ref
         //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015),
@@ -316,7 +316,7 @@ extern "C" __global__ void LB_Kernel_CumulantK17CompChimStream(
         real O6 = c1o1;
 
         ////////////////////////////////////////////////////////////////////////////////////
-        //! - A and B: parameters for fourth order convergence of the diffusion term according to Eq. (115) and (116)
+        //! - A and B: parameters for fourth order convergence of the diffusion term according to Eq. (115) and (116)
         //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
         //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a> with simplifications assuming \f$ \omega_2 = 1.0 \f$ (modify for
         //! different bulk viscosity).
@@ -609,32 +609,32 @@ extern "C" __global__ void LB_Kernel_CumulantK17CompChimStream(
         //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017),
         //! DOI:10.3390/computation5020019 ]</b></a>
         //!
-        (dist.f[E])[k]      = mfabb;
-        (dist.f[W])[kw]     = mfcbb;
-        (dist.f[N])[k]      = mfbab;
-        (dist.f[S])[ks]     = mfbcb;
-        (dist.f[T])[k]      = mfbba;
-        (dist.f[B])[kb]     = mfbbc;
-        (dist.f[NE])[k]     = mfaab;
-        (dist.f[SW])[ksw]   = mfccb;
-        (dist.f[SE])[ks]    = mfacb;
-        (dist.f[NW])[kw]    = mfcab;
-        (dist.f[TE])[k]     = mfaba;
-        (dist.f[BW])[kbw]   = mfcbc;
-        (dist.f[BE])[kb]    = mfabc;
-        (dist.f[TW])[kw]    = mfcba;
-        (dist.f[TN])[k]     = mfbaa;
-        (dist.f[BS])[kbs]   = mfbcc;
-        (dist.f[BN])[kb]    = mfbac;
-        (dist.f[TS])[ks]    = mfbca;
-        (dist.f[REST])[k]   = mfbbb;
-        (dist.f[TNE])[k]    = mfaaa;
-        (dist.f[TSE])[ks]   = mfaca;
-        (dist.f[BNE])[kb]   = mfaac;
-        (dist.f[BSE])[kbs]  = mfacc;
-        (dist.f[TNW])[kw]   = mfcaa;
-        (dist.f[TSW])[ksw]  = mfcca;
-        (dist.f[BNW])[kbw]  = mfcac;
-        (dist.f[BSW])[kbsw] = mfccc;
+        (dist.f[DIR_P00])[k]      = mfabb;
+        (dist.f[DIR_M00])[kw]     = mfcbb;
+        (dist.f[DIR_0P0])[k]      = mfbab;
+        (dist.f[DIR_0M0])[ks]     = mfbcb;
+        (dist.f[DIR_00P])[k]      = mfbba;
+        (dist.f[DIR_00M])[kb]     = mfbbc;
+        (dist.f[DIR_PP0])[k]     = mfaab;
+        (dist.f[DIR_MM0])[ksw]   = mfccb;
+        (dist.f[DIR_PM0])[ks]    = mfacb;
+        (dist.f[DIR_MP0])[kw]    = mfcab;
+        (dist.f[DIR_P0P])[k]     = mfaba;
+        (dist.f[DIR_M0M])[kbw]   = mfcbc;
+        (dist.f[DIR_P0M])[kb]    = mfabc;
+        (dist.f[DIR_M0P])[kw]    = mfcba;
+        (dist.f[DIR_0PP])[k]     = mfbaa;
+        (dist.f[DIR_0MM])[kbs]   = mfbcc;
+        (dist.f[DIR_0PM])[kb]    = mfbac;
+        (dist.f[DIR_0MP])[ks]    = mfbca;
+        (dist.f[DIR_000])[k]   = mfbbb;
+        (dist.f[DIR_PPP])[k]    = mfaaa;
+        (dist.f[DIR_PMP])[ks]   = mfaca;
+        (dist.f[DIR_PPM])[kb]   = mfaac;
+        (dist.f[DIR_PMM])[kbs]  = mfacc;
+        (dist.f[DIR_MPP])[kw]   = mfcaa;
+        (dist.f[DIR_MMP])[ksw]  = mfcca;
+        (dist.f[DIR_MPM])[kbw]  = mfcac;
+        (dist.f[DIR_MMM])[kbsw] = mfccc;
     }
 }
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStream_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStream_Device.cuh
index 035e438ccf8df70c5df43c70f0b4a5ffe160acc8..f74192c0423ba9dc96820d7f46eecb9d49a39ed4 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStream_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStream_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_CumulantK17CompChimStream(
+__global__ void LB_Kernel_CumulantK17CompChimStream(
 	real omega,
 	uint* neighborX,
 	uint* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp_Device.cu
index 66170ecb09effc282c17ea42d3a2e4a07b8313b6..bb42d113e47ce28f153ac295f2d9a934dd1b213a 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_CumulantK18Comp(
+__global__ void LB_Kernel_CumulantK18Comp(
 	real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
@@ -42,83 +42,83 @@ extern "C" __global__ void LB_Kernel_CumulantK18Comp(
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[E] = &DDStart[E   *size_Mat];
-				D.f[W] = &DDStart[W   *size_Mat];
-				D.f[N] = &DDStart[N   *size_Mat];
-				D.f[S] = &DDStart[S   *size_Mat];
-				D.f[T] = &DDStart[T   *size_Mat];
-				D.f[B] = &DDStart[B   *size_Mat];
-				D.f[NE] = &DDStart[NE  *size_Mat];
-				D.f[SW] = &DDStart[SW  *size_Mat];
-				D.f[SE] = &DDStart[SE  *size_Mat];
-				D.f[NW] = &DDStart[NW  *size_Mat];
-				D.f[TE] = &DDStart[TE  *size_Mat];
-				D.f[BW] = &DDStart[BW  *size_Mat];
-				D.f[BE] = &DDStart[BE  *size_Mat];
-				D.f[TW] = &DDStart[TW  *size_Mat];
-				D.f[TN] = &DDStart[TN  *size_Mat];
-				D.f[BS] = &DDStart[BS  *size_Mat];
-				D.f[BN] = &DDStart[BN  *size_Mat];
-				D.f[TS] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[TNE] = &DDStart[TNE *size_Mat];
-				D.f[TSW] = &DDStart[TSW *size_Mat];
-				D.f[TSE] = &DDStart[TSE *size_Mat];
-				D.f[TNW] = &DDStart[TNW *size_Mat];
-				D.f[BNE] = &DDStart[BNE *size_Mat];
-				D.f[BSW] = &DDStart[BSW *size_Mat];
-				D.f[BSE] = &DDStart[BSE *size_Mat];
-				D.f[BNW] = &DDStart[BNW *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
 			}
 			else
 			{
-				D.f[W] = &DDStart[E   *size_Mat];
-				D.f[E] = &DDStart[W   *size_Mat];
-				D.f[S] = &DDStart[N   *size_Mat];
-				D.f[N] = &DDStart[S   *size_Mat];
-				D.f[B] = &DDStart[T   *size_Mat];
-				D.f[T] = &DDStart[B   *size_Mat];
-				D.f[SW] = &DDStart[NE  *size_Mat];
-				D.f[NE] = &DDStart[SW  *size_Mat];
-				D.f[NW] = &DDStart[SE  *size_Mat];
-				D.f[SE] = &DDStart[NW  *size_Mat];
-				D.f[BW] = &DDStart[TE  *size_Mat];
-				D.f[TE] = &DDStart[BW  *size_Mat];
-				D.f[TW] = &DDStart[BE  *size_Mat];
-				D.f[BE] = &DDStart[TW  *size_Mat];
-				D.f[BS] = &DDStart[TN  *size_Mat];
-				D.f[TN] = &DDStart[BS  *size_Mat];
-				D.f[TS] = &DDStart[BN  *size_Mat];
-				D.f[BN] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[BSW] = &DDStart[TNE *size_Mat];
-				D.f[BNE] = &DDStart[TSW *size_Mat];
-				D.f[BNW] = &DDStart[TSE *size_Mat];
-				D.f[BSE] = &DDStart[TNW *size_Mat];
-				D.f[TSW] = &DDStart[BNE *size_Mat];
-				D.f[TNE] = &DDStart[BSW *size_Mat];
-				D.f[TNW] = &DDStart[BSE *size_Mat];
-				D.f[TSE] = &DDStart[BNW *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
 			}
 
 			Distributions6 G;
 			if (EvenOrOdd == true)
 			{
-				G.g[E] = &G6[E   *size_Mat];
-				G.g[W] = &G6[W   *size_Mat];
-				G.g[N] = &G6[N   *size_Mat];
-				G.g[S] = &G6[S   *size_Mat];
-				G.g[T] = &G6[T   *size_Mat];
-				G.g[B] = &G6[B   *size_Mat];
+				G.g[DIR_P00] = &G6[DIR_P00   *size_Mat];
+				G.g[DIR_M00] = &G6[DIR_M00   *size_Mat];
+				G.g[DIR_0P0] = &G6[DIR_0P0   *size_Mat];
+				G.g[DIR_0M0] = &G6[DIR_0M0   *size_Mat];
+				G.g[DIR_00P] = &G6[DIR_00P   *size_Mat];
+				G.g[DIR_00M] = &G6[DIR_00M   *size_Mat];
 			}
 			else
 			{
-				G.g[W] = &G6[E   *size_Mat];
-				G.g[E] = &G6[W   *size_Mat];
-				G.g[S] = &G6[N   *size_Mat];
-				G.g[N] = &G6[S   *size_Mat];
-				G.g[B] = &G6[T   *size_Mat];
-				G.g[T] = &G6[B   *size_Mat];
+				G.g[DIR_M00] = &G6[DIR_P00   *size_Mat];
+				G.g[DIR_P00] = &G6[DIR_M00   *size_Mat];
+				G.g[DIR_0M0] = &G6[DIR_0P0   *size_Mat];
+				G.g[DIR_0P0] = &G6[DIR_0M0   *size_Mat];
+				G.g[DIR_00M] = &G6[DIR_00P   *size_Mat];
+				G.g[DIR_00P] = &G6[DIR_00M   *size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -152,43 +152,43 @@ extern "C" __global__ void LB_Kernel_CumulantK18Comp(
 			unsigned int kbsw = neighborZ[ksw];
 
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mgcbb = (G.g[E])[k];
-			real mgabb = (G.g[W])[kw];
-			real mgbcb = (G.g[N])[k];
-			real mgbab = (G.g[S])[ks];
-			real mgbbc = (G.g[T])[k];
-			real mgbba = (G.g[B])[kb];
+			real mgcbb = (G.g[DIR_P00])[k];
+			real mgabb = (G.g[DIR_M00])[kw];
+			real mgbcb = (G.g[DIR_0P0])[k];
+			real mgbab = (G.g[DIR_0M0])[ks];
+			real mgbbc = (G.g[DIR_00P])[k];
+			real mgbba = (G.g[DIR_00M])[kb];
 			real dxxux = c1o2 * (-mgcbb + mgabb);
 			real dyyuy = c1o2 * (-mgbcb + mgbab);
 			real dzzuz = c1o2 * (-mgbbc + mgbba);
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[E])[k];
-			real mfabb = (D.f[W])[kw];
-			real mfbcb = (D.f[N])[k];
-			real mfbab = (D.f[S])[ks];
-			real mfbbc = (D.f[T])[k];
-			real mfbba = (D.f[B])[kb];
-			real mfccb = (D.f[NE])[k];
-			real mfaab = (D.f[SW])[ksw];
-			real mfcab = (D.f[SE])[ks];
-			real mfacb = (D.f[NW])[kw];
-			real mfcbc = (D.f[TE])[k];
-			real mfaba = (D.f[BW])[kbw];
-			real mfcba = (D.f[BE])[kb];
-			real mfabc = (D.f[TW])[kw];
-			real mfbcc = (D.f[TN])[k];
-			real mfbaa = (D.f[BS])[kbs];
-			real mfbca = (D.f[BN])[kb];
-			real mfbac = (D.f[TS])[ks];
-			real mfbbb = (D.f[REST])[k];
-			real mfccc = (D.f[TNE])[k];
-			real mfaac = (D.f[TSW])[ksw];
-			real mfcac = (D.f[TSE])[ks];
-			real mfacc = (D.f[TNW])[kw];
-			real mfcca = (D.f[BNE])[kb];
-			real mfaaa = (D.f[BSW])[kbsw];
-			real mfcaa = (D.f[BSE])[kbs];
-			real mfaca = (D.f[BNW])[kbw];
+			real mfcbb = (D.f[DIR_P00])[k];
+			real mfabb = (D.f[DIR_M00])[kw];
+			real mfbcb = (D.f[DIR_0P0])[k];
+			real mfbab = (D.f[DIR_0M0])[ks];
+			real mfbbc = (D.f[DIR_00P])[k];
+			real mfbba = (D.f[DIR_00M])[kb];
+			real mfccb = (D.f[DIR_PP0])[k];
+			real mfaab = (D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks];
+			real mfacb = (D.f[DIR_MP0])[kw];
+			real mfcbc = (D.f[DIR_P0P])[k];
+			real mfaba = (D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb];
+			real mfabc = (D.f[DIR_M0P])[kw];
+			real mfbcc = (D.f[DIR_0PP])[k];
+			real mfbaa = (D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb];
+			real mfbac = (D.f[DIR_0MP])[ks];
+			real mfbbb = (D.f[DIR_000])[k];
+			real mfccc = (D.f[DIR_PPP])[k];
+			real mfaac = (D.f[DIR_MMP])[ksw];
+			real mfcac = (D.f[DIR_PMP])[ks];
+			real mfacc = (D.f[DIR_MPP])[kw];
+			real mfcca = (D.f[DIR_PPM])[kb];
+			real mfaaa = (D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];
+			real mfaca = (D.f[DIR_MPM])[kbw];
 			////////////////////////////////////////////////////////////////////////////////////
 			real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 				(((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
@@ -915,41 +915,41 @@ extern "C" __global__ void LB_Kernel_CumulantK18Comp(
 			////////////////////////////////////////////////////////////////////////////////////
 
 			////////////////////////////////////////////////////////////////////////////////////
-			(D.f[E])[k] = mfabb;
-			(D.f[W])[kw] = mfcbb;
-			(D.f[N])[k] = mfbab;
-			(D.f[S])[ks] = mfbcb;
-			(D.f[T])[k] = mfbba;
-			(D.f[B])[kb] = mfbbc;
-			(D.f[NE])[k] = mfaab;
-			(D.f[SW])[ksw] = mfccb;
-			(D.f[SE])[ks] = mfacb;
-			(D.f[NW])[kw] = mfcab;
-			(D.f[TE])[k] = mfaba;
-			(D.f[BW])[kbw] = mfcbc;
-			(D.f[BE])[kb] = mfabc;
-			(D.f[TW])[kw] = mfcba;
-			(D.f[TN])[k] = mfbaa;
-			(D.f[BS])[kbs] = mfbcc;
-			(D.f[BN])[kb] = mfbac;
-			(D.f[TS])[ks] = mfbca;
-			(D.f[REST])[k] = mfbbb;
-			(D.f[TNE])[k] = mfaaa;
-			(D.f[TSE])[ks] = mfaca;
-			(D.f[BNE])[kb] = mfaac;
-			(D.f[BSE])[kbs] = mfacc;
-			(D.f[TNW])[kw] = mfcaa;
-			(D.f[TSW])[ksw] = mfcca;
-			(D.f[BNW])[kbw] = mfcac;
-			(D.f[BSW])[kbsw] = mfccc;
-			////////////////////////////////////////////////////////////////////////////////////
-
-			(G.g[E])[k] = mgabb;
-			(G.g[W])[kw] = mgcbb;
-			(G.g[N])[k] = mgbab;
-			(G.g[S])[ks] = mgbcb;
-			(G.g[T])[k] = mgbba;
-			(G.g[B])[kb] = mgbbc;
+			(D.f[DIR_P00])[k] = mfabb;
+			(D.f[DIR_M00])[kw] = mfcbb;
+			(D.f[DIR_0P0])[k] = mfbab;
+			(D.f[DIR_0M0])[ks] = mfbcb;
+			(D.f[DIR_00P])[k] = mfbba;
+			(D.f[DIR_00M])[kb] = mfbbc;
+			(D.f[DIR_PP0])[k] = mfaab;
+			(D.f[DIR_MM0])[ksw] = mfccb;
+			(D.f[DIR_PM0])[ks] = mfacb;
+			(D.f[DIR_MP0])[kw] = mfcab;
+			(D.f[DIR_P0P])[k] = mfaba;
+			(D.f[DIR_M0M])[kbw] = mfcbc;
+			(D.f[DIR_P0M])[kb] = mfabc;
+			(D.f[DIR_M0P])[kw] = mfcba;
+			(D.f[DIR_0PP])[k] = mfbaa;
+			(D.f[DIR_0MM])[kbs] = mfbcc;
+			(D.f[DIR_0PM])[kb] = mfbac;
+			(D.f[DIR_0MP])[ks] = mfbca;
+			(D.f[DIR_000])[k] = mfbbb;
+			(D.f[DIR_PPP])[k] = mfaaa;
+			(D.f[DIR_PMP])[ks] = mfaca;
+			(D.f[DIR_PPM])[kb] = mfaac;
+			(D.f[DIR_PMM])[kbs] = mfacc;
+			(D.f[DIR_MPP])[kw] = mfcaa;
+			(D.f[DIR_MMP])[ksw] = mfcca;
+			(D.f[DIR_MPM])[kbw] = mfcac;
+			(D.f[DIR_MMM])[kbsw] = mfccc;
+			////////////////////////////////////////////////////////////////////////////////////
+
+			(G.g[DIR_P00])[k] = mgabb;
+			(G.g[DIR_M00])[kw] = mgcbb;
+			(G.g[DIR_0P0])[k] = mgbab;
+			(G.g[DIR_0M0])[ks] = mgbcb;
+			(G.g[DIR_00P])[k] = mgbba;
+			(G.g[DIR_00M])[kb] = mgbbc;
 		}
 	}
 }
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp_Device.cuh
index 1d2358370ecd9baabacdda345973324e7e2382f7..60a15145e3c117cadb7485f2899ba768b10eb0c1 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_CumulantK18Comp(	real omega,
+__global__ void LB_Kernel_CumulantK18Comp(	real omega,
 														unsigned int* bcMatD,
 														unsigned int* neighborX,
 														unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK20/CumulantK20Comp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK20/CumulantK20Comp_Device.cu
index acbca26e9cb37b74a59efe688143934acb65f335..c805fc293aeb8b182bb0e01df82b584da69d0175 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK20/CumulantK20Comp_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK20/CumulantK20Comp_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_CumulantK20Comp(
+__global__ void LB_Kernel_CumulantK20Comp(
 	real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
@@ -42,83 +42,83 @@ extern "C" __global__ void LB_Kernel_CumulantK20Comp(
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[E] = &DDStart[E   *size_Mat];
-				D.f[W] = &DDStart[W   *size_Mat];
-				D.f[N] = &DDStart[N   *size_Mat];
-				D.f[S] = &DDStart[S   *size_Mat];
-				D.f[T] = &DDStart[T   *size_Mat];
-				D.f[B] = &DDStart[B   *size_Mat];
-				D.f[NE] = &DDStart[NE  *size_Mat];
-				D.f[SW] = &DDStart[SW  *size_Mat];
-				D.f[SE] = &DDStart[SE  *size_Mat];
-				D.f[NW] = &DDStart[NW  *size_Mat];
-				D.f[TE] = &DDStart[TE  *size_Mat];
-				D.f[BW] = &DDStart[BW  *size_Mat];
-				D.f[BE] = &DDStart[BE  *size_Mat];
-				D.f[TW] = &DDStart[TW  *size_Mat];
-				D.f[TN] = &DDStart[TN  *size_Mat];
-				D.f[BS] = &DDStart[BS  *size_Mat];
-				D.f[BN] = &DDStart[BN  *size_Mat];
-				D.f[TS] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[TNE] = &DDStart[TNE *size_Mat];
-				D.f[TSW] = &DDStart[TSW *size_Mat];
-				D.f[TSE] = &DDStart[TSE *size_Mat];
-				D.f[TNW] = &DDStart[TNW *size_Mat];
-				D.f[BNE] = &DDStart[BNE *size_Mat];
-				D.f[BSW] = &DDStart[BSW *size_Mat];
-				D.f[BSE] = &DDStart[BSE *size_Mat];
-				D.f[BNW] = &DDStart[BNW *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
 			}
 			else
 			{
-				D.f[W] = &DDStart[E   *size_Mat];
-				D.f[E] = &DDStart[W   *size_Mat];
-				D.f[S] = &DDStart[N   *size_Mat];
-				D.f[N] = &DDStart[S   *size_Mat];
-				D.f[B] = &DDStart[T   *size_Mat];
-				D.f[T] = &DDStart[B   *size_Mat];
-				D.f[SW] = &DDStart[NE  *size_Mat];
-				D.f[NE] = &DDStart[SW  *size_Mat];
-				D.f[NW] = &DDStart[SE  *size_Mat];
-				D.f[SE] = &DDStart[NW  *size_Mat];
-				D.f[BW] = &DDStart[TE  *size_Mat];
-				D.f[TE] = &DDStart[BW  *size_Mat];
-				D.f[TW] = &DDStart[BE  *size_Mat];
-				D.f[BE] = &DDStart[TW  *size_Mat];
-				D.f[BS] = &DDStart[TN  *size_Mat];
-				D.f[TN] = &DDStart[BS  *size_Mat];
-				D.f[TS] = &DDStart[BN  *size_Mat];
-				D.f[BN] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[BSW] = &DDStart[TNE *size_Mat];
-				D.f[BNE] = &DDStart[TSW *size_Mat];
-				D.f[BNW] = &DDStart[TSE *size_Mat];
-				D.f[BSE] = &DDStart[TNW *size_Mat];
-				D.f[TSW] = &DDStart[BNE *size_Mat];
-				D.f[TNE] = &DDStart[BSW *size_Mat];
-				D.f[TNW] = &DDStart[BSE *size_Mat];
-				D.f[TSE] = &DDStart[BNW *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
 			}
 
 			Distributions6 G;
 			if (EvenOrOdd == true)
 			{
-				G.g[E] = &G6[E   *size_Mat];
-				G.g[W] = &G6[W   *size_Mat];
-				G.g[N] = &G6[N   *size_Mat];
-				G.g[S] = &G6[S   *size_Mat];
-				G.g[T] = &G6[T   *size_Mat];
-				G.g[B] = &G6[B   *size_Mat];
+				G.g[DIR_P00] = &G6[DIR_P00   *size_Mat];
+				G.g[DIR_M00] = &G6[DIR_M00   *size_Mat];
+				G.g[DIR_0P0] = &G6[DIR_0P0   *size_Mat];
+				G.g[DIR_0M0] = &G6[DIR_0M0   *size_Mat];
+				G.g[DIR_00P] = &G6[DIR_00P   *size_Mat];
+				G.g[DIR_00M] = &G6[DIR_00M   *size_Mat];
 			}
 			else
 			{
-				G.g[W] = &G6[E   *size_Mat];
-				G.g[E] = &G6[W   *size_Mat];
-				G.g[S] = &G6[N   *size_Mat];
-				G.g[N] = &G6[S   *size_Mat];
-				G.g[B] = &G6[T   *size_Mat];
-				G.g[T] = &G6[B   *size_Mat];
+				G.g[DIR_M00] = &G6[DIR_P00   *size_Mat];
+				G.g[DIR_P00] = &G6[DIR_M00   *size_Mat];
+				G.g[DIR_0M0] = &G6[DIR_0P0   *size_Mat];
+				G.g[DIR_0P0] = &G6[DIR_0M0   *size_Mat];
+				G.g[DIR_00M] = &G6[DIR_00P   *size_Mat];
+				G.g[DIR_00P] = &G6[DIR_00M   *size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -151,43 +151,43 @@ extern "C" __global__ void LB_Kernel_CumulantK20Comp(
 			//unsigned int ktne = k;
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mgcbb = (G.g[E])[k];
-			real mgabb = (G.g[W])[kw];
-			real mgbcb = (G.g[N])[k];
-			real mgbab = (G.g[S])[ks];
-			real mgbbc = (G.g[T])[k];
-			real mgbba = (G.g[B])[kb];
+			real mgcbb = (G.g[DIR_P00])[k];
+			real mgabb = (G.g[DIR_M00])[kw];
+			real mgbcb = (G.g[DIR_0P0])[k];
+			real mgbab = (G.g[DIR_0M0])[ks];
+			real mgbbc = (G.g[DIR_00P])[k];
+			real mgbba = (G.g[DIR_00M])[kb];
 			real dxuxdxux = c1o2 * (-mgcbb + mgabb);
 			real dyuydyuy = c1o2 * (-mgbcb + mgbab);
 			real dzuzdzuz = c1o2 * (-mgbbc + mgbba);
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[E])[k];
-			real mfabb = (D.f[W])[kw];
-			real mfbcb = (D.f[N])[k];
-			real mfbab = (D.f[S])[ks];
-			real mfbbc = (D.f[T])[k];
-			real mfbba = (D.f[B])[kb];
-			real mfccb = (D.f[NE])[k];
-			real mfaab = (D.f[SW])[ksw];
-			real mfcab = (D.f[SE])[ks];
-			real mfacb = (D.f[NW])[kw];
-			real mfcbc = (D.f[TE])[k];
-			real mfaba = (D.f[BW])[kbw];
-			real mfcba = (D.f[BE])[kb];
-			real mfabc = (D.f[TW])[kw];
-			real mfbcc = (D.f[TN])[k];
-			real mfbaa = (D.f[BS])[kbs];
-			real mfbca = (D.f[BN])[kb];
-			real mfbac = (D.f[TS])[ks];
-			real mfbbb = (D.f[REST])[k];
-			real mfccc = (D.f[TNE])[k];
-			real mfaac = (D.f[TSW])[ksw];
-			real mfcac = (D.f[TSE])[ks];
-			real mfacc = (D.f[TNW])[kw];
-			real mfcca = (D.f[BNE])[kb];
-			real mfaaa = (D.f[BSW])[kbsw];
-			real mfcaa = (D.f[BSE])[kbs];
-			real mfaca = (D.f[BNW])[kbw];
+			real mfcbb = (D.f[DIR_P00])[k];
+			real mfabb = (D.f[DIR_M00])[kw];
+			real mfbcb = (D.f[DIR_0P0])[k];
+			real mfbab = (D.f[DIR_0M0])[ks];
+			real mfbbc = (D.f[DIR_00P])[k];
+			real mfbba = (D.f[DIR_00M])[kb];
+			real mfccb = (D.f[DIR_PP0])[k];
+			real mfaab = (D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks];
+			real mfacb = (D.f[DIR_MP0])[kw];
+			real mfcbc = (D.f[DIR_P0P])[k];
+			real mfaba = (D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb];
+			real mfabc = (D.f[DIR_M0P])[kw];
+			real mfbcc = (D.f[DIR_0PP])[k];
+			real mfbaa = (D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb];
+			real mfbac = (D.f[DIR_0MP])[ks];
+			real mfbbb = (D.f[DIR_000])[k];
+			real mfccc = (D.f[DIR_PPP])[k];
+			real mfaac = (D.f[DIR_MMP])[ksw];
+			real mfcac = (D.f[DIR_PMP])[ks];
+			real mfacc = (D.f[DIR_MPP])[kw];
+			real mfcca = (D.f[DIR_PPM])[kb];
+			real mfaaa = (D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];
+			real mfaca = (D.f[DIR_MPM])[kbw];
 			////////////////////////////////////////////////////////////////////////////////////
 			real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 				(((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
@@ -919,41 +919,41 @@ extern "C" __global__ void LB_Kernel_CumulantK20Comp(
 			////////////////////////////////////////////////////////////////////////////////////
 
 			////////////////////////////////////////////////////////////////////////////////////
-			(D.f[E])[k] = mfabb;
-			(D.f[W])[kw] = mfcbb;
-			(D.f[N])[k] = mfbab;
-			(D.f[S])[ks] = mfbcb;
-			(D.f[T])[k] = mfbba;
-			(D.f[B])[kb] = mfbbc;
-			(D.f[NE])[k] = mfaab;
-			(D.f[SW])[ksw] = mfccb;
-			(D.f[SE])[ks] = mfacb;
-			(D.f[NW])[kw] = mfcab;
-			(D.f[TE])[k] = mfaba;
-			(D.f[BW])[kbw] = mfcbc;
-			(D.f[BE])[kb] = mfabc;
-			(D.f[TW])[kw] = mfcba;
-			(D.f[TN])[k] = mfbaa;
-			(D.f[BS])[kbs] = mfbcc;
-			(D.f[BN])[kb] = mfbac;
-			(D.f[TS])[ks] = mfbca;
-			(D.f[REST])[k] = mfbbb;
-			(D.f[TNE])[k] = mfaaa;
-			(D.f[TSE])[ks] = mfaca;
-			(D.f[BNE])[kb] = mfaac;
-			(D.f[BSE])[kbs] = mfacc;
-			(D.f[TNW])[kw] = mfcaa;
-			(D.f[TSW])[ksw] = mfcca;
-			(D.f[BNW])[kbw] = mfcac;
-			(D.f[BSW])[kbsw] = mfccc;
-			////////////////////////////////////////////////////////////////////////////////////
-
-			(G.g[E])[k] = mgabb;
-			(G.g[W])[kw] = mgcbb;
-			(G.g[N])[k] = mgbab;
-			(G.g[S])[ks] = mgbcb;
-			(G.g[T])[k] = mgbba;
-			(G.g[B])[kb] = mgbbc;
+			(D.f[DIR_P00])[k] = mfabb;
+			(D.f[DIR_M00])[kw] = mfcbb;
+			(D.f[DIR_0P0])[k] = mfbab;
+			(D.f[DIR_0M0])[ks] = mfbcb;
+			(D.f[DIR_00P])[k] = mfbba;
+			(D.f[DIR_00M])[kb] = mfbbc;
+			(D.f[DIR_PP0])[k] = mfaab;
+			(D.f[DIR_MM0])[ksw] = mfccb;
+			(D.f[DIR_PM0])[ks] = mfacb;
+			(D.f[DIR_MP0])[kw] = mfcab;
+			(D.f[DIR_P0P])[k] = mfaba;
+			(D.f[DIR_M0M])[kbw] = mfcbc;
+			(D.f[DIR_P0M])[kb] = mfabc;
+			(D.f[DIR_M0P])[kw] = mfcba;
+			(D.f[DIR_0PP])[k] = mfbaa;
+			(D.f[DIR_0MM])[kbs] = mfbcc;
+			(D.f[DIR_0PM])[kb] = mfbac;
+			(D.f[DIR_0MP])[ks] = mfbca;
+			(D.f[DIR_000])[k] = mfbbb;
+			(D.f[DIR_PPP])[k] = mfaaa;
+			(D.f[DIR_PMP])[ks] = mfaca;
+			(D.f[DIR_PPM])[kb] = mfaac;
+			(D.f[DIR_PMM])[kbs] = mfacc;
+			(D.f[DIR_MPP])[kw] = mfcaa;
+			(D.f[DIR_MMP])[ksw] = mfcca;
+			(D.f[DIR_MPM])[kbw] = mfcac;
+			(D.f[DIR_MMM])[kbsw] = mfccc;
+			////////////////////////////////////////////////////////////////////////////////////
+
+			(G.g[DIR_P00])[k] = mgabb;
+			(G.g[DIR_M00])[kw] = mgcbb;
+			(G.g[DIR_0P0])[k] = mgbab;
+			(G.g[DIR_0M0])[ks] = mgbcb;
+			(G.g[DIR_00P])[k] = mgbba;
+			(G.g[DIR_00M])[kb] = mgbbc;
 		}
 	}
 }
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK20/CumulantK20Comp_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK20/CumulantK20Comp_Device.cuh
index f6280969d1f501f69cbf912fca07b14cbe458b25..17691f621b5a46d29556d71304195f4a346a7ec6 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK20/CumulantK20Comp_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK20/CumulantK20Comp_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_CumulantK20Comp(	real omega,
+__global__ void LB_Kernel_CumulantK20Comp(	real omega,
 															unsigned int* bcMatD,
 															unsigned int* neighborX,
 															unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/MRT/MRTCompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/MRT/MRTCompSP27_Device.cu
index e25926fd8186bb1fdb09f8610bc5e4719614f645..a9aefa2d62a962766470c93a62adeefa4f19570e 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/MRT/MRTCompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/MRT/MRTCompSP27_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_MRT_Comp_SP_27(real omega,
+__global__ void LB_Kernel_MRT_Comp_SP_27(real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
@@ -37,63 +37,63 @@ extern "C" __global__ void LB_Kernel_MRT_Comp_SP_27(real omega,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[E] = &DDStart[E   *size_Mat];
-				D.f[W] = &DDStart[W   *size_Mat];
-				D.f[N] = &DDStart[N   *size_Mat];
-				D.f[S] = &DDStart[S   *size_Mat];
-				D.f[T] = &DDStart[T   *size_Mat];
-				D.f[B] = &DDStart[B   *size_Mat];
-				D.f[NE] = &DDStart[NE  *size_Mat];
-				D.f[SW] = &DDStart[SW  *size_Mat];
-				D.f[SE] = &DDStart[SE  *size_Mat];
-				D.f[NW] = &DDStart[NW  *size_Mat];
-				D.f[TE] = &DDStart[TE  *size_Mat];
-				D.f[BW] = &DDStart[BW  *size_Mat];
-				D.f[BE] = &DDStart[BE  *size_Mat];
-				D.f[TW] = &DDStart[TW  *size_Mat];
-				D.f[TN] = &DDStart[TN  *size_Mat];
-				D.f[BS] = &DDStart[BS  *size_Mat];
-				D.f[BN] = &DDStart[BN  *size_Mat];
-				D.f[TS] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[TNE] = &DDStart[TNE *size_Mat];
-				D.f[TSW] = &DDStart[TSW *size_Mat];
-				D.f[TSE] = &DDStart[TSE *size_Mat];
-				D.f[TNW] = &DDStart[TNW *size_Mat];
-				D.f[BNE] = &DDStart[BNE *size_Mat];
-				D.f[BSW] = &DDStart[BSW *size_Mat];
-				D.f[BSE] = &DDStart[BSE *size_Mat];
-				D.f[BNW] = &DDStart[BNW *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
 			}
 			else
 			{
-				D.f[W] = &DDStart[E   *size_Mat];
-				D.f[E] = &DDStart[W   *size_Mat];
-				D.f[S] = &DDStart[N   *size_Mat];
-				D.f[N] = &DDStart[S   *size_Mat];
-				D.f[B] = &DDStart[T   *size_Mat];
-				D.f[T] = &DDStart[B   *size_Mat];
-				D.f[SW] = &DDStart[NE  *size_Mat];
-				D.f[NE] = &DDStart[SW  *size_Mat];
-				D.f[NW] = &DDStart[SE  *size_Mat];
-				D.f[SE] = &DDStart[NW  *size_Mat];
-				D.f[BW] = &DDStart[TE  *size_Mat];
-				D.f[TE] = &DDStart[BW  *size_Mat];
-				D.f[TW] = &DDStart[BE  *size_Mat];
-				D.f[BE] = &DDStart[TW  *size_Mat];
-				D.f[BS] = &DDStart[TN  *size_Mat];
-				D.f[TN] = &DDStart[BS  *size_Mat];
-				D.f[TS] = &DDStart[BN  *size_Mat];
-				D.f[BN] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[BSW] = &DDStart[TNE *size_Mat];
-				D.f[BNE] = &DDStart[TSW *size_Mat];
-				D.f[BNW] = &DDStart[TSE *size_Mat];
-				D.f[BSE] = &DDStart[TNW *size_Mat];
-				D.f[TSW] = &DDStart[BNE *size_Mat];
-				D.f[TNE] = &DDStart[BSW *size_Mat];
-				D.f[TNW] = &DDStart[BSE *size_Mat];
-				D.f[TSE] = &DDStart[BNW *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -126,33 +126,33 @@ extern "C" __global__ void LB_Kernel_MRT_Comp_SP_27(real omega,
 			//unsigned int ktne = k;
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[E])[k];//[ke   ];// +  c2over27 ;(D.f[E   ])[k  ];//ke
-			real mfabb = (D.f[W])[kw];//[kw   ];// +  c2over27 ;(D.f[W   ])[kw ];
-			real mfbcb = (D.f[N])[k];//[kn   ];// +  c2over27 ;(D.f[N   ])[k  ];//kn
-			real mfbab = (D.f[S])[ks];//[ks   ];// +  c2over27 ;(D.f[S   ])[ks ];
-			real mfbbc = (D.f[T])[k];//[kt   ];// +  c2over27 ;(D.f[T   ])[k  ];//kt
-			real mfbba = (D.f[B])[kb];//[kb   ];// +  c2over27 ;(D.f[B   ])[kb ];
-			real mfccb = (D.f[NE])[k];//[kne  ];// +  c1over54 ;(D.f[NE  ])[k  ];//kne
-			real mfaab = (D.f[SW])[ksw];//[ksw  ];// +  c1over54 ;(D.f[SW  ])[ksw];
-			real mfcab = (D.f[SE])[ks];//[kse  ];// +  c1over54 ;(D.f[SE  ])[ks ];//kse
-			real mfacb = (D.f[NW])[kw];//[knw  ];// +  c1over54 ;(D.f[NW  ])[kw ];//knw
-			real mfcbc = (D.f[TE])[k];//[kte  ];// +  c1over54 ;(D.f[TE  ])[k  ];//kte
-			real mfaba = (D.f[BW])[kbw];//[kbw  ];// +  c1over54 ;(D.f[BW  ])[kbw];
-			real mfcba = (D.f[BE])[kb];//[kbe  ];// +  c1over54 ;(D.f[BE  ])[kb ];//kbe
-			real mfabc = (D.f[TW])[kw];//[ktw  ];// +  c1over54 ;(D.f[TW  ])[kw ];//ktw
-			real mfbcc = (D.f[TN])[k];//[ktn  ];// +  c1over54 ;(D.f[TN  ])[k  ];//ktn
-			real mfbaa = (D.f[BS])[kbs];//[kbs  ];// +  c1over54 ;(D.f[BS  ])[kbs];
-			real mfbca = (D.f[BN])[kb];//[kbn  ];// +  c1over54 ;(D.f[BN  ])[kb ];//kbn
-			real mfbac = (D.f[TS])[ks];//[kts  ];// +  c1over54 ;(D.f[TS  ])[ks ];//kts
-			real mfbbb = (D.f[REST])[k];//[kzero];// +  c8over27 ;(D.f[REST])[k  ];//kzero
-			real mfccc = (D.f[TNE])[k];//[ktne ];// +  c1over216;(D.f[TNE ])[k  ];//ktne
-			real mfaac = (D.f[TSW])[ksw];//[ktsw ];// +  c1over216;(D.f[TSW ])[ksw];//ktsw
-			real mfcac = (D.f[TSE])[ks];//[ktse ];// +  c1over216;(D.f[TSE ])[ks ];//ktse
-			real mfacc = (D.f[TNW])[kw];//[ktnw ];// +  c1over216;(D.f[TNW ])[kw ];//ktnw
-			real mfcca = (D.f[BNE])[kb];//[kbne ];// +  c1over216;(D.f[BNE ])[kb ];//kbne
-			real mfaaa = (D.f[BSW])[kbsw];//[kbsw ];// +  c1over216;(D.f[BSW ])[kbsw];
-			real mfcaa = (D.f[BSE])[kbs];//[kbse ];// +  c1over216;(D.f[BSE ])[kbs];//kbse
-			real mfaca = (D.f[BNW])[kbw];//[kbnw ];// +  c1over216;(D.f[BNW ])[kbw];//kbnw
+			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
+			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
+			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfbbb = (D.f[DIR_000])[k];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
+			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
 											////////////////////////////////////////////////////////////////////////////////////
 			real rho = (mfccc + mfaaa + mfaca + mfcac + mfacc + mfcaa + mfaac + mfcca +
 				mfbac + mfbca + mfbaa + mfbcc + mfabc + mfcba + mfaba + mfcbc + mfacb + mfcab + mfaab + mfccb +
@@ -739,33 +739,33 @@ extern "C" __global__ void LB_Kernel_MRT_Comp_SP_27(real omega,
 
 
 			////////////////////////////////////////////////////////////////////////////////////
-			(D.f[E])[k] = mfabb;//(D.f[ E   ])[ke   ] = mfabb;// -  c2over27 ;  (D.f[ E   ])[k   ]                                                                     
-			(D.f[W])[kw] = mfcbb;//(D.f[ W   ])[kw   ] = mfcbb;// -  c2over27 ;  (D.f[ W   ])[kw  ]                                                                   
-			(D.f[N])[k] = mfbab;//(D.f[ N   ])[kn   ] = mfbab;// -  c2over27 ;	 (D.f[ N   ])[k   ]
-			(D.f[S])[ks] = mfbcb;//(D.f[ S   ])[ks   ] = mfbcb;// -  c2over27 ;	 (D.f[ S   ])[ks  ]
-			(D.f[T])[k] = mfbba;//(D.f[ T   ])[kt   ] = mfbba;// -  c2over27 ;	 (D.f[ T   ])[k   ]
-			(D.f[B])[kb] = mfbbc;//(D.f[ B   ])[kb   ] = mfbbc;// -  c2over27 ;	 (D.f[ B   ])[kb  ]
-			(D.f[NE])[k] = mfaab;//(D.f[ NE  ])[kne  ] = mfaab;// -  c1over54 ;	 (D.f[ NE  ])[k   ]
-			(D.f[SW])[ksw] = mfccb;//(D.f[ SW  ])[ksw  ] = mfccb;// -  c1over54 ;	 (D.f[ SW  ])[ksw ]
-			(D.f[SE])[ks] = mfacb;//(D.f[ SE  ])[kse  ] = mfacb;// -  c1over54 ;	 (D.f[ SE  ])[ks  ]
-			(D.f[NW])[kw] = mfcab;//(D.f[ NW  ])[knw  ] = mfcab;// -  c1over54 ;	 (D.f[ NW  ])[kw  ]
-			(D.f[TE])[k] = mfaba;//(D.f[ TE  ])[kte  ] = mfaba;// -  c1over54 ;	 (D.f[ TE  ])[k   ]
-			(D.f[BW])[kbw] = mfcbc;//(D.f[ BW  ])[kbw  ] = mfcbc;// -  c1over54 ;	 (D.f[ BW  ])[kbw ]
-			(D.f[BE])[kb] = mfabc;//(D.f[ BE  ])[kbe  ] = mfabc;// -  c1over54 ;	 (D.f[ BE  ])[kb  ]
-			(D.f[TW])[kw] = mfcba;//(D.f[ TW  ])[ktw  ] = mfcba;// -  c1over54 ;	 (D.f[ TW  ])[kw  ]
-			(D.f[TN])[k] = mfbaa;//(D.f[ TN  ])[ktn  ] = mfbaa;// -  c1over54 ;	 (D.f[ TN  ])[k   ]
-			(D.f[BS])[kbs] = mfbcc;//(D.f[ BS  ])[kbs  ] = mfbcc;// -  c1over54 ;	 (D.f[ BS  ])[kbs ]
-			(D.f[BN])[kb] = mfbac;//(D.f[ BN  ])[kbn  ] = mfbac;// -  c1over54 ;	 (D.f[ BN  ])[kb  ]
-			(D.f[TS])[ks] = mfbca;//(D.f[ TS  ])[kts  ] = mfbca;// -  c1over54 ;	 (D.f[ TS  ])[ks  ]
-			(D.f[REST])[k] = mfbbb;//(D.f[ REST])[kzero] = mfbbb;// -  c8over27 ;	 (D.f[ REST])[k   ]
-			(D.f[TNE])[k] = mfaaa;//(D.f[ TNE ])[ktne ] = mfaaa;// -  c1over216;	 (D.f[ TNE ])[k   ]
-			(D.f[TSE])[ks] = mfaca;//(D.f[ TSE ])[ktse ] = mfaca;// -  c1over216;	 (D.f[ TSE ])[ks  ]
-			(D.f[BNE])[kb] = mfaac;//(D.f[ BNE ])[kbne ] = mfaac;// -  c1over216;	 (D.f[ BNE ])[kb  ]
-			(D.f[BSE])[kbs] = mfacc;//(D.f[ BSE ])[kbse ] = mfacc;// -  c1over216;	 (D.f[ BSE ])[kbs ]
-			(D.f[TNW])[kw] = mfcaa;//(D.f[ TNW ])[ktnw ] = mfcaa;// -  c1over216;	 (D.f[ TNW ])[kw  ]
-			(D.f[TSW])[ksw] = mfcca;//(D.f[ TSW ])[ktsw ] = mfcca;// -  c1over216;	 (D.f[ TSW ])[ksw ]
-			(D.f[BNW])[kbw] = mfcac;//(D.f[ BNW ])[kbnw ] = mfcac;// -  c1over216;	 (D.f[ BNW ])[kbw ]
-			(D.f[BSW])[kbsw] = mfccc;//(D.f[ BSW ])[kbsw ] = mfccc;// -  c1over216;	 (D.f[ BSW ])[kbsw]
+			(D.f[DIR_P00])[k] = mfabb;//(D.f[ DIR_P00   ])[ke   ] = mfabb;// -  c2over27 ;  (D.f[ DIR_P00   ])[k   ]                                                                     
+			(D.f[DIR_M00])[kw] = mfcbb;//(D.f[ DIR_M00   ])[kw   ] = mfcbb;// -  c2over27 ;  (D.f[ DIR_M00   ])[kw  ]                                                                   
+			(D.f[DIR_0P0])[k] = mfbab;//(D.f[ DIR_0P0   ])[kn   ] = mfbab;// -  c2over27 ;	 (D.f[ DIR_0P0   ])[k   ]
+			(D.f[DIR_0M0])[ks] = mfbcb;//(D.f[ DIR_0M0   ])[ks   ] = mfbcb;// -  c2over27 ;	 (D.f[ DIR_0M0   ])[ks  ]
+			(D.f[DIR_00P])[k] = mfbba;//(D.f[ DIR_00P   ])[kt   ] = mfbba;// -  c2over27 ;	 (D.f[ DIR_00P   ])[k   ]
+			(D.f[DIR_00M])[kb] = mfbbc;//(D.f[ DIR_00M   ])[kb   ] = mfbbc;// -  c2over27 ;	 (D.f[ DIR_00M   ])[kb  ]
+			(D.f[DIR_PP0])[k] = mfaab;//(D.f[ DIR_PP0  ])[kne  ] = mfaab;// -  c1over54 ;	 (D.f[ DIR_PP0  ])[k   ]
+			(D.f[DIR_MM0])[ksw] = mfccb;//(D.f[ DIR_MM0  ])[ksw  ] = mfccb;// -  c1over54 ;	 (D.f[ DIR_MM0  ])[ksw ]
+			(D.f[DIR_PM0])[ks] = mfacb;//(D.f[ DIR_PM0  ])[kse  ] = mfacb;// -  c1over54 ;	 (D.f[ DIR_PM0  ])[ks  ]
+			(D.f[DIR_MP0])[kw] = mfcab;//(D.f[ DIR_MP0  ])[knw  ] = mfcab;// -  c1over54 ;	 (D.f[ DIR_MP0  ])[kw  ]
+			(D.f[DIR_P0P])[k] = mfaba;//(D.f[ DIR_P0P  ])[kte  ] = mfaba;// -  c1over54 ;	 (D.f[ DIR_P0P  ])[k   ]
+			(D.f[DIR_M0M])[kbw] = mfcbc;//(D.f[ DIR_M0M  ])[kbw  ] = mfcbc;// -  c1over54 ;	 (D.f[ DIR_M0M  ])[kbw ]
+			(D.f[DIR_P0M])[kb] = mfabc;//(D.f[ DIR_P0M  ])[kbe  ] = mfabc;// -  c1over54 ;	 (D.f[ DIR_P0M  ])[kb  ]
+			(D.f[DIR_M0P])[kw] = mfcba;//(D.f[ DIR_M0P  ])[ktw  ] = mfcba;// -  c1over54 ;	 (D.f[ DIR_M0P  ])[kw  ]
+			(D.f[DIR_0PP])[k] = mfbaa;//(D.f[ DIR_0PP  ])[ktn  ] = mfbaa;// -  c1over54 ;	 (D.f[ DIR_0PP  ])[k   ]
+			(D.f[DIR_0MM])[kbs] = mfbcc;//(D.f[ DIR_0MM  ])[kbs  ] = mfbcc;// -  c1over54 ;	 (D.f[ DIR_0MM  ])[kbs ]
+			(D.f[DIR_0PM])[kb] = mfbac;//(D.f[ DIR_0PM  ])[kbn  ] = mfbac;// -  c1over54 ;	 (D.f[ DIR_0PM  ])[kb  ]
+			(D.f[DIR_0MP])[ks] = mfbca;//(D.f[ DIR_0MP  ])[kts  ] = mfbca;// -  c1over54 ;	 (D.f[ DIR_0MP  ])[ks  ]
+			(D.f[DIR_000])[k] = mfbbb;//(D.f[ DIR_000])[kzero] = mfbbb;// -  c8over27 ;	 (D.f[ DIR_000])[k   ]
+			(D.f[DIR_PPP])[k] = mfaaa;//(D.f[ DIR_PPP ])[ktne ] = mfaaa;// -  c1over216;	 (D.f[ DIR_PPP ])[k   ]
+			(D.f[DIR_PMP])[ks] = mfaca;//(D.f[ DIR_PMP ])[ktse ] = mfaca;// -  c1over216;	 (D.f[ DIR_PMP ])[ks  ]
+			(D.f[DIR_PPM])[kb] = mfaac;//(D.f[ DIR_PPM ])[kbne ] = mfaac;// -  c1over216;	 (D.f[ DIR_PPM ])[kb  ]
+			(D.f[DIR_PMM])[kbs] = mfacc;//(D.f[ DIR_PMM ])[kbse ] = mfacc;// -  c1over216;	 (D.f[ DIR_PMM ])[kbs ]
+			(D.f[DIR_MPP])[kw] = mfcaa;//(D.f[ DIR_MPP ])[ktnw ] = mfcaa;// -  c1over216;	 (D.f[ DIR_MPP ])[kw  ]
+			(D.f[DIR_MMP])[ksw] = mfcca;//(D.f[ DIR_MMP ])[ktsw ] = mfcca;// -  c1over216;	 (D.f[ DIR_MMP ])[ksw ]
+			(D.f[DIR_MPM])[kbw] = mfcac;//(D.f[ DIR_MPM ])[kbnw ] = mfcac;// -  c1over216;	 (D.f[ DIR_MPM ])[kbw ]
+			(D.f[DIR_MMM])[kbsw] = mfccc;//(D.f[ DIR_MMM ])[kbsw ] = mfccc;// -  c1over216;	 (D.f[ DIR_MMM ])[kbsw]
 										////////////////////////////////////////////////////////////////////////////////////
 		}
 	}
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/MRT/MRTCompSP27_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/MRT/MRTCompSP27_Device.cuh
index 39b7562a28c39df06b9fe06a431001939c33deb6..947ce68259432efe87af84fd9986916e62521397 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/MRT/MRTCompSP27_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Compressible/MRT/MRTCompSP27_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_MRT_Comp_SP_27(real omega,
+__global__ void LB_Kernel_MRT_Comp_SP_27(real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGK/BGKIncompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGK/BGKIncompSP27_Device.cu
index db2314b67f8c4cdfea156bd76f6eee2c3cb11f59..9a94006b8a1be745fc2bcfdd80e454152347139d 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGK/BGKIncompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGK/BGKIncompSP27_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_BGK_Incomp_SP_27(real omega,
+__global__ void LB_Kernel_BGK_Incomp_SP_27(real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
@@ -37,63 +37,63 @@ extern "C" __global__ void LB_Kernel_BGK_Incomp_SP_27(real omega,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[E] = &DDStart[E   *size_Mat];
-				D.f[W] = &DDStart[W   *size_Mat];
-				D.f[N] = &DDStart[N   *size_Mat];
-				D.f[S] = &DDStart[S   *size_Mat];
-				D.f[T] = &DDStart[T   *size_Mat];
-				D.f[B] = &DDStart[B   *size_Mat];
-				D.f[NE] = &DDStart[NE  *size_Mat];
-				D.f[SW] = &DDStart[SW  *size_Mat];
-				D.f[SE] = &DDStart[SE  *size_Mat];
-				D.f[NW] = &DDStart[NW  *size_Mat];
-				D.f[TE] = &DDStart[TE  *size_Mat];
-				D.f[BW] = &DDStart[BW  *size_Mat];
-				D.f[BE] = &DDStart[BE  *size_Mat];
-				D.f[TW] = &DDStart[TW  *size_Mat];
-				D.f[TN] = &DDStart[TN  *size_Mat];
-				D.f[BS] = &DDStart[BS  *size_Mat];
-				D.f[BN] = &DDStart[BN  *size_Mat];
-				D.f[TS] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[TNE] = &DDStart[TNE *size_Mat];
-				D.f[TSW] = &DDStart[TSW *size_Mat];
-				D.f[TSE] = &DDStart[TSE *size_Mat];
-				D.f[TNW] = &DDStart[TNW *size_Mat];
-				D.f[BNE] = &DDStart[BNE *size_Mat];
-				D.f[BSW] = &DDStart[BSW *size_Mat];
-				D.f[BSE] = &DDStart[BSE *size_Mat];
-				D.f[BNW] = &DDStart[BNW *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
 			}
 			else
 			{
-				D.f[W] = &DDStart[E   *size_Mat];
-				D.f[E] = &DDStart[W   *size_Mat];
-				D.f[S] = &DDStart[N   *size_Mat];
-				D.f[N] = &DDStart[S   *size_Mat];
-				D.f[B] = &DDStart[T   *size_Mat];
-				D.f[T] = &DDStart[B   *size_Mat];
-				D.f[SW] = &DDStart[NE  *size_Mat];
-				D.f[NE] = &DDStart[SW  *size_Mat];
-				D.f[NW] = &DDStart[SE  *size_Mat];
-				D.f[SE] = &DDStart[NW  *size_Mat];
-				D.f[BW] = &DDStart[TE  *size_Mat];
-				D.f[TE] = &DDStart[BW  *size_Mat];
-				D.f[TW] = &DDStart[BE  *size_Mat];
-				D.f[BE] = &DDStart[TW  *size_Mat];
-				D.f[BS] = &DDStart[TN  *size_Mat];
-				D.f[TN] = &DDStart[BS  *size_Mat];
-				D.f[TS] = &DDStart[BN  *size_Mat];
-				D.f[BN] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[BSW] = &DDStart[TNE *size_Mat];
-				D.f[BNE] = &DDStart[TSW *size_Mat];
-				D.f[BNW] = &DDStart[TSE *size_Mat];
-				D.f[BSE] = &DDStart[TNW *size_Mat];
-				D.f[TSW] = &DDStart[BNE *size_Mat];
-				D.f[TNE] = &DDStart[BSW *size_Mat];
-				D.f[TNW] = &DDStart[BSE *size_Mat];
-				D.f[TSE] = &DDStart[BNW *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -126,33 +126,33 @@ extern "C" __global__ void LB_Kernel_BGK_Incomp_SP_27(real omega,
 			//unsigned int ktne = k;
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real fE = (D.f[E])[k];//ke
-			real fW = (D.f[W])[kw];
-			real fN = (D.f[N])[k];//kn
-			real fS = (D.f[S])[ks];
-			real fT = (D.f[T])[k];//kt
-			real fB = (D.f[B])[kb];
-			real fNE = (D.f[NE])[k];//kne
-			real fSW = (D.f[SW])[ksw];
-			real fSE = (D.f[SE])[ks];//kse
-			real fNW = (D.f[NW])[kw];//knw
-			real fTE = (D.f[TE])[k];//kte
-			real fBW = (D.f[BW])[kbw];
-			real fBE = (D.f[BE])[kb];//kbe
-			real fTW = (D.f[TW])[kw];//ktw
-			real fTN = (D.f[TN])[k];//ktn
-			real fBS = (D.f[BS])[kbs];
-			real fBN = (D.f[BN])[kb];//kbn
-			real fTS = (D.f[TS])[ks];//kts
-			real fZERO = (D.f[REST])[k];//kzero
-			real fTNE = (D.f[TNE])[k];//ktne
-			real fTSW = (D.f[TSW])[ksw];//ktsw
-			real fTSE = (D.f[TSE])[ks];//ktse
-			real fTNW = (D.f[TNW])[kw];//ktnw
-			real fBNE = (D.f[BNE])[kb];//kbne
-			real fBSW = (D.f[BSW])[kbsw];
-			real fBSE = (D.f[BSE])[kbs];//kbse
-			real fBNW = (D.f[BNW])[kbw];//kbnw
+			real fE = (D.f[DIR_P00])[k];//ke
+			real fW = (D.f[DIR_M00])[kw];
+			real fN = (D.f[DIR_0P0])[k];//kn
+			real fS = (D.f[DIR_0M0])[ks];
+			real fT = (D.f[DIR_00P])[k];//kt
+			real fB = (D.f[DIR_00M])[kb];
+			real fNE = (D.f[DIR_PP0])[k];//kne
+			real fSW = (D.f[DIR_MM0])[ksw];
+			real fSE = (D.f[DIR_PM0])[ks];//kse
+			real fNW = (D.f[DIR_MP0])[kw];//knw
+			real fTE = (D.f[DIR_P0P])[k];//kte
+			real fBW = (D.f[DIR_M0M])[kbw];
+			real fBE = (D.f[DIR_P0M])[kb];//kbe
+			real fTW = (D.f[DIR_M0P])[kw];//ktw
+			real fTN = (D.f[DIR_0PP])[k];//ktn
+			real fBS = (D.f[DIR_0MM])[kbs];
+			real fBN = (D.f[DIR_0PM])[kb];//kbn
+			real fTS = (D.f[DIR_0MP])[ks];//kts
+			real fZERO = (D.f[DIR_000])[k];//kzero
+			real fTNE = (D.f[DIR_PPP])[k];//ktne
+			real fTSW = (D.f[DIR_MMP])[ksw];//ktsw
+			real fTSE = (D.f[DIR_PMP])[ks];//ktse
+			real fTNW = (D.f[DIR_MPP])[kw];//ktnw
+			real fBNE = (D.f[DIR_PPM])[kb];//kbne
+			real fBSW = (D.f[DIR_MMM])[kbsw];
+			real fBSE = (D.f[DIR_PMM])[kbs];//kbse
+			real fBNW = (D.f[DIR_MPM])[kbw];//kbnw
 										   ////////////////////////////////////////////////////////////////////////////////
 
 
@@ -206,33 +206,33 @@ extern "C" __global__ void LB_Kernel_BGK_Incomp_SP_27(real omega,
 
 
 			//////////////////////////////////////////////////////////////////////////                            
-			(D.f[E])[k] = fW;
-			(D.f[W])[kw] = fE;
-			(D.f[N])[k] = fS;
-			(D.f[S])[ks] = fN;
-			(D.f[T])[k] = fB;
-			(D.f[B])[kb] = fT;
-			(D.f[NE])[k] = fSW;
-			(D.f[SW])[ksw] = fNE;
-			(D.f[SE])[ks] = fNW;
-			(D.f[NW])[kw] = fSE;
-			(D.f[TE])[k] = fBW;
-			(D.f[BW])[kbw] = fTE;
-			(D.f[BE])[kb] = fTW;
-			(D.f[TW])[kw] = fBE;
-			(D.f[TN])[k] = fBS;
-			(D.f[BS])[kbs] = fTN;
-			(D.f[BN])[kb] = fTS;
-			(D.f[TS])[ks] = fBN;
-			(D.f[REST])[k] = fZERO;
-			(D.f[TNE])[k] = fBSW;
-			(D.f[TSE])[ks] = fBNW;
-			(D.f[BNE])[kb] = fTSW;
-			(D.f[BSE])[kbs] = fTNW;
-			(D.f[TNW])[kw] = fBSE;
-			(D.f[TSW])[ksw] = fBNE;
-			(D.f[BNW])[kbw] = fTSE;
-			(D.f[BSW])[kbsw] = fTNE;
+			(D.f[DIR_P00])[k] = fW;
+			(D.f[DIR_M00])[kw] = fE;
+			(D.f[DIR_0P0])[k] = fS;
+			(D.f[DIR_0M0])[ks] = fN;
+			(D.f[DIR_00P])[k] = fB;
+			(D.f[DIR_00M])[kb] = fT;
+			(D.f[DIR_PP0])[k] = fSW;
+			(D.f[DIR_MM0])[ksw] = fNE;
+			(D.f[DIR_PM0])[ks] = fNW;
+			(D.f[DIR_MP0])[kw] = fSE;
+			(D.f[DIR_P0P])[k] = fBW;
+			(D.f[DIR_M0M])[kbw] = fTE;
+			(D.f[DIR_P0M])[kb] = fTW;
+			(D.f[DIR_M0P])[kw] = fBE;
+			(D.f[DIR_0PP])[k] = fBS;
+			(D.f[DIR_0MM])[kbs] = fTN;
+			(D.f[DIR_0PM])[kb] = fTS;
+			(D.f[DIR_0MP])[ks] = fBN;
+			(D.f[DIR_000])[k] = fZERO;
+			(D.f[DIR_PPP])[k] = fBSW;
+			(D.f[DIR_PMP])[ks] = fBNW;
+			(D.f[DIR_PPM])[kb] = fTSW;
+			(D.f[DIR_PMM])[kbs] = fTNW;
+			(D.f[DIR_MPP])[kw] = fBSE;
+			(D.f[DIR_MMP])[ksw] = fBNE;
+			(D.f[DIR_MPM])[kbw] = fTSE;
+			(D.f[DIR_MMM])[kbsw] = fTNE;
 			//////////////////////////////////////////////////////////////////////////                            
 		}
 	}
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGK/BGKIncompSP27_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGK/BGKIncompSP27_Device.cuh
index 20cdefb5a6acd961b9f93ca5cb759bfe742dc990..f1a90b45238a2df4d93860d7e77cb1242b9fbd90 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGK/BGKIncompSP27_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGK/BGKIncompSP27_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_BGK_Incomp_SP_27(real omega,
+__global__ void LB_Kernel_BGK_Incomp_SP_27(real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGKPlus/BGKPlusIncompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGKPlus/BGKPlusIncompSP27_Device.cu
index ab28fc544e66de7e31050ab125c9ca7bef40e260..9355e42aa5b05190f063f5247d8d6c0dea787a02 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGKPlus/BGKPlusIncompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGKPlus/BGKPlusIncompSP27_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_BGK_Plus_Incomp_SP_27(real omega,
+__global__ void LB_Kernel_BGK_Plus_Incomp_SP_27(real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
@@ -37,63 +37,63 @@ extern "C" __global__ void LB_Kernel_BGK_Plus_Incomp_SP_27(real omega,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[E] = &DDStart[E   *size_Mat];
-				D.f[W] = &DDStart[W   *size_Mat];
-				D.f[N] = &DDStart[N   *size_Mat];
-				D.f[S] = &DDStart[S   *size_Mat];
-				D.f[T] = &DDStart[T   *size_Mat];
-				D.f[B] = &DDStart[B   *size_Mat];
-				D.f[NE] = &DDStart[NE  *size_Mat];
-				D.f[SW] = &DDStart[SW  *size_Mat];
-				D.f[SE] = &DDStart[SE  *size_Mat];
-				D.f[NW] = &DDStart[NW  *size_Mat];
-				D.f[TE] = &DDStart[TE  *size_Mat];
-				D.f[BW] = &DDStart[BW  *size_Mat];
-				D.f[BE] = &DDStart[BE  *size_Mat];
-				D.f[TW] = &DDStart[TW  *size_Mat];
-				D.f[TN] = &DDStart[TN  *size_Mat];
-				D.f[BS] = &DDStart[BS  *size_Mat];
-				D.f[BN] = &DDStart[BN  *size_Mat];
-				D.f[TS] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[TNE] = &DDStart[TNE *size_Mat];
-				D.f[TSW] = &DDStart[TSW *size_Mat];
-				D.f[TSE] = &DDStart[TSE *size_Mat];
-				D.f[TNW] = &DDStart[TNW *size_Mat];
-				D.f[BNE] = &DDStart[BNE *size_Mat];
-				D.f[BSW] = &DDStart[BSW *size_Mat];
-				D.f[BSE] = &DDStart[BSE *size_Mat];
-				D.f[BNW] = &DDStart[BNW *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
 			}
 			else
 			{
-				D.f[W] = &DDStart[E   *size_Mat];
-				D.f[E] = &DDStart[W   *size_Mat];
-				D.f[S] = &DDStart[N   *size_Mat];
-				D.f[N] = &DDStart[S   *size_Mat];
-				D.f[B] = &DDStart[T   *size_Mat];
-				D.f[T] = &DDStart[B   *size_Mat];
-				D.f[SW] = &DDStart[NE  *size_Mat];
-				D.f[NE] = &DDStart[SW  *size_Mat];
-				D.f[NW] = &DDStart[SE  *size_Mat];
-				D.f[SE] = &DDStart[NW  *size_Mat];
-				D.f[BW] = &DDStart[TE  *size_Mat];
-				D.f[TE] = &DDStart[BW  *size_Mat];
-				D.f[TW] = &DDStart[BE  *size_Mat];
-				D.f[BE] = &DDStart[TW  *size_Mat];
-				D.f[BS] = &DDStart[TN  *size_Mat];
-				D.f[TN] = &DDStart[BS  *size_Mat];
-				D.f[TS] = &DDStart[BN  *size_Mat];
-				D.f[BN] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[BSW] = &DDStart[TNE *size_Mat];
-				D.f[BNE] = &DDStart[TSW *size_Mat];
-				D.f[BNW] = &DDStart[TSE *size_Mat];
-				D.f[BSE] = &DDStart[TNW *size_Mat];
-				D.f[TSW] = &DDStart[BNE *size_Mat];
-				D.f[TNE] = &DDStart[BSW *size_Mat];
-				D.f[TNW] = &DDStart[BSE *size_Mat];
-				D.f[TSE] = &DDStart[BNW *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -126,33 +126,33 @@ extern "C" __global__ void LB_Kernel_BGK_Plus_Incomp_SP_27(real omega,
 			//unsigned int ktne = k;
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[E])[k];//[ke   ];// +  c2over27 ;(D.f[E   ])[k  ];//ke
-			real mfabb = (D.f[W])[kw];//[kw   ];// +  c2over27 ;(D.f[W   ])[kw ];
-			real mfbcb = (D.f[N])[k];//[kn   ];// +  c2over27 ;(D.f[N   ])[k  ];//kn
-			real mfbab = (D.f[S])[ks];//[ks   ];// +  c2over27 ;(D.f[S   ])[ks ];
-			real mfbbc = (D.f[T])[k];//[kt   ];// +  c2over27 ;(D.f[T   ])[k  ];//kt
-			real mfbba = (D.f[B])[kb];//[kb   ];// +  c2over27 ;(D.f[B   ])[kb ];
-			real mfccb = (D.f[NE])[k];//[kne  ];// +  c1over54 ;(D.f[NE  ])[k  ];//kne
-			real mfaab = (D.f[SW])[ksw];//[ksw  ];// +  c1over54 ;(D.f[SW  ])[ksw];
-			real mfcab = (D.f[SE])[ks];//[kse  ];// +  c1over54 ;(D.f[SE  ])[ks ];//kse
-			real mfacb = (D.f[NW])[kw];//[knw  ];// +  c1over54 ;(D.f[NW  ])[kw ];//knw
-			real mfcbc = (D.f[TE])[k];//[kte  ];// +  c1over54 ;(D.f[TE  ])[k  ];//kte
-			real mfaba = (D.f[BW])[kbw];//[kbw  ];// +  c1over54 ;(D.f[BW  ])[kbw];
-			real mfcba = (D.f[BE])[kb];//[kbe  ];// +  c1over54 ;(D.f[BE  ])[kb ];//kbe
-			real mfabc = (D.f[TW])[kw];//[ktw  ];// +  c1over54 ;(D.f[TW  ])[kw ];//ktw
-			real mfbcc = (D.f[TN])[k];//[ktn  ];// +  c1over54 ;(D.f[TN  ])[k  ];//ktn
-			real mfbaa = (D.f[BS])[kbs];//[kbs  ];// +  c1over54 ;(D.f[BS  ])[kbs];
-			real mfbca = (D.f[BN])[kb];//[kbn  ];// +  c1over54 ;(D.f[BN  ])[kb ];//kbn
-			real mfbac = (D.f[TS])[ks];//[kts  ];// +  c1over54 ;(D.f[TS  ])[ks ];//kts
-			real mfbbb = (D.f[REST])[k];//[kzero];// +  c8over27 ;(D.f[REST])[k  ];//kzero
-			real mfccc = (D.f[TNE])[k];//[ktne ];// +  c1over216;(D.f[TNE ])[k  ];//ktne
-			real mfaac = (D.f[TSW])[ksw];//[ktsw ];// +  c1over216;(D.f[TSW ])[ksw];//ktsw
-			real mfcac = (D.f[TSE])[ks];//[ktse ];// +  c1over216;(D.f[TSE ])[ks ];//ktse
-			real mfacc = (D.f[TNW])[kw];//[ktnw ];// +  c1over216;(D.f[TNW ])[kw ];//ktnw
-			real mfcca = (D.f[BNE])[kb];//[kbne ];// +  c1over216;(D.f[BNE ])[kb ];//kbne
-			real mfaaa = (D.f[BSW])[kbsw];//[kbsw ];// +  c1over216;(D.f[BSW ])[kbsw];
-			real mfcaa = (D.f[BSE])[kbs];//[kbse ];// +  c1over216;(D.f[BSE ])[kbs];//kbse
-			real mfaca = (D.f[BNW])[kbw];//[kbnw ];// +  c1over216;(D.f[BNW ])[kbw];//kbnw
+			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
+			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
+			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfbbb = (D.f[DIR_000])[k];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
+			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
 											////////////////////////////////////////////////////////////////////////////////////
 											//slow
 											//real oMdrho = one - ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
@@ -724,33 +724,33 @@ extern "C" __global__ void LB_Kernel_BGK_Plus_Incomp_SP_27(real omega,
 
 
 			////////////////////////////////////////////////////////////////////////////////////
-			(D.f[E])[k] = mfabb;//(D.f[ E   ])[ke   ] = mfabb;// -  c2over27 ;  (D.f[ E   ])[k   ]                                                                     
-			(D.f[W])[kw] = mfcbb;//(D.f[ W   ])[kw   ] = mfcbb;// -  c2over27 ;  (D.f[ W   ])[kw  ]                                                                   
-			(D.f[N])[k] = mfbab;//(D.f[ N   ])[kn   ] = mfbab;// -  c2over27 ;	 (D.f[ N   ])[k   ]
-			(D.f[S])[ks] = mfbcb;//(D.f[ S   ])[ks   ] = mfbcb;// -  c2over27 ;	 (D.f[ S   ])[ks  ]
-			(D.f[T])[k] = mfbba;//(D.f[ T   ])[kt   ] = mfbba;// -  c2over27 ;	 (D.f[ T   ])[k   ]
-			(D.f[B])[kb] = mfbbc;//(D.f[ B   ])[kb   ] = mfbbc;// -  c2over27 ;	 (D.f[ B   ])[kb  ]
-			(D.f[NE])[k] = mfaab;//(D.f[ NE  ])[kne  ] = mfaab;// -  c1over54 ;	 (D.f[ NE  ])[k   ]
-			(D.f[SW])[ksw] = mfccb;//(D.f[ SW  ])[ksw  ] = mfccb;// -  c1over54 ;	 (D.f[ SW  ])[ksw ]
-			(D.f[SE])[ks] = mfacb;//(D.f[ SE  ])[kse  ] = mfacb;// -  c1over54 ;	 (D.f[ SE  ])[ks  ]
-			(D.f[NW])[kw] = mfcab;//(D.f[ NW  ])[knw  ] = mfcab;// -  c1over54 ;	 (D.f[ NW  ])[kw  ]
-			(D.f[TE])[k] = mfaba;//(D.f[ TE  ])[kte  ] = mfaba;// -  c1over54 ;	 (D.f[ TE  ])[k   ]
-			(D.f[BW])[kbw] = mfcbc;//(D.f[ BW  ])[kbw  ] = mfcbc;// -  c1over54 ;	 (D.f[ BW  ])[kbw ]
-			(D.f[BE])[kb] = mfabc;//(D.f[ BE  ])[kbe  ] = mfabc;// -  c1over54 ;	 (D.f[ BE  ])[kb  ]
-			(D.f[TW])[kw] = mfcba;//(D.f[ TW  ])[ktw  ] = mfcba;// -  c1over54 ;	 (D.f[ TW  ])[kw  ]
-			(D.f[TN])[k] = mfbaa;//(D.f[ TN  ])[ktn  ] = mfbaa;// -  c1over54 ;	 (D.f[ TN  ])[k   ]
-			(D.f[BS])[kbs] = mfbcc;//(D.f[ BS  ])[kbs  ] = mfbcc;// -  c1over54 ;	 (D.f[ BS  ])[kbs ]
-			(D.f[BN])[kb] = mfbac;//(D.f[ BN  ])[kbn  ] = mfbac;// -  c1over54 ;	 (D.f[ BN  ])[kb  ]
-			(D.f[TS])[ks] = mfbca;//(D.f[ TS  ])[kts  ] = mfbca;// -  c1over54 ;	 (D.f[ TS  ])[ks  ]
-			(D.f[REST])[k] = mfbbb;//(D.f[ REST])[kzero] = mfbbb;// -  c8over27 ;	 (D.f[ REST])[k   ]
-			(D.f[TNE])[k] = mfaaa;//(D.f[ TNE ])[ktne ] = mfaaa;// -  c1over216;	 (D.f[ TNE ])[k   ]
-			(D.f[TSE])[ks] = mfaca;//(D.f[ TSE ])[ktse ] = mfaca;// -  c1over216;	 (D.f[ TSE ])[ks  ]
-			(D.f[BNE])[kb] = mfaac;//(D.f[ BNE ])[kbne ] = mfaac;// -  c1over216;	 (D.f[ BNE ])[kb  ]
-			(D.f[BSE])[kbs] = mfacc;//(D.f[ BSE ])[kbse ] = mfacc;// -  c1over216;	 (D.f[ BSE ])[kbs ]
-			(D.f[TNW])[kw] = mfcaa;//(D.f[ TNW ])[ktnw ] = mfcaa;// -  c1over216;	 (D.f[ TNW ])[kw  ]
-			(D.f[TSW])[ksw] = mfcca;//(D.f[ TSW ])[ktsw ] = mfcca;// -  c1over216;	 (D.f[ TSW ])[ksw ]
-			(D.f[BNW])[kbw] = mfcac;//(D.f[ BNW ])[kbnw ] = mfcac;// -  c1over216;	 (D.f[ BNW ])[kbw ]
-			(D.f[BSW])[kbsw] = mfccc;//(D.f[ BSW ])[kbsw ] = mfccc;// -  c1over216;	 (D.f[ BSW ])[kbsw]
+			(D.f[DIR_P00])[k] = mfabb;//(D.f[ DIR_P00   ])[ke   ] = mfabb;// -  c2over27 ;  (D.f[ DIR_P00   ])[k   ]                                                                     
+			(D.f[DIR_M00])[kw] = mfcbb;//(D.f[ DIR_M00   ])[kw   ] = mfcbb;// -  c2over27 ;  (D.f[ DIR_M00   ])[kw  ]                                                                   
+			(D.f[DIR_0P0])[k] = mfbab;//(D.f[ DIR_0P0   ])[kn   ] = mfbab;// -  c2over27 ;	 (D.f[ DIR_0P0   ])[k   ]
+			(D.f[DIR_0M0])[ks] = mfbcb;//(D.f[ DIR_0M0   ])[ks   ] = mfbcb;// -  c2over27 ;	 (D.f[ DIR_0M0   ])[ks  ]
+			(D.f[DIR_00P])[k] = mfbba;//(D.f[ DIR_00P   ])[kt   ] = mfbba;// -  c2over27 ;	 (D.f[ DIR_00P   ])[k   ]
+			(D.f[DIR_00M])[kb] = mfbbc;//(D.f[ DIR_00M   ])[kb   ] = mfbbc;// -  c2over27 ;	 (D.f[ DIR_00M   ])[kb  ]
+			(D.f[DIR_PP0])[k] = mfaab;//(D.f[ DIR_PP0  ])[kne  ] = mfaab;// -  c1over54 ;	 (D.f[ DIR_PP0  ])[k   ]
+			(D.f[DIR_MM0])[ksw] = mfccb;//(D.f[ DIR_MM0  ])[ksw  ] = mfccb;// -  c1over54 ;	 (D.f[ DIR_MM0  ])[ksw ]
+			(D.f[DIR_PM0])[ks] = mfacb;//(D.f[ DIR_PM0  ])[kse  ] = mfacb;// -  c1over54 ;	 (D.f[ DIR_PM0  ])[ks  ]
+			(D.f[DIR_MP0])[kw] = mfcab;//(D.f[ DIR_MP0  ])[knw  ] = mfcab;// -  c1over54 ;	 (D.f[ DIR_MP0  ])[kw  ]
+			(D.f[DIR_P0P])[k] = mfaba;//(D.f[ DIR_P0P  ])[kte  ] = mfaba;// -  c1over54 ;	 (D.f[ DIR_P0P  ])[k   ]
+			(D.f[DIR_M0M])[kbw] = mfcbc;//(D.f[ DIR_M0M  ])[kbw  ] = mfcbc;// -  c1over54 ;	 (D.f[ DIR_M0M  ])[kbw ]
+			(D.f[DIR_P0M])[kb] = mfabc;//(D.f[ DIR_P0M  ])[kbe  ] = mfabc;// -  c1over54 ;	 (D.f[ DIR_P0M  ])[kb  ]
+			(D.f[DIR_M0P])[kw] = mfcba;//(D.f[ DIR_M0P  ])[ktw  ] = mfcba;// -  c1over54 ;	 (D.f[ DIR_M0P  ])[kw  ]
+			(D.f[DIR_0PP])[k] = mfbaa;//(D.f[ DIR_0PP  ])[ktn  ] = mfbaa;// -  c1over54 ;	 (D.f[ DIR_0PP  ])[k   ]
+			(D.f[DIR_0MM])[kbs] = mfbcc;//(D.f[ DIR_0MM  ])[kbs  ] = mfbcc;// -  c1over54 ;	 (D.f[ DIR_0MM  ])[kbs ]
+			(D.f[DIR_0PM])[kb] = mfbac;//(D.f[ DIR_0PM  ])[kbn  ] = mfbac;// -  c1over54 ;	 (D.f[ DIR_0PM  ])[kb  ]
+			(D.f[DIR_0MP])[ks] = mfbca;//(D.f[ DIR_0MP  ])[kts  ] = mfbca;// -  c1over54 ;	 (D.f[ DIR_0MP  ])[ks  ]
+			(D.f[DIR_000])[k] = mfbbb;//(D.f[ DIR_000])[kzero] = mfbbb;// -  c8over27 ;	 (D.f[ DIR_000])[k   ]
+			(D.f[DIR_PPP])[k] = mfaaa;//(D.f[ DIR_PPP ])[ktne ] = mfaaa;// -  c1over216;	 (D.f[ DIR_PPP ])[k   ]
+			(D.f[DIR_PMP])[ks] = mfaca;//(D.f[ DIR_PMP ])[ktse ] = mfaca;// -  c1over216;	 (D.f[ DIR_PMP ])[ks  ]
+			(D.f[DIR_PPM])[kb] = mfaac;//(D.f[ DIR_PPM ])[kbne ] = mfaac;// -  c1over216;	 (D.f[ DIR_PPM ])[kb  ]
+			(D.f[DIR_PMM])[kbs] = mfacc;//(D.f[ DIR_PMM ])[kbse ] = mfacc;// -  c1over216;	 (D.f[ DIR_PMM ])[kbs ]
+			(D.f[DIR_MPP])[kw] = mfcaa;//(D.f[ DIR_MPP ])[ktnw ] = mfcaa;// -  c1over216;	 (D.f[ DIR_MPP ])[kw  ]
+			(D.f[DIR_MMP])[ksw] = mfcca;//(D.f[ DIR_MMP ])[ktsw ] = mfcca;// -  c1over216;	 (D.f[ DIR_MMP ])[ksw ]
+			(D.f[DIR_MPM])[kbw] = mfcac;//(D.f[ DIR_MPM ])[kbnw ] = mfcac;// -  c1over216;	 (D.f[ DIR_MPM ])[kbw ]
+			(D.f[DIR_MMM])[kbsw] = mfccc;//(D.f[ DIR_MMM ])[kbsw ] = mfccc;// -  c1over216;	 (D.f[ DIR_MMM ])[kbsw]
 										////////////////////////////////////////////////////////////////////////////////////
 		}
 	}
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGKPlus/BGKPlusIncompSP27_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGKPlus/BGKPlusIncompSP27_Device.cuh
index 327ea4ddc933ccf5e9e75a7fd6d44e4d59b86d56..7f85f3ca29d4d8d7620e2503df9947fd7e42fe8f 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGKPlus/BGKPlusIncompSP27_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/BGKPlus/BGKPlusIncompSP27_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_BGK_Plus_Incomp_SP_27(real omega,
+__global__ void LB_Kernel_BGK_Plus_Incomp_SP_27(real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cascade/CascadeIncompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cascade/CascadeIncompSP27_Device.cu
index d88f7492c9bf366a57155826fb57d328816cc2b2..92cc749b135739d5f38c9916c4ee0da7497e5f2d 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cascade/CascadeIncompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cascade/CascadeIncompSP27_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_Cascade_Incomp_SP_27(real omega,
+__global__ void LB_Kernel_Cascade_Incomp_SP_27(real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
@@ -37,63 +37,63 @@ extern "C" __global__ void LB_Kernel_Cascade_Incomp_SP_27(real omega,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[E] = &DDStart[E   *size_Mat];
-				D.f[W] = &DDStart[W   *size_Mat];
-				D.f[N] = &DDStart[N   *size_Mat];
-				D.f[S] = &DDStart[S   *size_Mat];
-				D.f[T] = &DDStart[T   *size_Mat];
-				D.f[B] = &DDStart[B   *size_Mat];
-				D.f[NE] = &DDStart[NE  *size_Mat];
-				D.f[SW] = &DDStart[SW  *size_Mat];
-				D.f[SE] = &DDStart[SE  *size_Mat];
-				D.f[NW] = &DDStart[NW  *size_Mat];
-				D.f[TE] = &DDStart[TE  *size_Mat];
-				D.f[BW] = &DDStart[BW  *size_Mat];
-				D.f[BE] = &DDStart[BE  *size_Mat];
-				D.f[TW] = &DDStart[TW  *size_Mat];
-				D.f[TN] = &DDStart[TN  *size_Mat];
-				D.f[BS] = &DDStart[BS  *size_Mat];
-				D.f[BN] = &DDStart[BN  *size_Mat];
-				D.f[TS] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[TNE] = &DDStart[TNE *size_Mat];
-				D.f[TSW] = &DDStart[TSW *size_Mat];
-				D.f[TSE] = &DDStart[TSE *size_Mat];
-				D.f[TNW] = &DDStart[TNW *size_Mat];
-				D.f[BNE] = &DDStart[BNE *size_Mat];
-				D.f[BSW] = &DDStart[BSW *size_Mat];
-				D.f[BSE] = &DDStart[BSE *size_Mat];
-				D.f[BNW] = &DDStart[BNW *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
 			}
 			else
 			{
-				D.f[W] = &DDStart[E   *size_Mat];
-				D.f[E] = &DDStart[W   *size_Mat];
-				D.f[S] = &DDStart[N   *size_Mat];
-				D.f[N] = &DDStart[S   *size_Mat];
-				D.f[B] = &DDStart[T   *size_Mat];
-				D.f[T] = &DDStart[B   *size_Mat];
-				D.f[SW] = &DDStart[NE  *size_Mat];
-				D.f[NE] = &DDStart[SW  *size_Mat];
-				D.f[NW] = &DDStart[SE  *size_Mat];
-				D.f[SE] = &DDStart[NW  *size_Mat];
-				D.f[BW] = &DDStart[TE  *size_Mat];
-				D.f[TE] = &DDStart[BW  *size_Mat];
-				D.f[TW] = &DDStart[BE  *size_Mat];
-				D.f[BE] = &DDStart[TW  *size_Mat];
-				D.f[BS] = &DDStart[TN  *size_Mat];
-				D.f[TN] = &DDStart[BS  *size_Mat];
-				D.f[TS] = &DDStart[BN  *size_Mat];
-				D.f[BN] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[BSW] = &DDStart[TNE *size_Mat];
-				D.f[BNE] = &DDStart[TSW *size_Mat];
-				D.f[BNW] = &DDStart[TSE *size_Mat];
-				D.f[BSE] = &DDStart[TNW *size_Mat];
-				D.f[TSW] = &DDStart[BNE *size_Mat];
-				D.f[TNE] = &DDStart[BSW *size_Mat];
-				D.f[TNW] = &DDStart[BSE *size_Mat];
-				D.f[TSE] = &DDStart[BNW *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -126,33 +126,33 @@ extern "C" __global__ void LB_Kernel_Cascade_Incomp_SP_27(real omega,
 			//unsigned int ktne = k;
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[E])[k];//[ke   ];// +  c2over27 ;(D.f[E   ])[k  ];//ke
-			real mfabb = (D.f[W])[kw];//[kw   ];// +  c2over27 ;(D.f[W   ])[kw ];
-			real mfbcb = (D.f[N])[k];//[kn   ];// +  c2over27 ;(D.f[N   ])[k  ];//kn
-			real mfbab = (D.f[S])[ks];//[ks   ];// +  c2over27 ;(D.f[S   ])[ks ];
-			real mfbbc = (D.f[T])[k];//[kt   ];// +  c2over27 ;(D.f[T   ])[k  ];//kt
-			real mfbba = (D.f[B])[kb];//[kb   ];// +  c2over27 ;(D.f[B   ])[kb ];
-			real mfccb = (D.f[NE])[k];//[kne  ];// +  c1over54 ;(D.f[NE  ])[k  ];//kne
-			real mfaab = (D.f[SW])[ksw];//[ksw  ];// +  c1over54 ;(D.f[SW  ])[ksw];
-			real mfcab = (D.f[SE])[ks];//[kse  ];// +  c1over54 ;(D.f[SE  ])[ks ];//kse
-			real mfacb = (D.f[NW])[kw];//[knw  ];// +  c1over54 ;(D.f[NW  ])[kw ];//knw
-			real mfcbc = (D.f[TE])[k];//[kte  ];// +  c1over54 ;(D.f[TE  ])[k  ];//kte
-			real mfaba = (D.f[BW])[kbw];//[kbw  ];// +  c1over54 ;(D.f[BW  ])[kbw];
-			real mfcba = (D.f[BE])[kb];//[kbe  ];// +  c1over54 ;(D.f[BE  ])[kb ];//kbe
-			real mfabc = (D.f[TW])[kw];//[ktw  ];// +  c1over54 ;(D.f[TW  ])[kw ];//ktw
-			real mfbcc = (D.f[TN])[k];//[ktn  ];// +  c1over54 ;(D.f[TN  ])[k  ];//ktn
-			real mfbaa = (D.f[BS])[kbs];//[kbs  ];// +  c1over54 ;(D.f[BS  ])[kbs];
-			real mfbca = (D.f[BN])[kb];//[kbn  ];// +  c1over54 ;(D.f[BN  ])[kb ];//kbn
-			real mfbac = (D.f[TS])[ks];//[kts  ];// +  c1over54 ;(D.f[TS  ])[ks ];//kts
-			real mfbbb = (D.f[REST])[k];//[kzero];// +  c8over27 ;(D.f[REST])[k  ];//kzero
-			real mfccc = (D.f[TNE])[k];//[ktne ];// +  c1over216;(D.f[TNE ])[k  ];//ktne
-			real mfaac = (D.f[TSW])[ksw];//[ktsw ];// +  c1over216;(D.f[TSW ])[ksw];//ktsw
-			real mfcac = (D.f[TSE])[ks];//[ktse ];// +  c1over216;(D.f[TSE ])[ks ];//ktse
-			real mfacc = (D.f[TNW])[kw];//[ktnw ];// +  c1over216;(D.f[TNW ])[kw ];//ktnw
-			real mfcca = (D.f[BNE])[kb];//[kbne ];// +  c1over216;(D.f[BNE ])[kb ];//kbne
-			real mfaaa = (D.f[BSW])[kbsw];//[kbsw ];// +  c1over216;(D.f[BSW ])[kbsw];
-			real mfcaa = (D.f[BSE])[kbs];//[kbse ];// +  c1over216;(D.f[BSE ])[kbs];//kbse
-			real mfaca = (D.f[BNW])[kbw];//[kbnw ];// +  c1over216;(D.f[BNW ])[kbw];//kbnw
+			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
+			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
+			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfbbb = (D.f[DIR_000])[k];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
+			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
 											////////////////////////////////////////////////////////////////////////////////////
 											//slow
 											//real oMdrho = one - ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
@@ -728,33 +728,33 @@ extern "C" __global__ void LB_Kernel_Cascade_Incomp_SP_27(real omega,
 
 
 			////////////////////////////////////////////////////////////////////////////////////
-			(D.f[E])[k] = mfabb;//(D.f[ E   ])[ke   ] = mfabb;// -  c2over27 ;  (D.f[ E   ])[k   ]                                                                     
-			(D.f[W])[kw] = mfcbb;//(D.f[ W   ])[kw   ] = mfcbb;// -  c2over27 ;  (D.f[ W   ])[kw  ]                                                                   
-			(D.f[N])[k] = mfbab;//(D.f[ N   ])[kn   ] = mfbab;// -  c2over27 ;	 (D.f[ N   ])[k   ]
-			(D.f[S])[ks] = mfbcb;//(D.f[ S   ])[ks   ] = mfbcb;// -  c2over27 ;	 (D.f[ S   ])[ks  ]
-			(D.f[T])[k] = mfbba;//(D.f[ T   ])[kt   ] = mfbba;// -  c2over27 ;	 (D.f[ T   ])[k   ]
-			(D.f[B])[kb] = mfbbc;//(D.f[ B   ])[kb   ] = mfbbc;// -  c2over27 ;	 (D.f[ B   ])[kb  ]
-			(D.f[NE])[k] = mfaab;//(D.f[ NE  ])[kne  ] = mfaab;// -  c1over54 ;	 (D.f[ NE  ])[k   ]
-			(D.f[SW])[ksw] = mfccb;//(D.f[ SW  ])[ksw  ] = mfccb;// -  c1over54 ;	 (D.f[ SW  ])[ksw ]
-			(D.f[SE])[ks] = mfacb;//(D.f[ SE  ])[kse  ] = mfacb;// -  c1over54 ;	 (D.f[ SE  ])[ks  ]
-			(D.f[NW])[kw] = mfcab;//(D.f[ NW  ])[knw  ] = mfcab;// -  c1over54 ;	 (D.f[ NW  ])[kw  ]
-			(D.f[TE])[k] = mfaba;//(D.f[ TE  ])[kte  ] = mfaba;// -  c1over54 ;	 (D.f[ TE  ])[k   ]
-			(D.f[BW])[kbw] = mfcbc;//(D.f[ BW  ])[kbw  ] = mfcbc;// -  c1over54 ;	 (D.f[ BW  ])[kbw ]
-			(D.f[BE])[kb] = mfabc;//(D.f[ BE  ])[kbe  ] = mfabc;// -  c1over54 ;	 (D.f[ BE  ])[kb  ]
-			(D.f[TW])[kw] = mfcba;//(D.f[ TW  ])[ktw  ] = mfcba;// -  c1over54 ;	 (D.f[ TW  ])[kw  ]
-			(D.f[TN])[k] = mfbaa;//(D.f[ TN  ])[ktn  ] = mfbaa;// -  c1over54 ;	 (D.f[ TN  ])[k   ]
-			(D.f[BS])[kbs] = mfbcc;//(D.f[ BS  ])[kbs  ] = mfbcc;// -  c1over54 ;	 (D.f[ BS  ])[kbs ]
-			(D.f[BN])[kb] = mfbac;//(D.f[ BN  ])[kbn  ] = mfbac;// -  c1over54 ;	 (D.f[ BN  ])[kb  ]
-			(D.f[TS])[ks] = mfbca;//(D.f[ TS  ])[kts  ] = mfbca;// -  c1over54 ;	 (D.f[ TS  ])[ks  ]
-			(D.f[REST])[k] = mfbbb;//(D.f[ REST])[kzero] = mfbbb;// -  c8over27 ;	 (D.f[ REST])[k   ]
-			(D.f[TNE])[k] = mfaaa;//(D.f[ TNE ])[ktne ] = mfaaa;// -  c1over216;	 (D.f[ TNE ])[k   ]
-			(D.f[TSE])[ks] = mfaca;//(D.f[ TSE ])[ktse ] = mfaca;// -  c1over216;	 (D.f[ TSE ])[ks  ]
-			(D.f[BNE])[kb] = mfaac;//(D.f[ BNE ])[kbne ] = mfaac;// -  c1over216;	 (D.f[ BNE ])[kb  ]
-			(D.f[BSE])[kbs] = mfacc;//(D.f[ BSE ])[kbse ] = mfacc;// -  c1over216;	 (D.f[ BSE ])[kbs ]
-			(D.f[TNW])[kw] = mfcaa;//(D.f[ TNW ])[ktnw ] = mfcaa;// -  c1over216;	 (D.f[ TNW ])[kw  ]
-			(D.f[TSW])[ksw] = mfcca;//(D.f[ TSW ])[ktsw ] = mfcca;// -  c1over216;	 (D.f[ TSW ])[ksw ]
-			(D.f[BNW])[kbw] = mfcac;//(D.f[ BNW ])[kbnw ] = mfcac;// -  c1over216;	 (D.f[ BNW ])[kbw ]
-			(D.f[BSW])[kbsw] = mfccc;//(D.f[ BSW ])[kbsw ] = mfccc;// -  c1over216;	 (D.f[ BSW ])[kbsw]
+			(D.f[DIR_P00])[k] = mfabb;//(D.f[ DIR_P00   ])[ke   ] = mfabb;// -  c2over27 ;  (D.f[ DIR_P00   ])[k   ]                                                                     
+			(D.f[DIR_M00])[kw] = mfcbb;//(D.f[ DIR_M00   ])[kw   ] = mfcbb;// -  c2over27 ;  (D.f[ DIR_M00   ])[kw  ]                                                                   
+			(D.f[DIR_0P0])[k] = mfbab;//(D.f[ DIR_0P0   ])[kn   ] = mfbab;// -  c2over27 ;	 (D.f[ DIR_0P0   ])[k   ]
+			(D.f[DIR_0M0])[ks] = mfbcb;//(D.f[ DIR_0M0   ])[ks   ] = mfbcb;// -  c2over27 ;	 (D.f[ DIR_0M0   ])[ks  ]
+			(D.f[DIR_00P])[k] = mfbba;//(D.f[ DIR_00P   ])[kt   ] = mfbba;// -  c2over27 ;	 (D.f[ DIR_00P   ])[k   ]
+			(D.f[DIR_00M])[kb] = mfbbc;//(D.f[ DIR_00M   ])[kb   ] = mfbbc;// -  c2over27 ;	 (D.f[ DIR_00M   ])[kb  ]
+			(D.f[DIR_PP0])[k] = mfaab;//(D.f[ DIR_PP0  ])[kne  ] = mfaab;// -  c1over54 ;	 (D.f[ DIR_PP0  ])[k   ]
+			(D.f[DIR_MM0])[ksw] = mfccb;//(D.f[ DIR_MM0  ])[ksw  ] = mfccb;// -  c1over54 ;	 (D.f[ DIR_MM0  ])[ksw ]
+			(D.f[DIR_PM0])[ks] = mfacb;//(D.f[ DIR_PM0  ])[kse  ] = mfacb;// -  c1over54 ;	 (D.f[ DIR_PM0  ])[ks  ]
+			(D.f[DIR_MP0])[kw] = mfcab;//(D.f[ DIR_MP0  ])[knw  ] = mfcab;// -  c1over54 ;	 (D.f[ DIR_MP0  ])[kw  ]
+			(D.f[DIR_P0P])[k] = mfaba;//(D.f[ DIR_P0P  ])[kte  ] = mfaba;// -  c1over54 ;	 (D.f[ DIR_P0P  ])[k   ]
+			(D.f[DIR_M0M])[kbw] = mfcbc;//(D.f[ DIR_M0M  ])[kbw  ] = mfcbc;// -  c1over54 ;	 (D.f[ DIR_M0M  ])[kbw ]
+			(D.f[DIR_P0M])[kb] = mfabc;//(D.f[ DIR_P0M  ])[kbe  ] = mfabc;// -  c1over54 ;	 (D.f[ DIR_P0M  ])[kb  ]
+			(D.f[DIR_M0P])[kw] = mfcba;//(D.f[ DIR_M0P  ])[ktw  ] = mfcba;// -  c1over54 ;	 (D.f[ DIR_M0P  ])[kw  ]
+			(D.f[DIR_0PP])[k] = mfbaa;//(D.f[ DIR_0PP  ])[ktn  ] = mfbaa;// -  c1over54 ;	 (D.f[ DIR_0PP  ])[k   ]
+			(D.f[DIR_0MM])[kbs] = mfbcc;//(D.f[ DIR_0MM  ])[kbs  ] = mfbcc;// -  c1over54 ;	 (D.f[ DIR_0MM  ])[kbs ]
+			(D.f[DIR_0PM])[kb] = mfbac;//(D.f[ DIR_0PM  ])[kbn  ] = mfbac;// -  c1over54 ;	 (D.f[ DIR_0PM  ])[kb  ]
+			(D.f[DIR_0MP])[ks] = mfbca;//(D.f[ DIR_0MP  ])[kts  ] = mfbca;// -  c1over54 ;	 (D.f[ DIR_0MP  ])[ks  ]
+			(D.f[DIR_000])[k] = mfbbb;//(D.f[ DIR_000])[kzero] = mfbbb;// -  c8over27 ;	 (D.f[ DIR_000])[k   ]
+			(D.f[DIR_PPP])[k] = mfaaa;//(D.f[ DIR_PPP ])[ktne ] = mfaaa;// -  c1over216;	 (D.f[ DIR_PPP ])[k   ]
+			(D.f[DIR_PMP])[ks] = mfaca;//(D.f[ DIR_PMP ])[ktse ] = mfaca;// -  c1over216;	 (D.f[ DIR_PMP ])[ks  ]
+			(D.f[DIR_PPM])[kb] = mfaac;//(D.f[ DIR_PPM ])[kbne ] = mfaac;// -  c1over216;	 (D.f[ DIR_PPM ])[kb  ]
+			(D.f[DIR_PMM])[kbs] = mfacc;//(D.f[ DIR_PMM ])[kbse ] = mfacc;// -  c1over216;	 (D.f[ DIR_PMM ])[kbs ]
+			(D.f[DIR_MPP])[kw] = mfcaa;//(D.f[ DIR_MPP ])[ktnw ] = mfcaa;// -  c1over216;	 (D.f[ DIR_MPP ])[kw  ]
+			(D.f[DIR_MMP])[ksw] = mfcca;//(D.f[ DIR_MMP ])[ktsw ] = mfcca;// -  c1over216;	 (D.f[ DIR_MMP ])[ksw ]
+			(D.f[DIR_MPM])[kbw] = mfcac;//(D.f[ DIR_MPM ])[kbnw ] = mfcac;// -  c1over216;	 (D.f[ DIR_MPM ])[kbw ]
+			(D.f[DIR_MMM])[kbsw] = mfccc;//(D.f[ DIR_MMM ])[kbsw ] = mfccc;// -  c1over216;	 (D.f[ DIR_MMM ])[kbsw]
 										////////////////////////////////////////////////////////////////////////////////////
 		}
 	}
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cascade/CascadeIncompSP27_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cascade/CascadeIncompSP27_Device.cuh
index c9f6ec42f8ce1512e84c193484c48e78d827f71a..a531fa7bd64b9782f43f800b29fe666504612f1a 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cascade/CascadeIncompSP27_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cascade/CascadeIncompSP27_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_Cascade_Incomp_SP_27(real s9,
+__global__ void LB_Kernel_Cascade_Incomp_SP_27(real s9,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cumulant1hSP27/Cumulant1hIncompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cumulant1hSP27/Cumulant1hIncompSP27_Device.cu
index 9d3fe2781a4cc3fabb4ee24a5b980360ab324b94..0243046082ce1853011c6632d5a2f80364ebe0db 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cumulant1hSP27/Cumulant1hIncompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cumulant1hSP27/Cumulant1hIncompSP27_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_Cum_1h_Incomp_SP_27(real omega,
+__global__ void LB_Kernel_Cum_1h_Incomp_SP_27(real omega,
 	real deltaPhi,
 	real angularVelocity,
 	unsigned int* bcMatD,
@@ -42,63 +42,63 @@ extern "C" __global__ void LB_Kernel_Cum_1h_Incomp_SP_27(real omega,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[E] = &DDStart[E   *size_Mat];
-				D.f[W] = &DDStart[W   *size_Mat];
-				D.f[N] = &DDStart[N   *size_Mat];
-				D.f[S] = &DDStart[S   *size_Mat];
-				D.f[T] = &DDStart[T   *size_Mat];
-				D.f[B] = &DDStart[B   *size_Mat];
-				D.f[NE] = &DDStart[NE  *size_Mat];
-				D.f[SW] = &DDStart[SW  *size_Mat];
-				D.f[SE] = &DDStart[SE  *size_Mat];
-				D.f[NW] = &DDStart[NW  *size_Mat];
-				D.f[TE] = &DDStart[TE  *size_Mat];
-				D.f[BW] = &DDStart[BW  *size_Mat];
-				D.f[BE] = &DDStart[BE  *size_Mat];
-				D.f[TW] = &DDStart[TW  *size_Mat];
-				D.f[TN] = &DDStart[TN  *size_Mat];
-				D.f[BS] = &DDStart[BS  *size_Mat];
-				D.f[BN] = &DDStart[BN  *size_Mat];
-				D.f[TS] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[TNE] = &DDStart[TNE *size_Mat];
-				D.f[TSW] = &DDStart[TSW *size_Mat];
-				D.f[TSE] = &DDStart[TSE *size_Mat];
-				D.f[TNW] = &DDStart[TNW *size_Mat];
-				D.f[BNE] = &DDStart[BNE *size_Mat];
-				D.f[BSW] = &DDStart[BSW *size_Mat];
-				D.f[BSE] = &DDStart[BSE *size_Mat];
-				D.f[BNW] = &DDStart[BNW *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
 			}
 			else
 			{
-				D.f[W] = &DDStart[E   *size_Mat];
-				D.f[E] = &DDStart[W   *size_Mat];
-				D.f[S] = &DDStart[N   *size_Mat];
-				D.f[N] = &DDStart[S   *size_Mat];
-				D.f[B] = &DDStart[T   *size_Mat];
-				D.f[T] = &DDStart[B   *size_Mat];
-				D.f[SW] = &DDStart[NE  *size_Mat];
-				D.f[NE] = &DDStart[SW  *size_Mat];
-				D.f[NW] = &DDStart[SE  *size_Mat];
-				D.f[SE] = &DDStart[NW  *size_Mat];
-				D.f[BW] = &DDStart[TE  *size_Mat];
-				D.f[TE] = &DDStart[BW  *size_Mat];
-				D.f[TW] = &DDStart[BE  *size_Mat];
-				D.f[BE] = &DDStart[TW  *size_Mat];
-				D.f[BS] = &DDStart[TN  *size_Mat];
-				D.f[TN] = &DDStart[BS  *size_Mat];
-				D.f[TS] = &DDStart[BN  *size_Mat];
-				D.f[BN] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[BSW] = &DDStart[TNE *size_Mat];
-				D.f[BNE] = &DDStart[TSW *size_Mat];
-				D.f[BNW] = &DDStart[TSE *size_Mat];
-				D.f[BSE] = &DDStart[TNW *size_Mat];
-				D.f[TSW] = &DDStart[BNE *size_Mat];
-				D.f[TNE] = &DDStart[BSW *size_Mat];
-				D.f[TNW] = &DDStart[BSE *size_Mat];
-				D.f[TSE] = &DDStart[BNW *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -159,33 +159,33 @@ extern "C" __global__ void LB_Kernel_Cum_1h_Incomp_SP_27(real omega,
 			//unsigned int ktne = k;
 			//unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[E])[k];//[ke   ];// +  c2over27 ;(D.f[E   ])[k  ];//ke
-			real mfabb = (D.f[W])[kw];//[kw   ];// +  c2over27 ;(D.f[W   ])[kw ];
-			real mfbcb = (D.f[N])[k];//[kn   ];// +  c2over27 ;(D.f[N   ])[k  ];//kn
-			real mfbab = (D.f[S])[ks];//[ks   ];// +  c2over27 ;(D.f[S   ])[ks ];
-			real mfbbc = (D.f[T])[k];//[kt   ];// +  c2over27 ;(D.f[T   ])[k  ];//kt
-			real mfbba = (D.f[B])[kb];//[kb   ];// +  c2over27 ;(D.f[B   ])[kb ];
-			real mfccb = (D.f[NE])[k];//[kne  ];// +  c1over54 ;(D.f[NE  ])[k  ];//kne
-			real mfaab = (D.f[SW])[ksw];//[ksw  ];// +  c1over54 ;(D.f[SW  ])[ksw];
-			real mfcab = (D.f[SE])[ks];//[kse  ];// +  c1over54 ;(D.f[SE  ])[ks ];//kse
-			real mfacb = (D.f[NW])[kw];//[knw  ];// +  c1over54 ;(D.f[NW  ])[kw ];//knw
-			real mfcbc = (D.f[TE])[k];//[kte  ];// +  c1over54 ;(D.f[TE  ])[k  ];//kte
-			real mfaba = (D.f[BW])[kbw];//[kbw  ];// +  c1over54 ;(D.f[BW  ])[kbw];
-			real mfcba = (D.f[BE])[kb];//[kbe  ];// +  c1over54 ;(D.f[BE  ])[kb ];//kbe
-			real mfabc = (D.f[TW])[kw];//[ktw  ];// +  c1over54 ;(D.f[TW  ])[kw ];//ktw
-			real mfbcc = (D.f[TN])[k];//[ktn  ];// +  c1over54 ;(D.f[TN  ])[k  ];//ktn
-			real mfbaa = (D.f[BS])[kbs];//[kbs  ];// +  c1over54 ;(D.f[BS  ])[kbs];
-			real mfbca = (D.f[BN])[kb];//[kbn  ];// +  c1over54 ;(D.f[BN  ])[kb ];//kbn
-			real mfbac = (D.f[TS])[ks];//[kts  ];// +  c1over54 ;(D.f[TS  ])[ks ];//kts
-			real mfbbb = (D.f[REST])[k];//[kzero];// +  c8over27 ;(D.f[REST])[k  ];//kzero
-			real mfccc = (D.f[TNE])[k];//[ktne ];// +  c1over216;(D.f[TNE ])[k  ];//ktne
-			real mfaac = (D.f[TSW])[ksw];//[ktsw ];// +  c1over216;(D.f[TSW ])[ksw];//ktsw
-			real mfcac = (D.f[TSE])[ks];//[ktse ];// +  c1over216;(D.f[TSE ])[ks ];//ktse
-			real mfacc = (D.f[TNW])[kw];//[ktnw ];// +  c1over216;(D.f[TNW ])[kw ];//ktnw
-			real mfcca = (D.f[BNE])[kb];//[kbne ];// +  c1over216;(D.f[BNE ])[kb ];//kbne
-			real mfaaa = (D.f[BSW])[kbsw];//[kbsw ];// +  c1over216;(D.f[BSW ])[kbsw];
-			real mfcaa = (D.f[BSE])[kbs];//[kbse ];// +  c1over216;(D.f[BSE ])[kbs];//kbse
-			real mfaca = (D.f[BNW])[kbw];//[kbnw ];// +  c1over216;(D.f[BNW ])[kbw];//kbnw
+			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
+			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
+			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfbbb = (D.f[DIR_000])[k];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
+			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
 											////////////////////////////////////////////////////////////////////////////////////
 											//Ship
 			real coord0X = 281.125f;//7.5f;
@@ -1161,33 +1161,33 @@ extern "C" __global__ void LB_Kernel_Cum_1h_Incomp_SP_27(real omega,
 
 
 			////////////////////////////////////////////////////////////////////////////////////
-			(D.f[E])[k] = mfabb;//(D.f[ E   ])[ke   ] = mfabb;// -  c2over27 ;  (D.f[ E   ])[k   ]                                                                     
-			(D.f[W])[kw] = mfcbb;//(D.f[ W   ])[kw   ] = mfcbb;// -  c2over27 ;  (D.f[ W   ])[kw  ]                                                                   
-			(D.f[N])[k] = mfbab;//(D.f[ N   ])[kn   ] = mfbab;// -  c2over27 ;	 (D.f[ N   ])[k   ]
-			(D.f[S])[ks] = mfbcb;//(D.f[ S   ])[ks   ] = mfbcb;// -  c2over27 ;	 (D.f[ S   ])[ks  ]
-			(D.f[T])[k] = mfbba;//(D.f[ T   ])[kt   ] = mfbba;// -  c2over27 ;	 (D.f[ T   ])[k   ]
-			(D.f[B])[kb] = mfbbc;//(D.f[ B   ])[kb   ] = mfbbc;// -  c2over27 ;	 (D.f[ B   ])[kb  ]
-			(D.f[NE])[k] = mfaab;//(D.f[ NE  ])[kne  ] = mfaab;// -  c1over54 ;	 (D.f[ NE  ])[k   ]
-			(D.f[SW])[ksw] = mfccb;//(D.f[ SW  ])[ksw  ] = mfccb;// -  c1over54 ;	 (D.f[ SW  ])[ksw ]
-			(D.f[SE])[ks] = mfacb;//(D.f[ SE  ])[kse  ] = mfacb;// -  c1over54 ;	 (D.f[ SE  ])[ks  ]
-			(D.f[NW])[kw] = mfcab;//(D.f[ NW  ])[knw  ] = mfcab;// -  c1over54 ;	 (D.f[ NW  ])[kw  ]
-			(D.f[TE])[k] = mfaba;//(D.f[ TE  ])[kte  ] = mfaba;// -  c1over54 ;	 (D.f[ TE  ])[k   ]
-			(D.f[BW])[kbw] = mfcbc;//(D.f[ BW  ])[kbw  ] = mfcbc;// -  c1over54 ;	 (D.f[ BW  ])[kbw ]
-			(D.f[BE])[kb] = mfabc;//(D.f[ BE  ])[kbe  ] = mfabc;// -  c1over54 ;	 (D.f[ BE  ])[kb  ]
-			(D.f[TW])[kw] = mfcba;//(D.f[ TW  ])[ktw  ] = mfcba;// -  c1over54 ;	 (D.f[ TW  ])[kw  ]
-			(D.f[TN])[k] = mfbaa;//(D.f[ TN  ])[ktn  ] = mfbaa;// -  c1over54 ;	 (D.f[ TN  ])[k   ]
-			(D.f[BS])[kbs] = mfbcc;//(D.f[ BS  ])[kbs  ] = mfbcc;// -  c1over54 ;	 (D.f[ BS  ])[kbs ]
-			(D.f[BN])[kb] = mfbac;//(D.f[ BN  ])[kbn  ] = mfbac;// -  c1over54 ;	 (D.f[ BN  ])[kb  ]
-			(D.f[TS])[ks] = mfbca;//(D.f[ TS  ])[kts  ] = mfbca;// -  c1over54 ;	 (D.f[ TS  ])[ks  ]
-			(D.f[REST])[k] = mfbbb;//(D.f[ REST])[kzero] = mfbbb;// -  c8over27 ;	 (D.f[ REST])[k   ]
-			(D.f[TNE])[k] = mfaaa;//(D.f[ TNE ])[ktne ] = mfaaa;// -  c1over216;	 (D.f[ TNE ])[k   ]
-			(D.f[TSE])[ks] = mfaca;//(D.f[ TSE ])[ktse ] = mfaca;// -  c1over216;	 (D.f[ TSE ])[ks  ]
-			(D.f[BNE])[kb] = mfaac;//(D.f[ BNE ])[kbne ] = mfaac;// -  c1over216;	 (D.f[ BNE ])[kb  ]
-			(D.f[BSE])[kbs] = mfacc;//(D.f[ BSE ])[kbse ] = mfacc;// -  c1over216;	 (D.f[ BSE ])[kbs ]
-			(D.f[TNW])[kw] = mfcaa;//(D.f[ TNW ])[ktnw ] = mfcaa;// -  c1over216;	 (D.f[ TNW ])[kw  ]
-			(D.f[TSW])[ksw] = mfcca;//(D.f[ TSW ])[ktsw ] = mfcca;// -  c1over216;	 (D.f[ TSW ])[ksw ]
-			(D.f[BNW])[kbw] = mfcac;//(D.f[ BNW ])[kbnw ] = mfcac;// -  c1over216;	 (D.f[ BNW ])[kbw ]
-			(D.f[BSW])[kbsw] = mfccc;//(D.f[ BSW ])[kbsw ] = mfccc;// -  c1over216;	 (D.f[ BSW ])[kbsw]
+			(D.f[DIR_P00])[k] = mfabb;//(D.f[ DIR_P00   ])[ke   ] = mfabb;// -  c2over27 ;  (D.f[ DIR_P00   ])[k   ]                                                                     
+			(D.f[DIR_M00])[kw] = mfcbb;//(D.f[ DIR_M00   ])[kw   ] = mfcbb;// -  c2over27 ;  (D.f[ DIR_M00   ])[kw  ]                                                                   
+			(D.f[DIR_0P0])[k] = mfbab;//(D.f[ DIR_0P0   ])[kn   ] = mfbab;// -  c2over27 ;	 (D.f[ DIR_0P0   ])[k   ]
+			(D.f[DIR_0M0])[ks] = mfbcb;//(D.f[ DIR_0M0   ])[ks   ] = mfbcb;// -  c2over27 ;	 (D.f[ DIR_0M0   ])[ks  ]
+			(D.f[DIR_00P])[k] = mfbba;//(D.f[ DIR_00P   ])[kt   ] = mfbba;// -  c2over27 ;	 (D.f[ DIR_00P   ])[k   ]
+			(D.f[DIR_00M])[kb] = mfbbc;//(D.f[ DIR_00M   ])[kb   ] = mfbbc;// -  c2over27 ;	 (D.f[ DIR_00M   ])[kb  ]
+			(D.f[DIR_PP0])[k] = mfaab;//(D.f[ DIR_PP0  ])[kne  ] = mfaab;// -  c1over54 ;	 (D.f[ DIR_PP0  ])[k   ]
+			(D.f[DIR_MM0])[ksw] = mfccb;//(D.f[ DIR_MM0  ])[ksw  ] = mfccb;// -  c1over54 ;	 (D.f[ DIR_MM0  ])[ksw ]
+			(D.f[DIR_PM0])[ks] = mfacb;//(D.f[ DIR_PM0  ])[kse  ] = mfacb;// -  c1over54 ;	 (D.f[ DIR_PM0  ])[ks  ]
+			(D.f[DIR_MP0])[kw] = mfcab;//(D.f[ DIR_MP0  ])[knw  ] = mfcab;// -  c1over54 ;	 (D.f[ DIR_MP0  ])[kw  ]
+			(D.f[DIR_P0P])[k] = mfaba;//(D.f[ DIR_P0P  ])[kte  ] = mfaba;// -  c1over54 ;	 (D.f[ DIR_P0P  ])[k   ]
+			(D.f[DIR_M0M])[kbw] = mfcbc;//(D.f[ DIR_M0M  ])[kbw  ] = mfcbc;// -  c1over54 ;	 (D.f[ DIR_M0M  ])[kbw ]
+			(D.f[DIR_P0M])[kb] = mfabc;//(D.f[ DIR_P0M  ])[kbe  ] = mfabc;// -  c1over54 ;	 (D.f[ DIR_P0M  ])[kb  ]
+			(D.f[DIR_M0P])[kw] = mfcba;//(D.f[ DIR_M0P  ])[ktw  ] = mfcba;// -  c1over54 ;	 (D.f[ DIR_M0P  ])[kw  ]
+			(D.f[DIR_0PP])[k] = mfbaa;//(D.f[ DIR_0PP  ])[ktn  ] = mfbaa;// -  c1over54 ;	 (D.f[ DIR_0PP  ])[k   ]
+			(D.f[DIR_0MM])[kbs] = mfbcc;//(D.f[ DIR_0MM  ])[kbs  ] = mfbcc;// -  c1over54 ;	 (D.f[ DIR_0MM  ])[kbs ]
+			(D.f[DIR_0PM])[kb] = mfbac;//(D.f[ DIR_0PM  ])[kbn  ] = mfbac;// -  c1over54 ;	 (D.f[ DIR_0PM  ])[kb  ]
+			(D.f[DIR_0MP])[ks] = mfbca;//(D.f[ DIR_0MP  ])[kts  ] = mfbca;// -  c1over54 ;	 (D.f[ DIR_0MP  ])[ks  ]
+			(D.f[DIR_000])[k] = mfbbb;//(D.f[ DIR_000])[kzero] = mfbbb;// -  c8over27 ;	 (D.f[ DIR_000])[k   ]
+			(D.f[DIR_PPP])[k] = mfaaa;//(D.f[ DIR_PPP ])[ktne ] = mfaaa;// -  c1over216;	 (D.f[ DIR_PPP ])[k   ]
+			(D.f[DIR_PMP])[ks] = mfaca;//(D.f[ DIR_PMP ])[ktse ] = mfaca;// -  c1over216;	 (D.f[ DIR_PMP ])[ks  ]
+			(D.f[DIR_PPM])[kb] = mfaac;//(D.f[ DIR_PPM ])[kbne ] = mfaac;// -  c1over216;	 (D.f[ DIR_PPM ])[kb  ]
+			(D.f[DIR_PMM])[kbs] = mfacc;//(D.f[ DIR_PMM ])[kbse ] = mfacc;// -  c1over216;	 (D.f[ DIR_PMM ])[kbs ]
+			(D.f[DIR_MPP])[kw] = mfcaa;//(D.f[ DIR_MPP ])[ktnw ] = mfcaa;// -  c1over216;	 (D.f[ DIR_MPP ])[kw  ]
+			(D.f[DIR_MMP])[ksw] = mfcca;//(D.f[ DIR_MMP ])[ktsw ] = mfcca;// -  c1over216;	 (D.f[ DIR_MMP ])[ksw ]
+			(D.f[DIR_MPM])[kbw] = mfcac;//(D.f[ DIR_MPM ])[kbnw ] = mfcac;// -  c1over216;	 (D.f[ DIR_MPM ])[kbw ]
+			(D.f[DIR_MMM])[kbsw] = mfccc;//(D.f[ DIR_MMM ])[kbsw ] = mfccc;// -  c1over216;	 (D.f[ DIR_MMM ])[kbsw]
 										////////////////////////////////////////////////////////////////////////////////////
 		}
 	}
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cumulant1hSP27/Cumulant1hIncompSP27_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cumulant1hSP27/Cumulant1hIncompSP27_Device.cuh
index 3117d4e1e34d1d086711e9c5f1f2f6ecf9c4705f..0679e770f244ad2e2c59c9cacf9d3524a640a42e 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cumulant1hSP27/Cumulant1hIncompSP27_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/Cumulant1hSP27/Cumulant1hIncompSP27_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_Cum_1h_Incomp_SP_27(real omega,
+__global__ void LB_Kernel_Cum_1h_Incomp_SP_27(real omega,
 	real deltaPhi,
 	real angularVelocity,
 	unsigned int* bcMatD,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantIsoSP27/CumulantIsoIncompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantIsoSP27/CumulantIsoIncompSP27_Device.cu
index bfcb586e71f96b0f92d9a0331c38f06070848adb..64d697f2b0953cee75f4397e399a0e6128e486a2 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantIsoSP27/CumulantIsoIncompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantIsoSP27/CumulantIsoIncompSP27_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_Cum_IsoTest_Incomp_SP_27(real omega,
+__global__ void LB_Kernel_Cum_IsoTest_Incomp_SP_27(real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
@@ -40,63 +40,63 @@ extern "C" __global__ void LB_Kernel_Cum_IsoTest_Incomp_SP_27(real omega,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[E] = &DDStart[E   *size_Mat];
-				D.f[W] = &DDStart[W   *size_Mat];
-				D.f[N] = &DDStart[N   *size_Mat];
-				D.f[S] = &DDStart[S   *size_Mat];
-				D.f[T] = &DDStart[T   *size_Mat];
-				D.f[B] = &DDStart[B   *size_Mat];
-				D.f[NE] = &DDStart[NE  *size_Mat];
-				D.f[SW] = &DDStart[SW  *size_Mat];
-				D.f[SE] = &DDStart[SE  *size_Mat];
-				D.f[NW] = &DDStart[NW  *size_Mat];
-				D.f[TE] = &DDStart[TE  *size_Mat];
-				D.f[BW] = &DDStart[BW  *size_Mat];
-				D.f[BE] = &DDStart[BE  *size_Mat];
-				D.f[TW] = &DDStart[TW  *size_Mat];
-				D.f[TN] = &DDStart[TN  *size_Mat];
-				D.f[BS] = &DDStart[BS  *size_Mat];
-				D.f[BN] = &DDStart[BN  *size_Mat];
-				D.f[TS] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[TNE] = &DDStart[TNE *size_Mat];
-				D.f[TSW] = &DDStart[TSW *size_Mat];
-				D.f[TSE] = &DDStart[TSE *size_Mat];
-				D.f[TNW] = &DDStart[TNW *size_Mat];
-				D.f[BNE] = &DDStart[BNE *size_Mat];
-				D.f[BSW] = &DDStart[BSW *size_Mat];
-				D.f[BSE] = &DDStart[BSE *size_Mat];
-				D.f[BNW] = &DDStart[BNW *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
 			}
 			else
 			{
-				D.f[W] = &DDStart[E   *size_Mat];
-				D.f[E] = &DDStart[W   *size_Mat];
-				D.f[S] = &DDStart[N   *size_Mat];
-				D.f[N] = &DDStart[S   *size_Mat];
-				D.f[B] = &DDStart[T   *size_Mat];
-				D.f[T] = &DDStart[B   *size_Mat];
-				D.f[SW] = &DDStart[NE  *size_Mat];
-				D.f[NE] = &DDStart[SW  *size_Mat];
-				D.f[NW] = &DDStart[SE  *size_Mat];
-				D.f[SE] = &DDStart[NW  *size_Mat];
-				D.f[BW] = &DDStart[TE  *size_Mat];
-				D.f[TE] = &DDStart[BW  *size_Mat];
-				D.f[TW] = &DDStart[BE  *size_Mat];
-				D.f[BE] = &DDStart[TW  *size_Mat];
-				D.f[BS] = &DDStart[TN  *size_Mat];
-				D.f[TN] = &DDStart[BS  *size_Mat];
-				D.f[TS] = &DDStart[BN  *size_Mat];
-				D.f[BN] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[BSW] = &DDStart[TNE *size_Mat];
-				D.f[BNE] = &DDStart[TSW *size_Mat];
-				D.f[BNW] = &DDStart[TSE *size_Mat];
-				D.f[BSE] = &DDStart[TNW *size_Mat];
-				D.f[TSW] = &DDStart[BNE *size_Mat];
-				D.f[TNE] = &DDStart[BSW *size_Mat];
-				D.f[TNW] = &DDStart[BSE *size_Mat];
-				D.f[TSE] = &DDStart[BNW *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -129,33 +129,33 @@ extern "C" __global__ void LB_Kernel_Cum_IsoTest_Incomp_SP_27(real omega,
 			//unsigned int ktne = k;
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[E])[k];//[ke   ];// +  c2over27 ;(D.f[E   ])[k  ];//ke
-			real mfabb = (D.f[W])[kw];//[kw   ];// +  c2over27 ;(D.f[W   ])[kw ];
-			real mfbcb = (D.f[N])[k];//[kn   ];// +  c2over27 ;(D.f[N   ])[k  ];//kn
-			real mfbab = (D.f[S])[ks];//[ks   ];// +  c2over27 ;(D.f[S   ])[ks ];
-			real mfbbc = (D.f[T])[k];//[kt   ];// +  c2over27 ;(D.f[T   ])[k  ];//kt
-			real mfbba = (D.f[B])[kb];//[kb   ];// +  c2over27 ;(D.f[B   ])[kb ];
-			real mfccb = (D.f[NE])[k];//[kne  ];// +  c1over54 ;(D.f[NE  ])[k  ];//kne
-			real mfaab = (D.f[SW])[ksw];//[ksw  ];// +  c1over54 ;(D.f[SW  ])[ksw];
-			real mfcab = (D.f[SE])[ks];//[kse  ];// +  c1over54 ;(D.f[SE  ])[ks ];//kse
-			real mfacb = (D.f[NW])[kw];//[knw  ];// +  c1over54 ;(D.f[NW  ])[kw ];//knw
-			real mfcbc = (D.f[TE])[k];//[kte  ];// +  c1over54 ;(D.f[TE  ])[k  ];//kte
-			real mfaba = (D.f[BW])[kbw];//[kbw  ];// +  c1over54 ;(D.f[BW  ])[kbw];
-			real mfcba = (D.f[BE])[kb];//[kbe  ];// +  c1over54 ;(D.f[BE  ])[kb ];//kbe
-			real mfabc = (D.f[TW])[kw];//[ktw  ];// +  c1over54 ;(D.f[TW  ])[kw ];//ktw
-			real mfbcc = (D.f[TN])[k];//[ktn  ];// +  c1over54 ;(D.f[TN  ])[k  ];//ktn
-			real mfbaa = (D.f[BS])[kbs];//[kbs  ];// +  c1over54 ;(D.f[BS  ])[kbs];
-			real mfbca = (D.f[BN])[kb];//[kbn  ];// +  c1over54 ;(D.f[BN  ])[kb ];//kbn
-			real mfbac = (D.f[TS])[ks];//[kts  ];// +  c1over54 ;(D.f[TS  ])[ks ];//kts
-			real mfbbb = (D.f[REST])[k];//[kzero];// +  c8over27 ;(D.f[REST])[k  ];//kzero
-			real mfccc = (D.f[TNE])[k];//[ktne ];// +  c1over216;(D.f[TNE ])[k  ];//ktne
-			real mfaac = (D.f[TSW])[ksw];//[ktsw ];// +  c1over216;(D.f[TSW ])[ksw];//ktsw
-			real mfcac = (D.f[TSE])[ks];//[ktse ];// +  c1over216;(D.f[TSE ])[ks ];//ktse
-			real mfacc = (D.f[TNW])[kw];//[ktnw ];// +  c1over216;(D.f[TNW ])[kw ];//ktnw
-			real mfcca = (D.f[BNE])[kb];//[kbne ];// +  c1over216;(D.f[BNE ])[kb ];//kbne
-			real mfaaa = (D.f[BSW])[kbsw];//[kbsw ];// +  c1over216;(D.f[BSW ])[kbsw];
-			real mfcaa = (D.f[BSE])[kbs];//[kbse ];// +  c1over216;(D.f[BSE ])[kbs];//kbse
-			real mfaca = (D.f[BNW])[kbw];//[kbnw ];// +  c1over216;(D.f[BNW ])[kbw];//kbnw
+			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
+			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
+			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfbbb = (D.f[DIR_000])[k];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
+			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
 											////////////////////////////////////////////////////////////////////////////////////
 											//slow
 											//real oMdrho = one - ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
@@ -973,33 +973,33 @@ extern "C" __global__ void LB_Kernel_Cum_IsoTest_Incomp_SP_27(real omega,
 
 
 			////////////////////////////////////////////////////////////////////////////////////
-			(D.f[E])[k] = mfabb;//(D.f[ E   ])[ke   ] = mfabb;// -  c2over27 ;  (D.f[ E   ])[k   ]                                                                     
-			(D.f[W])[kw] = mfcbb;//(D.f[ W   ])[kw   ] = mfcbb;// -  c2over27 ;  (D.f[ W   ])[kw  ]                                                                   
-			(D.f[N])[k] = mfbab;//(D.f[ N   ])[kn   ] = mfbab;// -  c2over27 ;	 (D.f[ N   ])[k   ]
-			(D.f[S])[ks] = mfbcb;//(D.f[ S   ])[ks   ] = mfbcb;// -  c2over27 ;	 (D.f[ S   ])[ks  ]
-			(D.f[T])[k] = mfbba;//(D.f[ T   ])[kt   ] = mfbba;// -  c2over27 ;	 (D.f[ T   ])[k   ]
-			(D.f[B])[kb] = mfbbc;//(D.f[ B   ])[kb   ] = mfbbc;// -  c2over27 ;	 (D.f[ B   ])[kb  ]
-			(D.f[NE])[k] = mfaab;//(D.f[ NE  ])[kne  ] = mfaab;// -  c1over54 ;	 (D.f[ NE  ])[k   ]
-			(D.f[SW])[ksw] = mfccb;//(D.f[ SW  ])[ksw  ] = mfccb;// -  c1over54 ;	 (D.f[ SW  ])[ksw ]
-			(D.f[SE])[ks] = mfacb;//(D.f[ SE  ])[kse  ] = mfacb;// -  c1over54 ;	 (D.f[ SE  ])[ks  ]
-			(D.f[NW])[kw] = mfcab;//(D.f[ NW  ])[knw  ] = mfcab;// -  c1over54 ;	 (D.f[ NW  ])[kw  ]
-			(D.f[TE])[k] = mfaba;//(D.f[ TE  ])[kte  ] = mfaba;// -  c1over54 ;	 (D.f[ TE  ])[k   ]
-			(D.f[BW])[kbw] = mfcbc;//(D.f[ BW  ])[kbw  ] = mfcbc;// -  c1over54 ;	 (D.f[ BW  ])[kbw ]
-			(D.f[BE])[kb] = mfabc;//(D.f[ BE  ])[kbe  ] = mfabc;// -  c1over54 ;	 (D.f[ BE  ])[kb  ]
-			(D.f[TW])[kw] = mfcba;//(D.f[ TW  ])[ktw  ] = mfcba;// -  c1over54 ;	 (D.f[ TW  ])[kw  ]
-			(D.f[TN])[k] = mfbaa;//(D.f[ TN  ])[ktn  ] = mfbaa;// -  c1over54 ;	 (D.f[ TN  ])[k   ]
-			(D.f[BS])[kbs] = mfbcc;//(D.f[ BS  ])[kbs  ] = mfbcc;// -  c1over54 ;	 (D.f[ BS  ])[kbs ]
-			(D.f[BN])[kb] = mfbac;//(D.f[ BN  ])[kbn  ] = mfbac;// -  c1over54 ;	 (D.f[ BN  ])[kb  ]
-			(D.f[TS])[ks] = mfbca;//(D.f[ TS  ])[kts  ] = mfbca;// -  c1over54 ;	 (D.f[ TS  ])[ks  ]
-			(D.f[REST])[k] = mfbbb;//(D.f[ REST])[kzero] = mfbbb;// -  c8over27 ;	 (D.f[ REST])[k   ]
-			(D.f[TNE])[k] = mfaaa;//(D.f[ TNE ])[ktne ] = mfaaa;// -  c1over216;	 (D.f[ TNE ])[k   ]
-			(D.f[TSE])[ks] = mfaca;//(D.f[ TSE ])[ktse ] = mfaca;// -  c1over216;	 (D.f[ TSE ])[ks  ]
-			(D.f[BNE])[kb] = mfaac;//(D.f[ BNE ])[kbne ] = mfaac;// -  c1over216;	 (D.f[ BNE ])[kb  ]
-			(D.f[BSE])[kbs] = mfacc;//(D.f[ BSE ])[kbse ] = mfacc;// -  c1over216;	 (D.f[ BSE ])[kbs ]
-			(D.f[TNW])[kw] = mfcaa;//(D.f[ TNW ])[ktnw ] = mfcaa;// -  c1over216;	 (D.f[ TNW ])[kw  ]
-			(D.f[TSW])[ksw] = mfcca;//(D.f[ TSW ])[ktsw ] = mfcca;// -  c1over216;	 (D.f[ TSW ])[ksw ]
-			(D.f[BNW])[kbw] = mfcac;//(D.f[ BNW ])[kbnw ] = mfcac;// -  c1over216;	 (D.f[ BNW ])[kbw ]
-			(D.f[BSW])[kbsw] = mfccc;//(D.f[ BSW ])[kbsw ] = mfccc;// -  c1over216;	 (D.f[ BSW ])[kbsw]
+			(D.f[DIR_P00])[k] = mfabb;//(D.f[ DIR_P00   ])[ke   ] = mfabb;// -  c2over27 ;  (D.f[ DIR_P00   ])[k   ]                                                                     
+			(D.f[DIR_M00])[kw] = mfcbb;//(D.f[ DIR_M00   ])[kw   ] = mfcbb;// -  c2over27 ;  (D.f[ DIR_M00   ])[kw  ]                                                                   
+			(D.f[DIR_0P0])[k] = mfbab;//(D.f[ DIR_0P0   ])[kn   ] = mfbab;// -  c2over27 ;	 (D.f[ DIR_0P0   ])[k   ]
+			(D.f[DIR_0M0])[ks] = mfbcb;//(D.f[ DIR_0M0   ])[ks   ] = mfbcb;// -  c2over27 ;	 (D.f[ DIR_0M0   ])[ks  ]
+			(D.f[DIR_00P])[k] = mfbba;//(D.f[ DIR_00P   ])[kt   ] = mfbba;// -  c2over27 ;	 (D.f[ DIR_00P   ])[k   ]
+			(D.f[DIR_00M])[kb] = mfbbc;//(D.f[ DIR_00M   ])[kb   ] = mfbbc;// -  c2over27 ;	 (D.f[ DIR_00M   ])[kb  ]
+			(D.f[DIR_PP0])[k] = mfaab;//(D.f[ DIR_PP0  ])[kne  ] = mfaab;// -  c1over54 ;	 (D.f[ DIR_PP0  ])[k   ]
+			(D.f[DIR_MM0])[ksw] = mfccb;//(D.f[ DIR_MM0  ])[ksw  ] = mfccb;// -  c1over54 ;	 (D.f[ DIR_MM0  ])[ksw ]
+			(D.f[DIR_PM0])[ks] = mfacb;//(D.f[ DIR_PM0  ])[kse  ] = mfacb;// -  c1over54 ;	 (D.f[ DIR_PM0  ])[ks  ]
+			(D.f[DIR_MP0])[kw] = mfcab;//(D.f[ DIR_MP0  ])[knw  ] = mfcab;// -  c1over54 ;	 (D.f[ DIR_MP0  ])[kw  ]
+			(D.f[DIR_P0P])[k] = mfaba;//(D.f[ DIR_P0P  ])[kte  ] = mfaba;// -  c1over54 ;	 (D.f[ DIR_P0P  ])[k   ]
+			(D.f[DIR_M0M])[kbw] = mfcbc;//(D.f[ DIR_M0M  ])[kbw  ] = mfcbc;// -  c1over54 ;	 (D.f[ DIR_M0M  ])[kbw ]
+			(D.f[DIR_P0M])[kb] = mfabc;//(D.f[ DIR_P0M  ])[kbe  ] = mfabc;// -  c1over54 ;	 (D.f[ DIR_P0M  ])[kb  ]
+			(D.f[DIR_M0P])[kw] = mfcba;//(D.f[ DIR_M0P  ])[ktw  ] = mfcba;// -  c1over54 ;	 (D.f[ DIR_M0P  ])[kw  ]
+			(D.f[DIR_0PP])[k] = mfbaa;//(D.f[ DIR_0PP  ])[ktn  ] = mfbaa;// -  c1over54 ;	 (D.f[ DIR_0PP  ])[k   ]
+			(D.f[DIR_0MM])[kbs] = mfbcc;//(D.f[ DIR_0MM  ])[kbs  ] = mfbcc;// -  c1over54 ;	 (D.f[ DIR_0MM  ])[kbs ]
+			(D.f[DIR_0PM])[kb] = mfbac;//(D.f[ DIR_0PM  ])[kbn  ] = mfbac;// -  c1over54 ;	 (D.f[ DIR_0PM  ])[kb  ]
+			(D.f[DIR_0MP])[ks] = mfbca;//(D.f[ DIR_0MP  ])[kts  ] = mfbca;// -  c1over54 ;	 (D.f[ DIR_0MP  ])[ks  ]
+			(D.f[DIR_000])[k] = mfbbb;//(D.f[ DIR_000])[kzero] = mfbbb;// -  c8over27 ;	 (D.f[ DIR_000])[k   ]
+			(D.f[DIR_PPP])[k] = mfaaa;//(D.f[ DIR_PPP ])[ktne ] = mfaaa;// -  c1over216;	 (D.f[ DIR_PPP ])[k   ]
+			(D.f[DIR_PMP])[ks] = mfaca;//(D.f[ DIR_PMP ])[ktse ] = mfaca;// -  c1over216;	 (D.f[ DIR_PMP ])[ks  ]
+			(D.f[DIR_PPM])[kb] = mfaac;//(D.f[ DIR_PPM ])[kbne ] = mfaac;// -  c1over216;	 (D.f[ DIR_PPM ])[kb  ]
+			(D.f[DIR_PMM])[kbs] = mfacc;//(D.f[ DIR_PMM ])[kbse ] = mfacc;// -  c1over216;	 (D.f[ DIR_PMM ])[kbs ]
+			(D.f[DIR_MPP])[kw] = mfcaa;//(D.f[ DIR_MPP ])[ktnw ] = mfcaa;// -  c1over216;	 (D.f[ DIR_MPP ])[kw  ]
+			(D.f[DIR_MMP])[ksw] = mfcca;//(D.f[ DIR_MMP ])[ktsw ] = mfcca;// -  c1over216;	 (D.f[ DIR_MMP ])[ksw ]
+			(D.f[DIR_MPM])[kbw] = mfcac;//(D.f[ DIR_MPM ])[kbnw ] = mfcac;// -  c1over216;	 (D.f[ DIR_MPM ])[kbw ]
+			(D.f[DIR_MMM])[kbsw] = mfccc;//(D.f[ DIR_MMM ])[kbsw ] = mfccc;// -  c1over216;	 (D.f[ DIR_MMM ])[kbsw]
 										////////////////////////////////////////////////////////////////////////////////////
 		}
 	}
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantIsoSP27/CumulantIsoIncompSP27_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantIsoSP27/CumulantIsoIncompSP27_Device.cuh
index 9ed2f167ecb199429c7497acc72f298db41a0dc2..57dc7180ce7900333b4071db409a03ac847dd641 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantIsoSP27/CumulantIsoIncompSP27_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantIsoSP27/CumulantIsoIncompSP27_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_Cum_IsoTest_Incomp_SP_27(real omega,
+__global__ void LB_Kernel_Cum_IsoTest_Incomp_SP_27(real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantK15/CumulantK15Incomp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantK15/CumulantK15Incomp_Device.cu
index 1727427aef2de70c3810cf08e68128c00e4aab5a..fc108ef1ef109a40735e250bd9a0f21491e4f977 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantK15/CumulantK15Incomp_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantK15/CumulantK15Incomp_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_CumulantK15Incomp(real omega,
+__global__ void LB_Kernel_CumulantK15Incomp(real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
@@ -37,63 +37,63 @@ extern "C" __global__ void LB_Kernel_CumulantK15Incomp(real omega,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[E] = &DDStart[E   *size_Mat];
-				D.f[W] = &DDStart[W   *size_Mat];
-				D.f[N] = &DDStart[N   *size_Mat];
-				D.f[S] = &DDStart[S   *size_Mat];
-				D.f[T] = &DDStart[T   *size_Mat];
-				D.f[B] = &DDStart[B   *size_Mat];
-				D.f[NE] = &DDStart[NE  *size_Mat];
-				D.f[SW] = &DDStart[SW  *size_Mat];
-				D.f[SE] = &DDStart[SE  *size_Mat];
-				D.f[NW] = &DDStart[NW  *size_Mat];
-				D.f[TE] = &DDStart[TE  *size_Mat];
-				D.f[BW] = &DDStart[BW  *size_Mat];
-				D.f[BE] = &DDStart[BE  *size_Mat];
-				D.f[TW] = &DDStart[TW  *size_Mat];
-				D.f[TN] = &DDStart[TN  *size_Mat];
-				D.f[BS] = &DDStart[BS  *size_Mat];
-				D.f[BN] = &DDStart[BN  *size_Mat];
-				D.f[TS] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[TNE] = &DDStart[TNE *size_Mat];
-				D.f[TSW] = &DDStart[TSW *size_Mat];
-				D.f[TSE] = &DDStart[TSE *size_Mat];
-				D.f[TNW] = &DDStart[TNW *size_Mat];
-				D.f[BNE] = &DDStart[BNE *size_Mat];
-				D.f[BSW] = &DDStart[BSW *size_Mat];
-				D.f[BSE] = &DDStart[BSE *size_Mat];
-				D.f[BNW] = &DDStart[BNW *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
 			}
 			else
 			{
-				D.f[W] = &DDStart[E   *size_Mat];
-				D.f[E] = &DDStart[W   *size_Mat];
-				D.f[S] = &DDStart[N   *size_Mat];
-				D.f[N] = &DDStart[S   *size_Mat];
-				D.f[B] = &DDStart[T   *size_Mat];
-				D.f[T] = &DDStart[B   *size_Mat];
-				D.f[SW] = &DDStart[NE  *size_Mat];
-				D.f[NE] = &DDStart[SW  *size_Mat];
-				D.f[NW] = &DDStart[SE  *size_Mat];
-				D.f[SE] = &DDStart[NW  *size_Mat];
-				D.f[BW] = &DDStart[TE  *size_Mat];
-				D.f[TE] = &DDStart[BW  *size_Mat];
-				D.f[TW] = &DDStart[BE  *size_Mat];
-				D.f[BE] = &DDStart[TW  *size_Mat];
-				D.f[BS] = &DDStart[TN  *size_Mat];
-				D.f[TN] = &DDStart[BS  *size_Mat];
-				D.f[TS] = &DDStart[BN  *size_Mat];
-				D.f[BN] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[BSW] = &DDStart[TNE *size_Mat];
-				D.f[BNE] = &DDStart[TSW *size_Mat];
-				D.f[BNW] = &DDStart[TSE *size_Mat];
-				D.f[BSE] = &DDStart[TNW *size_Mat];
-				D.f[TSW] = &DDStart[BNE *size_Mat];
-				D.f[TNE] = &DDStart[BSW *size_Mat];
-				D.f[TNW] = &DDStart[BSE *size_Mat];
-				D.f[TSE] = &DDStart[BNW *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -154,33 +154,33 @@ extern "C" __global__ void LB_Kernel_CumulantK15Incomp(real omega,
 			//unsigned int ktne = k;
 			//unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[E])[k];//[ke   ];// +  c2over27 ;(D.f[E   ])[k  ];//ke
-			real mfabb = (D.f[W])[kw];//[kw   ];// +  c2over27 ;(D.f[W   ])[kw ];
-			real mfbcb = (D.f[N])[k];//[kn   ];// +  c2over27 ;(D.f[N   ])[k  ];//kn
-			real mfbab = (D.f[S])[ks];//[ks   ];// +  c2over27 ;(D.f[S   ])[ks ];
-			real mfbbc = (D.f[T])[k];//[kt   ];// +  c2over27 ;(D.f[T   ])[k  ];//kt
-			real mfbba = (D.f[B])[kb];//[kb   ];// +  c2over27 ;(D.f[B   ])[kb ];
-			real mfccb = (D.f[NE])[k];//[kne  ];// +  c1over54 ;(D.f[NE  ])[k  ];//kne
-			real mfaab = (D.f[SW])[ksw];//[ksw  ];// +  c1over54 ;(D.f[SW  ])[ksw];
-			real mfcab = (D.f[SE])[ks];//[kse  ];// +  c1over54 ;(D.f[SE  ])[ks ];//kse
-			real mfacb = (D.f[NW])[kw];//[knw  ];// +  c1over54 ;(D.f[NW  ])[kw ];//knw
-			real mfcbc = (D.f[TE])[k];//[kte  ];// +  c1over54 ;(D.f[TE  ])[k  ];//kte
-			real mfaba = (D.f[BW])[kbw];//[kbw  ];// +  c1over54 ;(D.f[BW  ])[kbw];
-			real mfcba = (D.f[BE])[kb];//[kbe  ];// +  c1over54 ;(D.f[BE  ])[kb ];//kbe
-			real mfabc = (D.f[TW])[kw];//[ktw  ];// +  c1over54 ;(D.f[TW  ])[kw ];//ktw
-			real mfbcc = (D.f[TN])[k];//[ktn  ];// +  c1over54 ;(D.f[TN  ])[k  ];//ktn
-			real mfbaa = (D.f[BS])[kbs];//[kbs  ];// +  c1over54 ;(D.f[BS  ])[kbs];
-			real mfbca = (D.f[BN])[kb];//[kbn  ];// +  c1over54 ;(D.f[BN  ])[kb ];//kbn
-			real mfbac = (D.f[TS])[ks];//[kts  ];// +  c1over54 ;(D.f[TS  ])[ks ];//kts
-			real mfbbb = (D.f[REST])[k];//[kzero];// +  c8over27 ;(D.f[REST])[k  ];//kzero
-			real mfccc = (D.f[TNE])[k];//[ktne ];// +  c1over216;(D.f[TNE ])[k  ];//ktne
-			real mfaac = (D.f[TSW])[ksw];//[ktsw ];// +  c1over216;(D.f[TSW ])[ksw];//ktsw
-			real mfcac = (D.f[TSE])[ks];//[ktse ];// +  c1over216;(D.f[TSE ])[ks ];//ktse
-			real mfacc = (D.f[TNW])[kw];//[ktnw ];// +  c1over216;(D.f[TNW ])[kw ];//ktnw
-			real mfcca = (D.f[BNE])[kb];//[kbne ];// +  c1over216;(D.f[BNE ])[kb ];//kbne
-			real mfaaa = (D.f[BSW])[kbsw];//[kbsw ];// +  c1over216;(D.f[BSW ])[kbsw];
-			real mfcaa = (D.f[BSE])[kbs];//[kbse ];// +  c1over216;(D.f[BSE ])[kbs];//kbse
-			real mfaca = (D.f[BNW])[kbw];//[kbnw ];// +  c1over216;(D.f[BNW ])[kbw];//kbnw
+			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
+			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
+			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfbbb = (D.f[DIR_000])[k];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
+			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
 											////////////////////////////////////////////////////////////////////////////////////
 											//slow
 											//real oMdrho = one - ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
@@ -1206,33 +1206,33 @@ extern "C" __global__ void LB_Kernel_CumulantK15Incomp(real omega,
 
 
 			////////////////////////////////////////////////////////////////////////////////////
-			(D.f[E])[k] = mfabb;//(D.f[ E   ])[ke   ] = mfabb;// -  c2over27 ;  (D.f[ E   ])[k   ]                                                                     
-			(D.f[W])[kw] = mfcbb;//(D.f[ W   ])[kw   ] = mfcbb;// -  c2over27 ;  (D.f[ W   ])[kw  ]                                                                   
-			(D.f[N])[k] = mfbab;//(D.f[ N   ])[kn   ] = mfbab;// -  c2over27 ;	 (D.f[ N   ])[k   ]
-			(D.f[S])[ks] = mfbcb;//(D.f[ S   ])[ks   ] = mfbcb;// -  c2over27 ;	 (D.f[ S   ])[ks  ]
-			(D.f[T])[k] = mfbba;//(D.f[ T   ])[kt   ] = mfbba;// -  c2over27 ;	 (D.f[ T   ])[k   ]
-			(D.f[B])[kb] = mfbbc;//(D.f[ B   ])[kb   ] = mfbbc;// -  c2over27 ;	 (D.f[ B   ])[kb  ]
-			(D.f[NE])[k] = mfaab;//(D.f[ NE  ])[kne  ] = mfaab;// -  c1over54 ;	 (D.f[ NE  ])[k   ]
-			(D.f[SW])[ksw] = mfccb;//(D.f[ SW  ])[ksw  ] = mfccb;// -  c1over54 ;	 (D.f[ SW  ])[ksw ]
-			(D.f[SE])[ks] = mfacb;//(D.f[ SE  ])[kse  ] = mfacb;// -  c1over54 ;	 (D.f[ SE  ])[ks  ]
-			(D.f[NW])[kw] = mfcab;//(D.f[ NW  ])[knw  ] = mfcab;// -  c1over54 ;	 (D.f[ NW  ])[kw  ]
-			(D.f[TE])[k] = mfaba;//(D.f[ TE  ])[kte  ] = mfaba;// -  c1over54 ;	 (D.f[ TE  ])[k   ]
-			(D.f[BW])[kbw] = mfcbc;//(D.f[ BW  ])[kbw  ] = mfcbc;// -  c1over54 ;	 (D.f[ BW  ])[kbw ]
-			(D.f[BE])[kb] = mfabc;//(D.f[ BE  ])[kbe  ] = mfabc;// -  c1over54 ;	 (D.f[ BE  ])[kb  ]
-			(D.f[TW])[kw] = mfcba;//(D.f[ TW  ])[ktw  ] = mfcba;// -  c1over54 ;	 (D.f[ TW  ])[kw  ]
-			(D.f[TN])[k] = mfbaa;//(D.f[ TN  ])[ktn  ] = mfbaa;// -  c1over54 ;	 (D.f[ TN  ])[k   ]
-			(D.f[BS])[kbs] = mfbcc;//(D.f[ BS  ])[kbs  ] = mfbcc;// -  c1over54 ;	 (D.f[ BS  ])[kbs ]
-			(D.f[BN])[kb] = mfbac;//(D.f[ BN  ])[kbn  ] = mfbac;// -  c1over54 ;	 (D.f[ BN  ])[kb  ]
-			(D.f[TS])[ks] = mfbca;//(D.f[ TS  ])[kts  ] = mfbca;// -  c1over54 ;	 (D.f[ TS  ])[ks  ]
-			(D.f[REST])[k] = mfbbb;//(D.f[ REST])[kzero] = mfbbb;// -  c8over27 ;	 (D.f[ REST])[k   ]
-			(D.f[TNE])[k] = mfaaa;//(D.f[ TNE ])[ktne ] = mfaaa;// -  c1over216;	 (D.f[ TNE ])[k   ]
-			(D.f[TSE])[ks] = mfaca;//(D.f[ TSE ])[ktse ] = mfaca;// -  c1over216;	 (D.f[ TSE ])[ks  ]
-			(D.f[BNE])[kb] = mfaac;//(D.f[ BNE ])[kbne ] = mfaac;// -  c1over216;	 (D.f[ BNE ])[kb  ]
-			(D.f[BSE])[kbs] = mfacc;//(D.f[ BSE ])[kbse ] = mfacc;// -  c1over216;	 (D.f[ BSE ])[kbs ]
-			(D.f[TNW])[kw] = mfcaa;//(D.f[ TNW ])[ktnw ] = mfcaa;// -  c1over216;	 (D.f[ TNW ])[kw  ]
-			(D.f[TSW])[ksw] = mfcca;//(D.f[ TSW ])[ktsw ] = mfcca;// -  c1over216;	 (D.f[ TSW ])[ksw ]
-			(D.f[BNW])[kbw] = mfcac;//(D.f[ BNW ])[kbnw ] = mfcac;// -  c1over216;	 (D.f[ BNW ])[kbw ]
-			(D.f[BSW])[kbsw] = mfccc;//(D.f[ BSW ])[kbsw ] = mfccc;// -  c1over216;	 (D.f[ BSW ])[kbsw]
+			(D.f[DIR_P00])[k] = mfabb;//(D.f[ DIR_P00   ])[ke   ] = mfabb;// -  c2over27 ;  (D.f[ DIR_P00   ])[k   ]                                                                     
+			(D.f[DIR_M00])[kw] = mfcbb;//(D.f[ DIR_M00   ])[kw   ] = mfcbb;// -  c2over27 ;  (D.f[ DIR_M00   ])[kw  ]                                                                   
+			(D.f[DIR_0P0])[k] = mfbab;//(D.f[ DIR_0P0   ])[kn   ] = mfbab;// -  c2over27 ;	 (D.f[ DIR_0P0   ])[k   ]
+			(D.f[DIR_0M0])[ks] = mfbcb;//(D.f[ DIR_0M0   ])[ks   ] = mfbcb;// -  c2over27 ;	 (D.f[ DIR_0M0   ])[ks  ]
+			(D.f[DIR_00P])[k] = mfbba;//(D.f[ DIR_00P   ])[kt   ] = mfbba;// -  c2over27 ;	 (D.f[ DIR_00P   ])[k   ]
+			(D.f[DIR_00M])[kb] = mfbbc;//(D.f[ DIR_00M   ])[kb   ] = mfbbc;// -  c2over27 ;	 (D.f[ DIR_00M   ])[kb  ]
+			(D.f[DIR_PP0])[k] = mfaab;//(D.f[ DIR_PP0  ])[kne  ] = mfaab;// -  c1over54 ;	 (D.f[ DIR_PP0  ])[k   ]
+			(D.f[DIR_MM0])[ksw] = mfccb;//(D.f[ DIR_MM0  ])[ksw  ] = mfccb;// -  c1over54 ;	 (D.f[ DIR_MM0  ])[ksw ]
+			(D.f[DIR_PM0])[ks] = mfacb;//(D.f[ DIR_PM0  ])[kse  ] = mfacb;// -  c1over54 ;	 (D.f[ DIR_PM0  ])[ks  ]
+			(D.f[DIR_MP0])[kw] = mfcab;//(D.f[ DIR_MP0  ])[knw  ] = mfcab;// -  c1over54 ;	 (D.f[ DIR_MP0  ])[kw  ]
+			(D.f[DIR_P0P])[k] = mfaba;//(D.f[ DIR_P0P  ])[kte  ] = mfaba;// -  c1over54 ;	 (D.f[ DIR_P0P  ])[k   ]
+			(D.f[DIR_M0M])[kbw] = mfcbc;//(D.f[ DIR_M0M  ])[kbw  ] = mfcbc;// -  c1over54 ;	 (D.f[ DIR_M0M  ])[kbw ]
+			(D.f[DIR_P0M])[kb] = mfabc;//(D.f[ DIR_P0M  ])[kbe  ] = mfabc;// -  c1over54 ;	 (D.f[ DIR_P0M  ])[kb  ]
+			(D.f[DIR_M0P])[kw] = mfcba;//(D.f[ DIR_M0P  ])[ktw  ] = mfcba;// -  c1over54 ;	 (D.f[ DIR_M0P  ])[kw  ]
+			(D.f[DIR_0PP])[k] = mfbaa;//(D.f[ DIR_0PP  ])[ktn  ] = mfbaa;// -  c1over54 ;	 (D.f[ DIR_0PP  ])[k   ]
+			(D.f[DIR_0MM])[kbs] = mfbcc;//(D.f[ DIR_0MM  ])[kbs  ] = mfbcc;// -  c1over54 ;	 (D.f[ DIR_0MM  ])[kbs ]
+			(D.f[DIR_0PM])[kb] = mfbac;//(D.f[ DIR_0PM  ])[kbn  ] = mfbac;// -  c1over54 ;	 (D.f[ DIR_0PM  ])[kb  ]
+			(D.f[DIR_0MP])[ks] = mfbca;//(D.f[ DIR_0MP  ])[kts  ] = mfbca;// -  c1over54 ;	 (D.f[ DIR_0MP  ])[ks  ]
+			(D.f[DIR_000])[k] = mfbbb;//(D.f[ DIR_000])[kzero] = mfbbb;// -  c8over27 ;	 (D.f[ DIR_000])[k   ]
+			(D.f[DIR_PPP])[k] = mfaaa;//(D.f[ DIR_PPP ])[ktne ] = mfaaa;// -  c1over216;	 (D.f[ DIR_PPP ])[k   ]
+			(D.f[DIR_PMP])[ks] = mfaca;//(D.f[ DIR_PMP ])[ktse ] = mfaca;// -  c1over216;	 (D.f[ DIR_PMP ])[ks  ]
+			(D.f[DIR_PPM])[kb] = mfaac;//(D.f[ DIR_PPM ])[kbne ] = mfaac;// -  c1over216;	 (D.f[ DIR_PPM ])[kb  ]
+			(D.f[DIR_PMM])[kbs] = mfacc;//(D.f[ DIR_PMM ])[kbse ] = mfacc;// -  c1over216;	 (D.f[ DIR_PMM ])[kbs ]
+			(D.f[DIR_MPP])[kw] = mfcaa;//(D.f[ DIR_MPP ])[ktnw ] = mfcaa;// -  c1over216;	 (D.f[ DIR_MPP ])[kw  ]
+			(D.f[DIR_MMP])[ksw] = mfcca;//(D.f[ DIR_MMP ])[ktsw ] = mfcca;// -  c1over216;	 (D.f[ DIR_MMP ])[ksw ]
+			(D.f[DIR_MPM])[kbw] = mfcac;//(D.f[ DIR_MPM ])[kbnw ] = mfcac;// -  c1over216;	 (D.f[ DIR_MPM ])[kbw ]
+			(D.f[DIR_MMM])[kbsw] = mfccc;//(D.f[ DIR_MMM ])[kbsw ] = mfccc;// -  c1over216;	 (D.f[ DIR_MMM ])[kbsw]
 										////////////////////////////////////////////////////////////////////////////////////
 		}
 	}
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantK15/CumulantK15Incomp_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantK15/CumulantK15Incomp_Device.cuh
index 112e9f7a99bf683e8bacb348633f46a10e149e9e..f2b5063f9db6d55b9efb547c0c05d450463e0509 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantK15/CumulantK15Incomp_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/CumulantK15/CumulantK15Incomp_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_CumulantK15Incomp(real s9,
+__global__ void LB_Kernel_CumulantK15Incomp(real s9,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/MRT/MRTIncompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/MRT/MRTIncompSP27_Device.cu
index 700cd974b1c739dd5cb3722917453707e465972f..f6a283c2f9ba3c15729061ebeabcf34edd0abe97 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/MRT/MRTIncompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/MRT/MRTIncompSP27_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_MRT_Incomp_SP_27(real omega,
+__global__ void LB_Kernel_MRT_Incomp_SP_27(real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
@@ -37,63 +37,63 @@ extern "C" __global__ void LB_Kernel_MRT_Incomp_SP_27(real omega,
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[E] = &DDStart[E   *size_Mat];
-				D.f[W] = &DDStart[W   *size_Mat];
-				D.f[N] = &DDStart[N   *size_Mat];
-				D.f[S] = &DDStart[S   *size_Mat];
-				D.f[T] = &DDStart[T   *size_Mat];
-				D.f[B] = &DDStart[B   *size_Mat];
-				D.f[NE] = &DDStart[NE  *size_Mat];
-				D.f[SW] = &DDStart[SW  *size_Mat];
-				D.f[SE] = &DDStart[SE  *size_Mat];
-				D.f[NW] = &DDStart[NW  *size_Mat];
-				D.f[TE] = &DDStart[TE  *size_Mat];
-				D.f[BW] = &DDStart[BW  *size_Mat];
-				D.f[BE] = &DDStart[BE  *size_Mat];
-				D.f[TW] = &DDStart[TW  *size_Mat];
-				D.f[TN] = &DDStart[TN  *size_Mat];
-				D.f[BS] = &DDStart[BS  *size_Mat];
-				D.f[BN] = &DDStart[BN  *size_Mat];
-				D.f[TS] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[TNE] = &DDStart[TNE *size_Mat];
-				D.f[TSW] = &DDStart[TSW *size_Mat];
-				D.f[TSE] = &DDStart[TSE *size_Mat];
-				D.f[TNW] = &DDStart[TNW *size_Mat];
-				D.f[BNE] = &DDStart[BNE *size_Mat];
-				D.f[BSW] = &DDStart[BSW *size_Mat];
-				D.f[BSE] = &DDStart[BSE *size_Mat];
-				D.f[BNW] = &DDStart[BNW *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
 			}
 			else
 			{
-				D.f[W] = &DDStart[E   *size_Mat];
-				D.f[E] = &DDStart[W   *size_Mat];
-				D.f[S] = &DDStart[N   *size_Mat];
-				D.f[N] = &DDStart[S   *size_Mat];
-				D.f[B] = &DDStart[T   *size_Mat];
-				D.f[T] = &DDStart[B   *size_Mat];
-				D.f[SW] = &DDStart[NE  *size_Mat];
-				D.f[NE] = &DDStart[SW  *size_Mat];
-				D.f[NW] = &DDStart[SE  *size_Mat];
-				D.f[SE] = &DDStart[NW  *size_Mat];
-				D.f[BW] = &DDStart[TE  *size_Mat];
-				D.f[TE] = &DDStart[BW  *size_Mat];
-				D.f[TW] = &DDStart[BE  *size_Mat];
-				D.f[BE] = &DDStart[TW  *size_Mat];
-				D.f[BS] = &DDStart[TN  *size_Mat];
-				D.f[TN] = &DDStart[BS  *size_Mat];
-				D.f[TS] = &DDStart[BN  *size_Mat];
-				D.f[BN] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[BSW] = &DDStart[TNE *size_Mat];
-				D.f[BNE] = &DDStart[TSW *size_Mat];
-				D.f[BNW] = &DDStart[TSE *size_Mat];
-				D.f[BSE] = &DDStart[TNW *size_Mat];
-				D.f[TSW] = &DDStart[BNE *size_Mat];
-				D.f[TNE] = &DDStart[BSW *size_Mat];
-				D.f[TNW] = &DDStart[BSE *size_Mat];
-				D.f[TSE] = &DDStart[BNW *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -126,33 +126,33 @@ extern "C" __global__ void LB_Kernel_MRT_Incomp_SP_27(real omega,
 			//unsigned int ktne = k;
 			unsigned int kbsw = neighborZ[ksw];
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[E])[k];//[ke   ];// +  c2over27 ;(D.f[E   ])[k  ];//ke
-			real mfabb = (D.f[W])[kw];//[kw   ];// +  c2over27 ;(D.f[W   ])[kw ];
-			real mfbcb = (D.f[N])[k];//[kn   ];// +  c2over27 ;(D.f[N   ])[k  ];//kn
-			real mfbab = (D.f[S])[ks];//[ks   ];// +  c2over27 ;(D.f[S   ])[ks ];
-			real mfbbc = (D.f[T])[k];//[kt   ];// +  c2over27 ;(D.f[T   ])[k  ];//kt
-			real mfbba = (D.f[B])[kb];//[kb   ];// +  c2over27 ;(D.f[B   ])[kb ];
-			real mfccb = (D.f[NE])[k];//[kne  ];// +  c1over54 ;(D.f[NE  ])[k  ];//kne
-			real mfaab = (D.f[SW])[ksw];//[ksw  ];// +  c1over54 ;(D.f[SW  ])[ksw];
-			real mfcab = (D.f[SE])[ks];//[kse  ];// +  c1over54 ;(D.f[SE  ])[ks ];//kse
-			real mfacb = (D.f[NW])[kw];//[knw  ];// +  c1over54 ;(D.f[NW  ])[kw ];//knw
-			real mfcbc = (D.f[TE])[k];//[kte  ];// +  c1over54 ;(D.f[TE  ])[k  ];//kte
-			real mfaba = (D.f[BW])[kbw];//[kbw  ];// +  c1over54 ;(D.f[BW  ])[kbw];
-			real mfcba = (D.f[BE])[kb];//[kbe  ];// +  c1over54 ;(D.f[BE  ])[kb ];//kbe
-			real mfabc = (D.f[TW])[kw];//[ktw  ];// +  c1over54 ;(D.f[TW  ])[kw ];//ktw
-			real mfbcc = (D.f[TN])[k];//[ktn  ];// +  c1over54 ;(D.f[TN  ])[k  ];//ktn
-			real mfbaa = (D.f[BS])[kbs];//[kbs  ];// +  c1over54 ;(D.f[BS  ])[kbs];
-			real mfbca = (D.f[BN])[kb];//[kbn  ];// +  c1over54 ;(D.f[BN  ])[kb ];//kbn
-			real mfbac = (D.f[TS])[ks];//[kts  ];// +  c1over54 ;(D.f[TS  ])[ks ];//kts
-			real mfbbb = (D.f[REST])[k];//[kzero];// +  c8over27 ;(D.f[REST])[k  ];//kzero
-			real mfccc = (D.f[TNE])[k];//[ktne ];// +  c1over216;(D.f[TNE ])[k  ];//ktne
-			real mfaac = (D.f[TSW])[ksw];//[ktsw ];// +  c1over216;(D.f[TSW ])[ksw];//ktsw
-			real mfcac = (D.f[TSE])[ks];//[ktse ];// +  c1over216;(D.f[TSE ])[ks ];//ktse
-			real mfacc = (D.f[TNW])[kw];//[ktnw ];// +  c1over216;(D.f[TNW ])[kw ];//ktnw
-			real mfcca = (D.f[BNE])[kb];//[kbne ];// +  c1over216;(D.f[BNE ])[kb ];//kbne
-			real mfaaa = (D.f[BSW])[kbsw];//[kbsw ];// +  c1over216;(D.f[BSW ])[kbsw];
-			real mfcaa = (D.f[BSE])[kbs];//[kbse ];// +  c1over216;(D.f[BSE ])[kbs];//kbse
-			real mfaca = (D.f[BNW])[kbw];//[kbnw ];// +  c1over216;(D.f[BNW ])[kbw];//kbnw
+			real mfcbb = (D.f[DIR_P00])[k];//[ke   ];// +  c2over27 ;(D.f[DIR_P00   ])[k  ];//ke
+			real mfabb = (D.f[DIR_M00])[kw];//[kw   ];// +  c2over27 ;(D.f[DIR_M00   ])[kw ];
+			real mfbcb = (D.f[DIR_0P0])[k];//[kn   ];// +  c2over27 ;(D.f[DIR_0P0   ])[k  ];//kn
+			real mfbab = (D.f[DIR_0M0])[ks];//[ks   ];// +  c2over27 ;(D.f[DIR_0M0   ])[ks ];
+			real mfbbc = (D.f[DIR_00P])[k];//[kt   ];// +  c2over27 ;(D.f[DIR_00P   ])[k  ];//kt
+			real mfbba = (D.f[DIR_00M])[kb];//[kb   ];// +  c2over27 ;(D.f[DIR_00M   ])[kb ];
+			real mfccb = (D.f[DIR_PP0])[k];//[kne  ];// +  c1over54 ;(D.f[DIR_PP0  ])[k  ];//kne
+			real mfaab = (D.f[DIR_MM0])[ksw];//[ksw  ];// +  c1over54 ;(D.f[DIR_MM0  ])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks];//[kse  ];// +  c1over54 ;(D.f[DIR_PM0  ])[ks ];//kse
+			real mfacb = (D.f[DIR_MP0])[kw];//[knw  ];// +  c1over54 ;(D.f[DIR_MP0  ])[kw ];//knw
+			real mfcbc = (D.f[DIR_P0P])[k];//[kte  ];// +  c1over54 ;(D.f[DIR_P0P  ])[k  ];//kte
+			real mfaba = (D.f[DIR_M0M])[kbw];//[kbw  ];// +  c1over54 ;(D.f[DIR_M0M  ])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb];//[kbe  ];// +  c1over54 ;(D.f[DIR_P0M  ])[kb ];//kbe
+			real mfabc = (D.f[DIR_M0P])[kw];//[ktw  ];// +  c1over54 ;(D.f[DIR_M0P  ])[kw ];//ktw
+			real mfbcc = (D.f[DIR_0PP])[k];//[ktn  ];// +  c1over54 ;(D.f[DIR_0PP  ])[k  ];//ktn
+			real mfbaa = (D.f[DIR_0MM])[kbs];//[kbs  ];// +  c1over54 ;(D.f[DIR_0MM  ])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb];//[kbn  ];// +  c1over54 ;(D.f[DIR_0PM  ])[kb ];//kbn
+			real mfbac = (D.f[DIR_0MP])[ks];//[kts  ];// +  c1over54 ;(D.f[DIR_0MP  ])[ks ];//kts
+			real mfbbb = (D.f[DIR_000])[k];//[kzero];// +  c8over27 ;(D.f[DIR_000])[k  ];//kzero
+			real mfccc = (D.f[DIR_PPP])[k];//[ktne ];// +  c1over216;(D.f[DIR_PPP ])[k  ];//ktne
+			real mfaac = (D.f[DIR_MMP])[ksw];//[ktsw ];// +  c1over216;(D.f[DIR_MMP ])[ksw];//ktsw
+			real mfcac = (D.f[DIR_PMP])[ks];//[ktse ];// +  c1over216;(D.f[DIR_PMP ])[ks ];//ktse
+			real mfacc = (D.f[DIR_MPP])[kw];//[ktnw ];// +  c1over216;(D.f[DIR_MPP ])[kw ];//ktnw
+			real mfcca = (D.f[DIR_PPM])[kb];//[kbne ];// +  c1over216;(D.f[DIR_PPM ])[kb ];//kbne
+			real mfaaa = (D.f[DIR_MMM])[kbsw];//[kbsw ];// +  c1over216;(D.f[DIR_MMM ])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];//[kbse ];// +  c1over216;(D.f[DIR_PMM ])[kbs];//kbse
+			real mfaca = (D.f[DIR_MPM])[kbw];//[kbnw ];// +  c1over216;(D.f[DIR_MPM ])[kbw];//kbnw
 											////////////////////////////////////////////////////////////////////////////////////
 											//slow
 											//real oMdrho = one - ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
@@ -731,33 +731,33 @@ extern "C" __global__ void LB_Kernel_MRT_Incomp_SP_27(real omega,
 
 
 			////////////////////////////////////////////////////////////////////////////////////
-			(D.f[E])[k] = mfabb;//(D.f[ E   ])[ke   ] = mfabb;// -  c2over27 ;  (D.f[ E   ])[k   ]                                                                     
-			(D.f[W])[kw] = mfcbb;//(D.f[ W   ])[kw   ] = mfcbb;// -  c2over27 ;  (D.f[ W   ])[kw  ]                                                                   
-			(D.f[N])[k] = mfbab;//(D.f[ N   ])[kn   ] = mfbab;// -  c2over27 ;	 (D.f[ N   ])[k   ]
-			(D.f[S])[ks] = mfbcb;//(D.f[ S   ])[ks   ] = mfbcb;// -  c2over27 ;	 (D.f[ S   ])[ks  ]
-			(D.f[T])[k] = mfbba;//(D.f[ T   ])[kt   ] = mfbba;// -  c2over27 ;	 (D.f[ T   ])[k   ]
-			(D.f[B])[kb] = mfbbc;//(D.f[ B   ])[kb   ] = mfbbc;// -  c2over27 ;	 (D.f[ B   ])[kb  ]
-			(D.f[NE])[k] = mfaab;//(D.f[ NE  ])[kne  ] = mfaab;// -  c1over54 ;	 (D.f[ NE  ])[k   ]
-			(D.f[SW])[ksw] = mfccb;//(D.f[ SW  ])[ksw  ] = mfccb;// -  c1over54 ;	 (D.f[ SW  ])[ksw ]
-			(D.f[SE])[ks] = mfacb;//(D.f[ SE  ])[kse  ] = mfacb;// -  c1over54 ;	 (D.f[ SE  ])[ks  ]
-			(D.f[NW])[kw] = mfcab;//(D.f[ NW  ])[knw  ] = mfcab;// -  c1over54 ;	 (D.f[ NW  ])[kw  ]
-			(D.f[TE])[k] = mfaba;//(D.f[ TE  ])[kte  ] = mfaba;// -  c1over54 ;	 (D.f[ TE  ])[k   ]
-			(D.f[BW])[kbw] = mfcbc;//(D.f[ BW  ])[kbw  ] = mfcbc;// -  c1over54 ;	 (D.f[ BW  ])[kbw ]
-			(D.f[BE])[kb] = mfabc;//(D.f[ BE  ])[kbe  ] = mfabc;// -  c1over54 ;	 (D.f[ BE  ])[kb  ]
-			(D.f[TW])[kw] = mfcba;//(D.f[ TW  ])[ktw  ] = mfcba;// -  c1over54 ;	 (D.f[ TW  ])[kw  ]
-			(D.f[TN])[k] = mfbaa;//(D.f[ TN  ])[ktn  ] = mfbaa;// -  c1over54 ;	 (D.f[ TN  ])[k   ]
-			(D.f[BS])[kbs] = mfbcc;//(D.f[ BS  ])[kbs  ] = mfbcc;// -  c1over54 ;	 (D.f[ BS  ])[kbs ]
-			(D.f[BN])[kb] = mfbac;//(D.f[ BN  ])[kbn  ] = mfbac;// -  c1over54 ;	 (D.f[ BN  ])[kb  ]
-			(D.f[TS])[ks] = mfbca;//(D.f[ TS  ])[kts  ] = mfbca;// -  c1over54 ;	 (D.f[ TS  ])[ks  ]
-			(D.f[REST])[k] = mfbbb;//(D.f[ REST])[kzero] = mfbbb;// -  c8over27 ;	 (D.f[ REST])[k   ]
-			(D.f[TNE])[k] = mfaaa;//(D.f[ TNE ])[ktne ] = mfaaa;// -  c1over216;	 (D.f[ TNE ])[k   ]
-			(D.f[TSE])[ks] = mfaca;//(D.f[ TSE ])[ktse ] = mfaca;// -  c1over216;	 (D.f[ TSE ])[ks  ]
-			(D.f[BNE])[kb] = mfaac;//(D.f[ BNE ])[kbne ] = mfaac;// -  c1over216;	 (D.f[ BNE ])[kb  ]
-			(D.f[BSE])[kbs] = mfacc;//(D.f[ BSE ])[kbse ] = mfacc;// -  c1over216;	 (D.f[ BSE ])[kbs ]
-			(D.f[TNW])[kw] = mfcaa;//(D.f[ TNW ])[ktnw ] = mfcaa;// -  c1over216;	 (D.f[ TNW ])[kw  ]
-			(D.f[TSW])[ksw] = mfcca;//(D.f[ TSW ])[ktsw ] = mfcca;// -  c1over216;	 (D.f[ TSW ])[ksw ]
-			(D.f[BNW])[kbw] = mfcac;//(D.f[ BNW ])[kbnw ] = mfcac;// -  c1over216;	 (D.f[ BNW ])[kbw ]
-			(D.f[BSW])[kbsw] = mfccc;//(D.f[ BSW ])[kbsw ] = mfccc;// -  c1over216;	 (D.f[ BSW ])[kbsw]
+			(D.f[DIR_P00])[k] = mfabb;//(D.f[ DIR_P00   ])[ke   ] = mfabb;// -  c2over27 ;  (D.f[ DIR_P00   ])[k   ]                                                                     
+			(D.f[DIR_M00])[kw] = mfcbb;//(D.f[ DIR_M00   ])[kw   ] = mfcbb;// -  c2over27 ;  (D.f[ DIR_M00   ])[kw  ]                                                                   
+			(D.f[DIR_0P0])[k] = mfbab;//(D.f[ DIR_0P0   ])[kn   ] = mfbab;// -  c2over27 ;	 (D.f[ DIR_0P0   ])[k   ]
+			(D.f[DIR_0M0])[ks] = mfbcb;//(D.f[ DIR_0M0   ])[ks   ] = mfbcb;// -  c2over27 ;	 (D.f[ DIR_0M0   ])[ks  ]
+			(D.f[DIR_00P])[k] = mfbba;//(D.f[ DIR_00P   ])[kt   ] = mfbba;// -  c2over27 ;	 (D.f[ DIR_00P   ])[k   ]
+			(D.f[DIR_00M])[kb] = mfbbc;//(D.f[ DIR_00M   ])[kb   ] = mfbbc;// -  c2over27 ;	 (D.f[ DIR_00M   ])[kb  ]
+			(D.f[DIR_PP0])[k] = mfaab;//(D.f[ DIR_PP0  ])[kne  ] = mfaab;// -  c1over54 ;	 (D.f[ DIR_PP0  ])[k   ]
+			(D.f[DIR_MM0])[ksw] = mfccb;//(D.f[ DIR_MM0  ])[ksw  ] = mfccb;// -  c1over54 ;	 (D.f[ DIR_MM0  ])[ksw ]
+			(D.f[DIR_PM0])[ks] = mfacb;//(D.f[ DIR_PM0  ])[kse  ] = mfacb;// -  c1over54 ;	 (D.f[ DIR_PM0  ])[ks  ]
+			(D.f[DIR_MP0])[kw] = mfcab;//(D.f[ DIR_MP0  ])[knw  ] = mfcab;// -  c1over54 ;	 (D.f[ DIR_MP0  ])[kw  ]
+			(D.f[DIR_P0P])[k] = mfaba;//(D.f[ DIR_P0P  ])[kte  ] = mfaba;// -  c1over54 ;	 (D.f[ DIR_P0P  ])[k   ]
+			(D.f[DIR_M0M])[kbw] = mfcbc;//(D.f[ DIR_M0M  ])[kbw  ] = mfcbc;// -  c1over54 ;	 (D.f[ DIR_M0M  ])[kbw ]
+			(D.f[DIR_P0M])[kb] = mfabc;//(D.f[ DIR_P0M  ])[kbe  ] = mfabc;// -  c1over54 ;	 (D.f[ DIR_P0M  ])[kb  ]
+			(D.f[DIR_M0P])[kw] = mfcba;//(D.f[ DIR_M0P  ])[ktw  ] = mfcba;// -  c1over54 ;	 (D.f[ DIR_M0P  ])[kw  ]
+			(D.f[DIR_0PP])[k] = mfbaa;//(D.f[ DIR_0PP  ])[ktn  ] = mfbaa;// -  c1over54 ;	 (D.f[ DIR_0PP  ])[k   ]
+			(D.f[DIR_0MM])[kbs] = mfbcc;//(D.f[ DIR_0MM  ])[kbs  ] = mfbcc;// -  c1over54 ;	 (D.f[ DIR_0MM  ])[kbs ]
+			(D.f[DIR_0PM])[kb] = mfbac;//(D.f[ DIR_0PM  ])[kbn  ] = mfbac;// -  c1over54 ;	 (D.f[ DIR_0PM  ])[kb  ]
+			(D.f[DIR_0MP])[ks] = mfbca;//(D.f[ DIR_0MP  ])[kts  ] = mfbca;// -  c1over54 ;	 (D.f[ DIR_0MP  ])[ks  ]
+			(D.f[DIR_000])[k] = mfbbb;//(D.f[ DIR_000])[kzero] = mfbbb;// -  c8over27 ;	 (D.f[ DIR_000])[k   ]
+			(D.f[DIR_PPP])[k] = mfaaa;//(D.f[ DIR_PPP ])[ktne ] = mfaaa;// -  c1over216;	 (D.f[ DIR_PPP ])[k   ]
+			(D.f[DIR_PMP])[ks] = mfaca;//(D.f[ DIR_PMP ])[ktse ] = mfaca;// -  c1over216;	 (D.f[ DIR_PMP ])[ks  ]
+			(D.f[DIR_PPM])[kb] = mfaac;//(D.f[ DIR_PPM ])[kbne ] = mfaac;// -  c1over216;	 (D.f[ DIR_PPM ])[kb  ]
+			(D.f[DIR_PMM])[kbs] = mfacc;//(D.f[ DIR_PMM ])[kbse ] = mfacc;// -  c1over216;	 (D.f[ DIR_PMM ])[kbs ]
+			(D.f[DIR_MPP])[kw] = mfcaa;//(D.f[ DIR_MPP ])[ktnw ] = mfcaa;// -  c1over216;	 (D.f[ DIR_MPP ])[kw  ]
+			(D.f[DIR_MMP])[ksw] = mfcca;//(D.f[ DIR_MMP ])[ktsw ] = mfcca;// -  c1over216;	 (D.f[ DIR_MMP ])[ksw ]
+			(D.f[DIR_MPM])[kbw] = mfcac;//(D.f[ DIR_MPM ])[kbnw ] = mfcac;// -  c1over216;	 (D.f[ DIR_MPM ])[kbw ]
+			(D.f[DIR_MMM])[kbsw] = mfccc;//(D.f[ DIR_MMM ])[kbsw ] = mfccc;// -  c1over216;	 (D.f[ DIR_MMM ])[kbsw]
 										////////////////////////////////////////////////////////////////////////////////////
 		}
 	}
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/MRT/MRTIncompSP27_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/MRT/MRTIncompSP27_Device.cuh
index c9a353d6e08c4110a546416fd5185af68b837c65..d3a9fcea7c1a53e4084acf8dc5f1f815d0da967d 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/MRT/MRTIncompSP27_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/BasicKernels/FluidFlow/Incompressible/MRT/MRTIncompSP27_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_MRT_Incomp_SP_27(real omega,
+__global__ void LB_Kernel_MRT_Incomp_SP_27(real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27_Device.cu
index e961c35e37dc870affdfe4abec9766e41fc47fdc..89975d1663fb236295c22b81af4b0544ffc489bb 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_PM_Cum_One_Comp_SP_27(real omega,
+__global__ void LB_Kernel_PM_Cum_One_Comp_SP_27(real omega,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
@@ -24,63 +24,63 @@ extern "C" __global__ void LB_Kernel_PM_Cum_One_Comp_SP_27(real omega,
 	Distributions27 D;
 	if (EvenOrOdd == true)
 	{
-		D.f[E] = &DDStart[E   *size_Mat];
-		D.f[W] = &DDStart[W   *size_Mat];
-		D.f[N] = &DDStart[N   *size_Mat];
-		D.f[S] = &DDStart[S   *size_Mat];
-		D.f[T] = &DDStart[T   *size_Mat];
-		D.f[B] = &DDStart[B   *size_Mat];
-		D.f[NE] = &DDStart[NE  *size_Mat];
-		D.f[SW] = &DDStart[SW  *size_Mat];
-		D.f[SE] = &DDStart[SE  *size_Mat];
-		D.f[NW] = &DDStart[NW  *size_Mat];
-		D.f[TE] = &DDStart[TE  *size_Mat];
-		D.f[BW] = &DDStart[BW  *size_Mat];
-		D.f[BE] = &DDStart[BE  *size_Mat];
-		D.f[TW] = &DDStart[TW  *size_Mat];
-		D.f[TN] = &DDStart[TN  *size_Mat];
-		D.f[BS] = &DDStart[BS  *size_Mat];
-		D.f[BN] = &DDStart[BN  *size_Mat];
-		D.f[TS] = &DDStart[TS  *size_Mat];
-		D.f[REST] = &DDStart[REST*size_Mat];
-		D.f[TNE] = &DDStart[TNE *size_Mat];
-		D.f[TSW] = &DDStart[TSW *size_Mat];
-		D.f[TSE] = &DDStart[TSE *size_Mat];
-		D.f[TNW] = &DDStart[TNW *size_Mat];
-		D.f[BNE] = &DDStart[BNE *size_Mat];
-		D.f[BSW] = &DDStart[BSW *size_Mat];
-		D.f[BSE] = &DDStart[BSE *size_Mat];
-		D.f[BNW] = &DDStart[BNW *size_Mat];
+		D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat];
+		D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
+		D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
+		D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
+		D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
+		D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
+		D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
+		D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
+		D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
+		D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
+		D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
+		D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
+		D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
+		D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
+		D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
+		D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
+		D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
+		D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
+		D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+		D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
+		D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
+		D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
+		D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
+		D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
+		D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
+		D.f[DIR_PMM] = &DDStart[DIR_PMM *size_Mat];
+		D.f[DIR_MPM] = &DDStart[DIR_MPM *size_Mat];
 	}
 	else
 	{
-		D.f[W] = &DDStart[E   *size_Mat];
-		D.f[E] = &DDStart[W   *size_Mat];
-		D.f[S] = &DDStart[N   *size_Mat];
-		D.f[N] = &DDStart[S   *size_Mat];
-		D.f[B] = &DDStart[T   *size_Mat];
-		D.f[T] = &DDStart[B   *size_Mat];
-		D.f[SW] = &DDStart[NE  *size_Mat];
-		D.f[NE] = &DDStart[SW  *size_Mat];
-		D.f[NW] = &DDStart[SE  *size_Mat];
-		D.f[SE] = &DDStart[NW  *size_Mat];
-		D.f[BW] = &DDStart[TE  *size_Mat];
-		D.f[TE] = &DDStart[BW  *size_Mat];
-		D.f[TW] = &DDStart[BE  *size_Mat];
-		D.f[BE] = &DDStart[TW  *size_Mat];
-		D.f[BS] = &DDStart[TN  *size_Mat];
-		D.f[TN] = &DDStart[BS  *size_Mat];
-		D.f[TS] = &DDStart[BN  *size_Mat];
-		D.f[BN] = &DDStart[TS  *size_Mat];
-		D.f[REST] = &DDStart[REST*size_Mat];
-		D.f[BSW] = &DDStart[TNE *size_Mat];
-		D.f[BNE] = &DDStart[TSW *size_Mat];
-		D.f[BNW] = &DDStart[TSE *size_Mat];
-		D.f[BSE] = &DDStart[TNW *size_Mat];
-		D.f[TSW] = &DDStart[BNE *size_Mat];
-		D.f[TNE] = &DDStart[BSW *size_Mat];
-		D.f[TNW] = &DDStart[BSE *size_Mat];
-		D.f[TSE] = &DDStart[BNW *size_Mat];
+		D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat];
+		D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
+		D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
+		D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
+		D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
+		D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
+		D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
+		D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
+		D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
+		D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
+		D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
+		D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
+		D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
+		D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
+		D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
+		D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
+		D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
+		D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
+		D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+		D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
+		D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
+		D.f[DIR_MPM] = &DDStart[DIR_PMP *size_Mat];
+		D.f[DIR_PMM] = &DDStart[DIR_MPP *size_Mat];
+		D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
+		D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
+		D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
+		D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
 	}
 
 	////////////////////////////////////////////////////////////////////////////////
@@ -107,33 +107,33 @@ extern "C" __global__ void LB_Kernel_PM_Cum_One_Comp_SP_27(real omega,
 		unsigned int kbs = neighborZ[ks];
 		unsigned int kbsw = neighborZ[ksw];
 		//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-		real mfcbb = (D.f[E])[k];
-		real mfabb = (D.f[W])[kw];
-		real mfbcb = (D.f[N])[k];
-		real mfbab = (D.f[S])[ks];
-		real mfbbc = (D.f[T])[k];
-		real mfbba = (D.f[B])[kb];
-		real mfccb = (D.f[NE])[k];
-		real mfaab = (D.f[SW])[ksw];
-		real mfcab = (D.f[SE])[ks];
-		real mfacb = (D.f[NW])[kw];
-		real mfcbc = (D.f[TE])[k];
-		real mfaba = (D.f[BW])[kbw];
-		real mfcba = (D.f[BE])[kb];
-		real mfabc = (D.f[TW])[kw];
-		real mfbcc = (D.f[TN])[k];
-		real mfbaa = (D.f[BS])[kbs];
-		real mfbca = (D.f[BN])[kb];
-		real mfbac = (D.f[TS])[ks];
-		real mfbbb = (D.f[REST])[k];
-		real mfccc = (D.f[TNE])[k];
-		real mfaac = (D.f[TSW])[ksw];
-		real mfcac = (D.f[TSE])[ks];
-		real mfacc = (D.f[TNW])[kw];
-		real mfcca = (D.f[BNE])[kb];
-		real mfaaa = (D.f[BSW])[kbsw];
-		real mfcaa = (D.f[BSE])[kbs];
-		real mfaca = (D.f[BNW])[kbw];
+		real mfcbb = (D.f[DIR_P00])[k];
+		real mfabb = (D.f[DIR_M00])[kw];
+		real mfbcb = (D.f[DIR_0P0])[k];
+		real mfbab = (D.f[DIR_0M0])[ks];
+		real mfbbc = (D.f[DIR_00P])[k];
+		real mfbba = (D.f[DIR_00M])[kb];
+		real mfccb = (D.f[DIR_PP0])[k];
+		real mfaab = (D.f[DIR_MM0])[ksw];
+		real mfcab = (D.f[DIR_PM0])[ks];
+		real mfacb = (D.f[DIR_MP0])[kw];
+		real mfcbc = (D.f[DIR_P0P])[k];
+		real mfaba = (D.f[DIR_M0M])[kbw];
+		real mfcba = (D.f[DIR_P0M])[kb];
+		real mfabc = (D.f[DIR_M0P])[kw];
+		real mfbcc = (D.f[DIR_0PP])[k];
+		real mfbaa = (D.f[DIR_0MM])[kbs];
+		real mfbca = (D.f[DIR_0PM])[kb];
+		real mfbac = (D.f[DIR_0MP])[ks];
+		real mfbbb = (D.f[DIR_000])[k];
+		real mfccc = (D.f[DIR_PPP])[k];
+		real mfaac = (D.f[DIR_MMP])[ksw];
+		real mfcac = (D.f[DIR_PMP])[ks];
+		real mfacc = (D.f[DIR_MPP])[kw];
+		real mfcca = (D.f[DIR_PPM])[kb];
+		real mfaaa = (D.f[DIR_MMM])[kbsw];
+		real mfcaa = (D.f[DIR_PMM])[kbs];
+		real mfaca = (D.f[DIR_MPM])[kbw];
 		////////////////////////////////////////////////////////////////////////////////////
 		real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 			(((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
@@ -904,33 +904,33 @@ extern "C" __global__ void LB_Kernel_PM_Cum_One_Comp_SP_27(real omega,
 		////////////////////////////////////////////////////////////////////////////////////
 
 		////////////////////////////////////////////////////////////////////////////////////
-		(D.f[E])[k] = mfabb;
-		(D.f[W])[kw] = mfcbb;
-		(D.f[N])[k] = mfbab;
-		(D.f[S])[ks] = mfbcb;
-		(D.f[T])[k] = mfbba;
-		(D.f[B])[kb] = mfbbc;
-		(D.f[NE])[k] = mfaab;
-		(D.f[SW])[ksw] = mfccb;
-		(D.f[SE])[ks] = mfacb;
-		(D.f[NW])[kw] = mfcab;
-		(D.f[TE])[k] = mfaba;
-		(D.f[BW])[kbw] = mfcbc;
-		(D.f[BE])[kb] = mfabc;
-		(D.f[TW])[kw] = mfcba;
-		(D.f[TN])[k] = mfbaa;
-		(D.f[BS])[kbs] = mfbcc;
-		(D.f[BN])[kb] = mfbac;
-		(D.f[TS])[ks] = mfbca;
-		(D.f[REST])[k] = mfbbb;
-		(D.f[TNE])[k] = mfaaa;
-		(D.f[TSE])[ks] = mfaca;
-		(D.f[BNE])[kb] = mfaac;
-		(D.f[BSE])[kbs] = mfacc;
-		(D.f[TNW])[kw] = mfcaa;
-		(D.f[TSW])[ksw] = mfcca;
-		(D.f[BNW])[kbw] = mfcac;
-		(D.f[BSW])[kbsw] = mfccc;
+		(D.f[DIR_P00])[k] = mfabb;
+		(D.f[DIR_M00])[kw] = mfcbb;
+		(D.f[DIR_0P0])[k] = mfbab;
+		(D.f[DIR_0M0])[ks] = mfbcb;
+		(D.f[DIR_00P])[k] = mfbba;
+		(D.f[DIR_00M])[kb] = mfbbc;
+		(D.f[DIR_PP0])[k] = mfaab;
+		(D.f[DIR_MM0])[ksw] = mfccb;
+		(D.f[DIR_PM0])[ks] = mfacb;
+		(D.f[DIR_MP0])[kw] = mfcab;
+		(D.f[DIR_P0P])[k] = mfaba;
+		(D.f[DIR_M0M])[kbw] = mfcbc;
+		(D.f[DIR_P0M])[kb] = mfabc;
+		(D.f[DIR_M0P])[kw] = mfcba;
+		(D.f[DIR_0PP])[k] = mfbaa;
+		(D.f[DIR_0MM])[kbs] = mfbcc;
+		(D.f[DIR_0PM])[kb] = mfbac;
+		(D.f[DIR_0MP])[ks] = mfbca;
+		(D.f[DIR_000])[k] = mfbbb;
+		(D.f[DIR_PPP])[k] = mfaaa;
+		(D.f[DIR_PMP])[ks] = mfaca;
+		(D.f[DIR_PPM])[kb] = mfaac;
+		(D.f[DIR_PMM])[kbs] = mfacc;
+		(D.f[DIR_MPP])[kw] = mfcaa;
+		(D.f[DIR_MMP])[ksw] = mfcca;
+		(D.f[DIR_MPM])[kbw] = mfcac;
+		(D.f[DIR_MMM])[kbsw] = mfccc;
 		////////////////////////////////////////////////////////////////////////////////////
 	}
 }
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27_Device.cuh
index e02e8abe1321d4b9b47872f9a5e7c5414a587531..6533c604f32a478cdc6a097e4dd7d0b56e48150d 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/PorousMediaKernels/FluidFlow/Compressible/CumulantOne/PMCumulantOneCompSP27_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_PM_Cum_One_Comp_SP_27(real omega,
+__global__ void LB_Kernel_PM_Cum_One_Comp_SP_27(real omega,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim.cu
index c1be283f7fb0e5585c28bdaf5e4519f468c32c8f..a9d518d14a286ae3f6b565176969162994afa269 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim.cu
@@ -1,42 +1,46 @@
 #include "TurbulentViscosityCumulantK17CompChim.h"
 #include "cuda/CudaGrid.h"
+#include <logger/Logger.h>
 #include "Parameter/Parameter.h"
 #include "TurbulentViscosityCumulantK17CompChim_Device.cuh"
 
-std::shared_ptr<TurbulentViscosityCumulantK17CompChim> TurbulentViscosityCumulantK17CompChim::getNewInstance(std::shared_ptr<Parameter> para, int level)
+template<TurbulenceModel turbulenceModel> // Factory: creates the kernel wrapper for the turbulence model selected via the template argument.
+std::shared_ptr< TurbulentViscosityCumulantK17CompChim<turbulenceModel> > TurbulentViscosityCumulantK17CompChim<turbulenceModel>::getNewInstance(std::shared_ptr<Parameter> para, int level)
 {
-	return std::shared_ptr<TurbulentViscosityCumulantK17CompChim>(new TurbulentViscosityCumulantK17CompChim(para,level));
+	return std::shared_ptr<TurbulentViscosityCumulantK17CompChim<turbulenceModel> >(new TurbulentViscosityCumulantK17CompChim<turbulenceModel>(para,level)); // para and level are forwarded unchanged to the constructor
 }
 
-void TurbulentViscosityCumulantK17CompChim::run()
+template<TurbulenceModel turbulenceModel> // Launches the device kernel instantiated for turbulenceModel, using the data of this->level.
+void TurbulentViscosityCumulantK17CompChim<turbulenceModel>::run()
 {
 	vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParH(level)->numberofthreads, para->getParH(level)->numberOfNodes);
 
-	LB_Kernel_TurbulentViscosityCumulantK17CompChim <<< grid.grid, grid.threads >>>(
-		para->getParD(level)->omega,
-		para->getParD(level)->typeOfGridNode,
-		para->getParD(level)->neighborX,
-		para->getParD(level)->neighborY,
-		para->getParD(level)->neighborZ,
-		para->getParD(level)->distributions.f[0],
-		para->getParD(level)->rho,
-		para->getParD(level)->velocityX,
-		para->getParD(level)->velocityY,
-		para->getParD(level)->velocityZ,
-		para->getParD(level)->turbViscosity,
-		(unsigned long)para->getParD(level)->numberOfNodes,
-		level,
-		para->getIsBodyForce(),
-		para->getForcesDev(),
-		para->getParD(level)->forceX_SP,
-		para->getParD(level)->forceY_SP,
-		para->getParD(level)->forceZ_SP,
-        para->getQuadricLimitersDev(),
-		para->getParD(level)->isEvenTimestep);
+	LB_Kernel_TurbulentViscosityCumulantK17CompChim < turbulenceModel  > <<< grid.grid, grid.threads >>>(   para->getParD(level)->omega, 	
+																											para->getParD(level)->typeOfGridNode, 										para->getParD(level)->neighborX,	
+																											para->getParD(level)->neighborY,	
+																											para->getParD(level)->neighborZ,	
+																											para->getParD(level)->distributions.f[0],	
+																											para->getParD(level)->rho,		
+																											para->getParD(level)->velocityX,		
+																											para->getParD(level)->velocityY,	
+																											para->getParD(level)->velocityZ,	
+																											para->getParD(level)->turbViscosity,
+																											para->getSGSConstant(), // extra argument of the templated kernel; NOTE(review): its position must match the parameter order declared in TurbulentViscosityCumulantK17CompChim_Device.cuh - confirm
+																											(unsigned long)para->getParD(level)->numberOfNodes,	
+																											level,				
+																											para->getIsBodyForce(),				
+																											para->getForcesDev(),				
+																											para->getParD(level)->forceX_SP,	
+																											para->getParD(level)->forceY_SP,
+																											para->getParD(level)->forceZ_SP,
+																											para->getQuadricLimitersDev(),			
+																											para->getParD(level)->isEvenTimestep);
+
 	getLastCudaError("LB_Kernel_TurbulentViscosityCumulantK17CompChim execution failed");
 }
 
-TurbulentViscosityCumulantK17CompChim::TurbulentViscosityCumulantK17CompChim(std::shared_ptr<Parameter> para, int level)
+template<TurbulenceModel turbulenceModel> // Constructor: stores para and level, registers the InitCompSP27 preprocessor type and sets the kernel group.
+TurbulentViscosityCumulantK17CompChim<turbulenceModel>::TurbulentViscosityCumulantK17CompChim(std::shared_ptr<Parameter> para, int level)
 {
 	this->para = para;
 	this->level = level;
@@ -44,4 +48,10 @@ TurbulentViscosityCumulantK17CompChim::TurbulentViscosityCumulantK17CompChim(std
 	myPreProcessorTypes.push_back(InitCompSP27);
 
 	myKernelGroup = BasicKernel;
-}
\ No newline at end of file
+
+	VF_LOG_INFO("Using turbulence model: {}", turbulenceModel); // emitted once per constructed instance; NOTE(review): relies on TurbulenceModel being formattable by the logger's "{}" placeholder - confirm
+}
+
+template class TurbulentViscosityCumulantK17CompChim<TurbulenceModel::AMD>;         // explicit instantiations: the member definitions of this class template live in this source file,
+template class TurbulentViscosityCumulantK17CompChim<TurbulenceModel::Smagorinsky>; // so each TurbulenceModel specialization is instantiated here
+template class TurbulentViscosityCumulantK17CompChim<TurbulenceModel::QR>;          // (AMD, Smagorinsky and QR are the cases handled by the kernel's switch).
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim.h b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim.h
index d107700e59d657dc6da656037638a407ed0499a3..0d35b68c916e54c6ec6eeeacd7189fe4d9a33c10 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim.h
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim.h
@@ -2,11 +2,13 @@
 #define TurbulentViscosityCUMULANT_K17_COMP_CHIM_H
 
 #include "Kernel/KernelImp.h"
+#include "Parameter/Parameter.h"
 
+template<TurbulenceModel turbulenceModel> 
 class TurbulentViscosityCumulantK17CompChim : public KernelImp
 {
 public:
-	static std::shared_ptr<TurbulentViscosityCumulantK17CompChim> getNewInstance(std::shared_ptr< Parameter> para, int level);
+	static std::shared_ptr< TurbulentViscosityCumulantK17CompChim<turbulenceModel> > getNewInstance(std::shared_ptr< Parameter> para, int level);
 	void run();
 
 private:
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim_Device.cu
index ede7eafeda92faf9be1d5ee0512d37d828093397..0b22960083b61de1b2729d005f0c58a1cacf7398 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim_Device.cu
@@ -44,13 +44,16 @@
 #include <lbm/constants/NumericConstants.h>
 #include "Kernel/Utilities/DistributionHelper.cuh"
 
+#include "GPU/TurbulentViscosityInlines.cuh"
+
 using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "Kernel/ChimeraTransformation.h"
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim(
+template<TurbulenceModel turbulenceModel>
+__global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim(
 	real omega_in,
 	uint* typeOfGridNode,
 	uint* neighborX,
@@ -62,6 +65,7 @@ extern "C" __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim(
     real* vy,
     real* vz,
     real* turbulentViscosity,
+    real SGSconstant,
 	unsigned long size_Mat,
 	int level,
     bool bodyForce,
@@ -83,11 +87,11 @@ extern "C" __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim(
     ////////////////////////////////////////////////////////////////////////////////
     //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
     //!
-    const uint k = vf::gpu::getNodeIndex();
+    const unsigned k_000 = vf::gpu::getNodeIndex();
 
     //////////////////////////////////////////////////////////////////////////
     // run for all indices in size_Mat and fluid nodes
-    if ((k < size_Mat) && (typeOfGridNode[k] == GEO_FLUID)) {
+    if ((k_000 < size_Mat) && (typeOfGridNode[k_000] == GEO_FLUID)) {
         //////////////////////////////////////////////////////////////////////////
         //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on
         //! timestep is based on the esoteric twist algorithm \ref <a
@@ -98,66 +102,97 @@ extern "C" __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim(
 
         ////////////////////////////////////////////////////////////////////////////////
         //! - Set neighbor indices (necessary for indirect addressing)
-        uint kw   = neighborX[k];
-        uint ks   = neighborY[k];
-        uint kb   = neighborZ[k];
-        uint ksw  = neighborY[kw];
-        uint kbw  = neighborZ[kw];
-        uint kbs  = neighborZ[ks];
-        uint kbsw = neighborZ[ksw];
+        uint k_M00 = neighborX[k_000];
+        uint k_0M0 = neighborY[k_000];
+        uint k_00M = neighborZ[k_000];
+        uint k_MM0 = neighborY[k_M00];
+        uint k_M0M = neighborZ[k_M00];
+        uint k_0MM = neighborZ[k_0M0];
+        uint k_MMM = neighborZ[k_MM0];
         ////////////////////////////////////////////////////////////////////////////////////
         //! - Set local distributions
         //!
-        real mfcbb = (dist.f[E])[k];
-        real mfabb = (dist.f[W])[kw];
-        real mfbcb = (dist.f[N])[k];
-        real mfbab = (dist.f[S])[ks];
-        real mfbbc = (dist.f[T])[k];
-        real mfbba = (dist.f[B])[kb];
-        real mfccb = (dist.f[NE])[k];
-        real mfaab = (dist.f[SW])[ksw];
-        real mfcab = (dist.f[SE])[ks];
-        real mfacb = (dist.f[NW])[kw];
-        real mfcbc = (dist.f[TE])[k];
-        real mfaba = (dist.f[BW])[kbw];
-        real mfcba = (dist.f[BE])[kb];
-        real mfabc = (dist.f[TW])[kw];
-        real mfbcc = (dist.f[TN])[k];
-        real mfbaa = (dist.f[BS])[kbs];
-        real mfbca = (dist.f[BN])[kb];
-        real mfbac = (dist.f[TS])[ks];
-        real mfbbb = (dist.f[REST])[k];
-        real mfccc = (dist.f[TNE])[k];
-        real mfaac = (dist.f[TSW])[ksw];
-        real mfcac = (dist.f[TSE])[ks];
-        real mfacc = (dist.f[TNW])[kw];
-        real mfcca = (dist.f[BNE])[kb];
-        real mfaaa = (dist.f[BSW])[kbsw];
-        real mfcaa = (dist.f[BSE])[kbs];
-        real mfaca = (dist.f[BNW])[kbw];
+        real f_000 = (dist.f[DIR_000])[k_000];
+        real f_P00 = (dist.f[DIR_P00])[k_000];
+        real f_M00 = (dist.f[DIR_M00])[k_M00];
+        real f_0P0 = (dist.f[DIR_0P0])[k_000];
+        real f_0M0 = (dist.f[DIR_0M0])[k_0M0];
+        real f_00P = (dist.f[DIR_00P])[k_000];
+        real f_00M = (dist.f[DIR_00M])[k_00M];
+        real f_PP0 = (dist.f[DIR_PP0])[k_000];
+        real f_MM0 = (dist.f[DIR_MM0])[k_MM0];
+        real f_PM0 = (dist.f[DIR_PM0])[k_0M0];
+        real f_MP0 = (dist.f[DIR_MP0])[k_M00];
+        real f_P0P = (dist.f[DIR_P0P])[k_000];
+        real f_M0M = (dist.f[DIR_M0M])[k_M0M];
+        real f_P0M = (dist.f[DIR_P0M])[k_00M];
+        real f_M0P = (dist.f[DIR_M0P])[k_M00];
+        real f_0PP = (dist.f[DIR_0PP])[k_000];
+        real f_0MM = (dist.f[DIR_0MM])[k_0MM];
+        real f_0PM = (dist.f[DIR_0PM])[k_00M];
+        real f_0MP = (dist.f[DIR_0MP])[k_0M0];
+        real f_PPP = (dist.f[DIR_PPP])[k_000];
+        real f_MPP = (dist.f[DIR_MPP])[k_M00];
+        real f_PMP = (dist.f[DIR_PMP])[k_0M0];
+        real f_MMP = (dist.f[DIR_MMP])[k_MM0];
+        real f_PPM = (dist.f[DIR_PPM])[k_00M];
+        real f_MPM = (dist.f[DIR_MPM])[k_M0M];
+        real f_PMM = (dist.f[DIR_PMM])[k_0MM];
+        real f_MMM = (dist.f[DIR_MMM])[k_MMM];
+
+        ////////////////////////////////////////////////////////////////////////////////////
+        //! - Define aliases to use the same variable for the moments (m's):
+        //!
+        real& m_111 = f_000;
+        real& m_211 = f_P00;
+        real& m_011 = f_M00;
+        real& m_121 = f_0P0;
+        real& m_101 = f_0M0;
+        real& m_112 = f_00P;
+        real& m_110 = f_00M;
+        real& m_221 = f_PP0;
+        real& m_001 = f_MM0;
+        real& m_201 = f_PM0;
+        real& m_021 = f_MP0;
+        real& m_212 = f_P0P;
+        real& m_010 = f_M0M;
+        real& m_210 = f_P0M;
+        real& m_012 = f_M0P;
+        real& m_122 = f_0PP;
+        real& m_100 = f_0MM;
+        real& m_120 = f_0PM;
+        real& m_102 = f_0MP;
+        real& m_222 = f_PPP;
+        real& m_022 = f_MPP;
+        real& m_202 = f_PMP;
+        real& m_002 = f_MMP;
+        real& m_220 = f_PPM;
+        real& m_020 = f_MPM;
+        real& m_200 = f_PMM;
+        real& m_000 = f_MMM;
+
         //////////////////////////////////////////////////////(unsigned long)//////////////////////////////
         //! - Calculate density and velocity using pyramid summation for low round-off errors as in Eq. (J1)-(J3) \ref
         //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015),
         //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
         //!
-        real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
-                     (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) +
-                      ((mfacb + mfcab) + (mfaab + mfccb))) +
-                     ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) +
-                    mfbbb;
-
-        real rrho   = c1o1 + drho;
-        real OOrho = c1o1 / rrho;
-
-        real vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
-                    (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) + (mfcbb - mfabb)) *
-                   OOrho;
-        real vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
-                    (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) + (mfbcb - mfbab)) *
-                   OOrho;
-        real vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
-                    (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) + (mfbbc - mfbba)) *
-                   OOrho;
+        real drho = ((((f_PPP + f_MMM) + (f_MPM + f_PMP)) + ((f_MPP + f_PMM) + (f_MMP + f_PPM))) +
+                    (((f_0MP + f_0PM) + (f_0MM + f_0PP)) + ((f_M0P + f_P0M) + (f_M0M + f_P0P)) +
+                    ((f_MP0 + f_PM0) + (f_MM0 + f_PP0))) +
+                    ((f_M00 + f_P00) + (f_0M0 + f_0P0) + (f_00M + f_00P))) +
+                        f_000;
+
+        real oneOverRho = c1o1 / (c1o1 + drho);
+
+        real vvx = ((((f_PPP - f_MMM) + (f_PMP - f_MPM)) + ((f_PMM - f_MPP) + (f_PPM - f_MMP))) +
+                    (((f_P0M - f_M0P) + (f_P0P - f_M0M)) + ((f_PM0 - f_MP0) + (f_PP0 - f_MM0))) + (f_P00 - f_M00)) *
+                oneOverRho;
+        real vvy = ((((f_PPP - f_MMM) + (f_MPM - f_PMP)) + ((f_MPP - f_PMM) + (f_PPM - f_MMP))) +
+                    (((f_0PM - f_0MP) + (f_0PP - f_0MM)) + ((f_MP0 - f_PM0) + (f_PP0 - f_MM0))) + (f_0P0 - f_0M0)) *
+                oneOverRho;
+        real vvz = ((((f_PPP - f_MMM) + (f_PMP - f_MPM)) + ((f_MPP - f_PMM) + (f_MMP - f_PPM))) +
+                    (((f_0MP - f_0PM) + (f_0PP - f_0MM)) + ((f_M0P - f_P0M) + (f_P0P - f_M0M))) + (f_00P - f_00M)) *
+                oneOverRho;
         
         ////////////////////////////////////////////////////////////////////////////////////
         //! - Add half of the acceleration (body force) to the velocity as in Eq. (42) \ref
@@ -173,9 +208,9 @@ extern "C" __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim(
         real fz = forces[2];
 
         if( bodyForce ){
-            fx += bodyForceX[k]; 
-            fy += bodyForceY[k];
-            fz += bodyForceZ[k];
+            fx += bodyForceX[k_000]; 
+            fy += bodyForceY[k_000];
+            fz += bodyForceZ[k_000];
 
             real vx = vvx;
             real vy = vvy;
@@ -202,9 +237,9 @@ extern "C" __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim(
             //!> differ by several orders of magnitude.
             //!> \note 16/05/2022: Testing, still ongoing! 
             //!
-            bodyForceX[k] = (acc_x-(vvx-vx))*factor*c2o1;
-            bodyForceY[k] = (acc_y-(vvy-vy))*factor*c2o1;
-            bodyForceZ[k] = (acc_z-(vvz-vz))*factor*c2o1;
+            bodyForceX[k_000] = (acc_x-(vvx-vx))*factor*c2o1;
+            bodyForceY[k_000] = (acc_y-(vvy-vy))*factor*c2o1;
+            bodyForceZ[k_000] = (acc_z-(vvz-vz))*factor*c2o1;
         }
         else{
             vvx += fx * c1o2 / factor;
@@ -223,10 +258,9 @@ extern "C" __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim(
         //! section 6 in \ref <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
         //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
         //!
-        real wadjust;
-        real qudricLimitP = quadricLimiters[0];
-        real qudricLimitM = quadricLimiters[1];
-        real qudricLimitD = quadricLimiters[2];
+        real quadricLimitP = quadricLimiters[0];
+        real quadricLimitM = quadricLimiters[1];
+        real quadricLimitD = quadricLimiters[2];
         ////////////////////////////////////////////////////////////////////////////////////
         //! - Chimera transform from well conditioned distributions to central moments as defined in Appendix J in \ref
         //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015),
@@ -236,39 +270,39 @@ extern "C" __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim(
         //!
         ////////////////////////////////////////////////////////////////////////////////////
         // Z - Dir
-        forwardInverseChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, c36o1, c1o36);
-        forwardInverseChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2, c9o1, c1o9);
-        forwardInverseChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c36o1, c1o36);
-        forwardInverseChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2, c9o1, c1o9);
-        forwardInverseChimeraWithK(mfbba, mfbbb, mfbbc, vvz, vz2, c9o4, c4o9);
-        forwardInverseChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2, c9o1, c1o9);
-        forwardInverseChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, c36o1, c1o36);
-        forwardInverseChimeraWithK(mfcba, mfcbb, mfcbc, vvz, vz2, c9o1, c1o9);
-        forwardInverseChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c36o1, c1o36);
+        forwardInverseChimeraWithK(f_MMM, f_MM0, f_MMP, vvz, vz2, c36o1, c1o36);
+        forwardInverseChimeraWithK(f_M0M, f_M00, f_M0P, vvz, vz2, c9o1,  c1o9);
+        forwardInverseChimeraWithK(f_MPM, f_MP0, f_MPP, vvz, vz2, c36o1, c1o36);
+        forwardInverseChimeraWithK(f_0MM, f_0M0, f_0MP, vvz, vz2, c9o1,  c1o9);
+        forwardInverseChimeraWithK(f_00M, f_000, f_00P, vvz, vz2, c9o4,  c4o9);
+        forwardInverseChimeraWithK(f_0PM, f_0P0, f_0PP, vvz, vz2, c9o1,  c1o9);
+        forwardInverseChimeraWithK(f_PMM, f_PM0, f_PMP, vvz, vz2, c36o1, c1o36);
+        forwardInverseChimeraWithK(f_P0M, f_P00, f_P0P, vvz, vz2, c9o1,  c1o9);
+        forwardInverseChimeraWithK(f_PPM, f_PP0, f_PPP, vvz, vz2, c36o1, c1o36);
 
         ////////////////////////////////////////////////////////////////////////////////////
         // Y - Dir
-        forwardInverseChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, c6o1, c1o6);
-        forwardChimera(mfaab, mfabb, mfacb, vvy, vy2);
-        forwardInverseChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c18o1, c1o18);
-        forwardInverseChimeraWithK(mfbaa, mfbba, mfbca, vvy, vy2, c3o2, c2o3);
-        forwardChimera(mfbab, mfbbb, mfbcb, vvy, vy2);
-        forwardInverseChimeraWithK(mfbac, mfbbc, mfbcc, vvy, vy2, c9o2, c2o9);
-        forwardInverseChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, c6o1, c1o6);
-        forwardChimera(mfcab, mfcbb, mfccb, vvy, vy2);
-        forwardInverseChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c18o1, c1o18);
+        forwardInverseChimeraWithK(f_MMM, f_M0M, f_MPM, vvy, vy2, c6o1,  c1o6);
+        forwardChimera(            f_MM0, f_M00, f_MP0, vvy, vy2);
+        forwardInverseChimeraWithK(f_MMP, f_M0P, f_MPP, vvy, vy2, c18o1, c1o18);
+        forwardInverseChimeraWithK(f_0MM, f_00M, f_0PM, vvy, vy2, c3o2,  c2o3);
+        forwardChimera(            f_0M0, f_000, f_0P0, vvy, vy2);
+        forwardInverseChimeraWithK(f_0MP, f_00P, f_0PP, vvy, vy2, c9o2,  c2o9);
+        forwardInverseChimeraWithK(f_PMM, f_P0M, f_PPM, vvy, vy2, c6o1,  c1o6);
+        forwardChimera(            f_PM0, f_P00, f_PP0, vvy, vy2);
+        forwardInverseChimeraWithK(f_PMP, f_P0P, f_PPP, vvy, vy2, c18o1, c1o18);
 
         ////////////////////////////////////////////////////////////////////////////////////
         // X - Dir
-        forwardInverseChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1o1, c1o1);
-        forwardChimera(mfaba, mfbba, mfcba, vvx, vx2);
-        forwardInverseChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c3o1, c1o3);
-        forwardChimera(mfaab, mfbab, mfcab, vvx, vx2);
-        forwardChimera(mfabb, mfbbb, mfcbb, vvx, vx2);
-        forwardChimera(mfacb, mfbcb, mfccb, vvx, vx2);
-        forwardInverseChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c3o1, c1o3);
-        forwardChimera(mfabc, mfbbc, mfcbc, vvx, vx2);
-        forwardInverseChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c3o1, c1o9);
+        forwardInverseChimeraWithK(f_MMM, f_0MM, f_PMM, vvx, vx2, c1o1, c1o1);
+        forwardChimera(            f_M0M, f_00M, f_P0M, vvx, vx2);
+        forwardInverseChimeraWithK(f_MPM, f_0PM, f_PPM, vvx, vx2, c3o1, c1o3);
+        forwardChimera(            f_MM0, f_0M0, f_PM0, vvx, vx2);
+        forwardChimera(            f_M00, f_000, f_P00, vvx, vx2);
+        forwardChimera(            f_MP0, f_0P0, f_PP0, vvx, vx2);
+        forwardInverseChimeraWithK(f_MMP, f_0MP, f_PMP, vvx, vx2, c3o1, c1o3);
+        forwardChimera(            f_M0P, f_00P, f_P0P, vvx, vx2);
+        forwardInverseChimeraWithK(f_MPP, f_0PP, f_PPP, vvx, vx2, c3o1, c1o9);
 
         ////////////////////////////////////////////////////////////////////////////////////
         //! - Setting relaxation rates for non-hydrodynamic cumulants (default values). Variable names and equations
@@ -291,7 +325,7 @@ extern "C" __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim(
         ////////////////////////////////////////////////////////////////////////////////////
         //! - Calculate modified omega with turbulent viscosity
         //!
-        real omega = omega_in / (c1o1 + c3o1*omega_in*turbulentViscosity[k]);
+        real omega = omega_in / (c1o1 + c3o1*omega_in*turbulentViscosity[k_000]);
         ////////////////////////////////////////////////////////////
         // 2.
         real OxxPyyPzz = c1o1;
@@ -313,7 +347,7 @@ extern "C" __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim(
         real O6 = c1o1;
 
         ////////////////////////////////////////////////////////////////////////////////////
-        //! - A and B: parameters for fourth order convergence of the diffusion term according to Eq. (115) and (116)
+        //! - A and B: parameters for fourth order convergence of the diffusion term according to Eq. (115) and (116)
         //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
         //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a> with simplifications assuming \f$ \omega_2 = 1.0 \f$ (modify for
         //! different bulk viscosity).
@@ -328,63 +362,60 @@ extern "C" __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim(
         //!
         ////////////////////////////////////////////////////////////
         // 4.
-        real CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + c2o1 * mfbba * mfbab) * OOrho;
-        real CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + c2o1 * mfbba * mfabb) * OOrho;
-        real CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + c2o1 * mfbab * mfabb) * OOrho;
+        real c_211 = m_211 - ((m_200 + c1o3) * m_011 + c2o1 * m_110 * m_101) * oneOverRho;
+        real c_121 = m_121 - ((m_020 + c1o3) * m_101 + c2o1 * m_110 * m_011) * oneOverRho;
+        real c_112 = m_112 - ((m_002 + c1o3) * m_110 + c2o1 * m_101 * m_011) * oneOverRho;
 
-        real CUMcca =
-            mfcca - (((mfcaa * mfaca + c2o1 * mfbba * mfbba) + c1o3 * (mfcaa + mfaca)) * OOrho - c1o9 * (drho * OOrho));
-        real CUMcac =
-            mfcac - (((mfcaa * mfaac + c2o1 * mfbab * mfbab) + c1o3 * (mfcaa + mfaac)) * OOrho - c1o9 * (drho * OOrho));
-        real CUMacc =
-            mfacc - (((mfaac * mfaca + c2o1 * mfabb * mfabb) + c1o3 * (mfaac + mfaca)) * OOrho - c1o9 * (drho * OOrho));
+        real c_220 = m_220 - (((m_200 * m_020 + c2o1 * m_110 * m_110) + c1o3 * (m_200 + m_020)) * oneOverRho - c1o9 * (drho * oneOverRho));
+        real c_202 = m_202 - (((m_200 * m_002 + c2o1 * m_101 * m_101) + c1o3 * (m_200 + m_002)) * oneOverRho - c1o9 * (drho * oneOverRho));
+        real c_022 = m_022 - (((m_002 * m_020 + c2o1 * m_011 * m_011) + c1o3 * (m_002 + m_020)) * oneOverRho - c1o9 * (drho * oneOverRho));
         ////////////////////////////////////////////////////////////
         // 5.
-        real CUMbcc =
-            mfbcc - ((mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb + c2o1 * (mfbab * mfacb + mfbba * mfabc)) +
-                     c1o3 * (mfbca + mfbac)) *
-                        OOrho;
-        real CUMcbc =
-            mfcbc - ((mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb + c2o1 * (mfabb * mfcab + mfbba * mfbac)) +
-                     c1o3 * (mfcba + mfabc)) *
-                        OOrho;
-        real CUMccb =
-            mfccb - ((mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb + c2o1 * (mfbab * mfbca + mfabb * mfcba)) +
-                     c1o3 * (mfacb + mfcab)) *
-                        OOrho;
+        real c_122 =
+            m_122 - ((m_002 * m_120 + m_020 * m_102 + c4o1 * m_011 * m_111 + c2o1 * (m_101 * m_021 + m_110 * m_012)) +
+                    c1o3 * (m_120 + m_102)) *
+                    oneOverRho;
+        real c_212 =
+            m_212 - ((m_002 * m_210 + m_200 * m_012 + c4o1 * m_101 * m_111 + c2o1 * (m_011 * m_201 + m_110 * m_102)) +
+                    c1o3 * (m_210 + m_012)) *
+                    oneOverRho;
+        real c_221 =
+            m_221 - ((m_200 * m_021 + m_020 * m_201 + c4o1 * m_110 * m_111 + c2o1 * (m_101 * m_120 + m_011 * m_210)) +
+                    c1o3 * (m_021 + m_201)) *
+                    oneOverRho;
         ////////////////////////////////////////////////////////////
         // 6.
-        real CUMccc = mfccc + ((-c4o1 * mfbbb * mfbbb - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca) -
-                                c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc) -
-                                c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) *
-                                   OOrho +
-                               (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac) +
-                                c2o1 * (mfcaa * mfaca * mfaac) + c16o1 * mfbba * mfbab * mfabb) *
-                                   OOrho * OOrho -
-                               c1o3 * (mfacc + mfcac + mfcca) * OOrho - c1o9 * (mfcaa + mfaca + mfaac) * OOrho +
-                               (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba) +
-                                (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 * (mfaac + mfaca + mfcaa)) *
-                                   OOrho * OOrho * c2o3 +
-                               c1o27 * ((drho * drho - drho) * OOrho * OOrho));
+        real c_222 = m_222 + ((-c4o1 * m_111 * m_111 - (m_200 * m_022 + m_020 * m_202 + m_002 * m_220) -
+                                c4o1 * (m_011 * m_211 + m_101 * m_121 + m_110 * m_112) -
+                                c2o1 * (m_120 * m_102 + m_210 * m_012 + m_201 * m_021)) *
+                                oneOverRho +
+                            (c4o1 * (m_101 * m_101 * m_020 + m_011 * m_011 * m_200 + m_110 * m_110 * m_002) +
+                                c2o1 * (m_200 * m_020 * m_002) + c16o1 * m_110 * m_101 * m_011) *
+                                oneOverRho * oneOverRho -
+                                c1o3 * (m_022 + m_202 + m_220) * oneOverRho - c1o9 * (m_200 + m_020 + m_002) * oneOverRho +
+                            (c2o1 * (m_101 * m_101 + m_011 * m_011 + m_110 * m_110) +
+                                (m_002 * m_020 + m_002 * m_200 + m_020 * m_200) + c1o3 * (m_002 + m_020 + m_200)) *
+                                oneOverRho * oneOverRho * c2o3 +
+                                c1o27 * ((drho * drho - drho) * oneOverRho * oneOverRho));
 
         ////////////////////////////////////////////////////////////////////////////////////
         //! - Compute linear combinations of second and third order cumulants
         //!
         ////////////////////////////////////////////////////////////
         // 2.
-        real mxxPyyPzz = mfcaa + mfaca + mfaac;
-        real mxxMyy    = mfcaa - mfaca;
-        real mxxMzz    = mfcaa - mfaac;
+        real mxxPyyPzz = m_200 + m_020 + m_002;
+        real mxxMyy    = m_200 - m_020;
+        real mxxMzz    = m_200 - m_002;
         ////////////////////////////////////////////////////////////
         // 3.
-        real mxxyPyzz = mfcba + mfabc;
-        real mxxyMyzz = mfcba - mfabc;
+        real mxxyPyzz = m_210 + m_012;
+        real mxxyMyzz = m_210 - m_012;
 
-        real mxxzPyyz = mfcab + mfacb;
-        real mxxzMyyz = mfcab - mfacb;
+        real mxxzPyyz = m_201 + m_021;
+        real mxxzMyyz = m_201 - m_021;
 
-        real mxyyPxzz = mfbca + mfbac;
-        real mxyyMxzz = mfbca - mfbac;
+        real mxyyPxzz = m_120 + m_102;
+        real mxyyMxzz = m_120 - m_102;
 
         ////////////////////////////////////////////////////////////////////////////////////
         // incl. correction
@@ -396,37 +427,33 @@ extern "C" __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim(
         //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a> Note that the division by rho is omitted here as we need rho times
         //! the gradients later.
         //!
-        real Dxy  = -c3o1 * omega * mfbba;
-        real Dxz  = -c3o1 * omega * mfbab;
-        real Dyz  = -c3o1 * omega * mfabb;
-        real dxux = c1o2 * (-omega) * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz);
+        real Dxy  = -c3o1 * omega * m_110;
+        real Dxz  = -c3o1 * omega * m_101;
+        real Dyz  = -c3o1 * omega * m_011;
+        real dxux = c1o2 * (-omega) * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (m_000 - mxxPyyPzz);
         real dyuy = dxux + omega * c3o2 * mxxMyy;
         real dzuz = dxux + omega * c3o2 * mxxMzz;
 
-        //Smagorinsky for debugging
-        // if(true)
-        // {   
-            // if(false && k==99976)
-            // {
-            //     printf("dudz+dwdu: \t %1.14f \n", Dxz );
-            //     printf("dvdz+dudy: \t %1.14f \n", Dxy );  
-            //     printf("dwdy+dvdz: \t %1.14f \n", Dyz );  
-            //     printf("nu_t * dudz+dwdu: \t %1.14f \n", turbulentViscosity[k]*Dxz );
-            //     printf("nu_t * dvdz+dudy: \t %1.14f \n", turbulentViscosity[k]*Dxy );  
-            //     printf("nu_t * dwdy+dvdz: \t %1.14f \n", turbulentViscosity[k]*Dyz );      
-            // } 
-        //     real Sbar = sqrt(c2o1*(dxux*dxux+dyuy*dyuy+dzuz*dzuz)+Dxy*Dxy+Dxz*Dxz+Dyz*Dyz);
-        //     real Cs = 0.08f;
-        //     turbulentViscosity[k] = Cs*Cs*Sbar;
-        // }
-
+        ////////////////////////////////////////////////////////////////////////////////////
+        switch (turbulenceModel)
+        {
+        case TurbulenceModel::AMD:  //AMD is computed in separate kernel
+            break;
+        case TurbulenceModel::Smagorinsky:
+            turbulentViscosity[k_000] = calcTurbulentViscositySmagorinsky(SGSconstant, dxux, dyuy, dzuz, Dxy, Dxz , Dyz);
+            break;
+        case TurbulenceModel::QR:
+            turbulentViscosity[k_000] = calcTurbulentViscosityQR(SGSconstant, dxux, dyuy, dzuz, Dxy, Dxz , Dyz);
+            break;
+        default:
+            break;
+        }
         ////////////////////////////////////////////////////////////
         //! - Relaxation of second order cumulants with correction terms according to Eq. (33)-(35) in
         //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
         //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
         //!
-        mxxPyyPzz +=
-            OxxPyyPzz * (mfaaa - mxxPyyPzz) - c3o1 * (c1o1 - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz);
+        mxxPyyPzz += OxxPyyPzz * (m_000 - mxxPyyPzz) - c3o1 * (c1o1 - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz);
         mxxMyy += omega * (-mxxMyy) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vy2 * dyuy);
         mxxMzz += omega * (-mxxMzz) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vz2 * dzuz);
 
@@ -436,9 +463,9 @@ extern "C" __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim(
         // mxxMyy += -(-omega) * (-mxxMyy);
         // mxxMzz += -(-omega) * (-mxxMzz);
         //////////////////////////////////////////////////////////////////////////
-        mfabb += omega * (-mfabb);
-        mfbab += omega * (-mfbab);
-        mfbba += omega * (-mfbba);
+        m_011 += omega * (-m_011);
+        m_101 += omega * (-m_101);
+        m_110 += omega * (-m_110);
 
         ////////////////////////////////////////////////////////////////////////////////////
         // relax
@@ -448,19 +475,19 @@ extern "C" __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim(
         //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
         //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
         //!
-        wadjust = Oxyz + (c1o1 - Oxyz) * abs(mfbbb) / (abs(mfbbb) + qudricLimitD);
-        mfbbb += wadjust * (-mfbbb);
-        wadjust = OxyyPxzz + (c1o1 - OxyyPxzz) * abs(mxxyPyzz) / (abs(mxxyPyzz) + qudricLimitP);
+        real wadjust = Oxyz + (c1o1 - Oxyz) * abs(m_111) / (abs(m_111) + quadricLimitD);
+        m_111 += wadjust * (-m_111);
+        wadjust = OxyyPxzz + (c1o1 - OxyyPxzz) * abs(mxxyPyzz) / (abs(mxxyPyzz) + quadricLimitP);
         mxxyPyzz += wadjust * (-mxxyPyzz);
-        wadjust = OxyyMxzz + (c1o1 - OxyyMxzz) * abs(mxxyMyzz) / (abs(mxxyMyzz) + qudricLimitM);
+        wadjust = OxyyMxzz + (c1o1 - OxyyMxzz) * abs(mxxyMyzz) / (abs(mxxyMyzz) + quadricLimitM);
         mxxyMyzz += wadjust * (-mxxyMyzz);
-        wadjust = OxyyPxzz + (c1o1 - OxyyPxzz) * abs(mxxzPyyz) / (abs(mxxzPyyz) + qudricLimitP);
+        wadjust = OxyyPxzz + (c1o1 - OxyyPxzz) * abs(mxxzPyyz) / (abs(mxxzPyyz) + quadricLimitP);
         mxxzPyyz += wadjust * (-mxxzPyyz);
-        wadjust = OxyyMxzz + (c1o1 - OxyyMxzz) * abs(mxxzMyyz) / (abs(mxxzMyyz) + qudricLimitM);
+        wadjust = OxyyMxzz + (c1o1 - OxyyMxzz) * abs(mxxzMyyz) / (abs(mxxzMyyz) + quadricLimitM);
         mxxzMyyz += wadjust * (-mxxzMyyz);
-        wadjust = OxyyPxzz + (c1o1 - OxyyPxzz) * abs(mxyyPxzz) / (abs(mxyyPxzz) + qudricLimitP);
+        wadjust = OxyyPxzz + (c1o1 - OxyyPxzz) * abs(mxyyPxzz) / (abs(mxyyPxzz) + quadricLimitP);
         mxyyPxzz += wadjust * (-mxyyPxzz);
-        wadjust = OxyyMxzz + (c1o1 - OxyyMxzz) * abs(mxyyMxzz) / (abs(mxyyMxzz) + qudricLimitM);
+        wadjust = OxyyMxzz + (c1o1 - OxyyMxzz) * abs(mxyyMxzz) / (abs(mxyyMxzz) + quadricLimitM);
         mxyyMxzz += wadjust * (-mxyyMxzz);
         //////////////////////////////////////////////////////////////////////////
         // no limiter
@@ -475,16 +502,16 @@ extern "C" __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim(
         ////////////////////////////////////////////////////////////////////////////////////
         //! - Compute inverse linear combinations of second and third order cumulants
         //!
-        mfcaa = c1o3 * (mxxMyy + mxxMzz + mxxPyyPzz);
-        mfaca = c1o3 * (-c2o1 * mxxMyy + mxxMzz + mxxPyyPzz);
-        mfaac = c1o3 * (mxxMyy - c2o1 * mxxMzz + mxxPyyPzz);
-
-        mfcba = (mxxyMyzz + mxxyPyzz) * c1o2;
-        mfabc = (-mxxyMyzz + mxxyPyzz) * c1o2;
-        mfcab = (mxxzMyyz + mxxzPyyz) * c1o2;
-        mfacb = (-mxxzMyyz + mxxzPyyz) * c1o2;
-        mfbca = (mxyyMxzz + mxyyPxzz) * c1o2;
-        mfbac = (-mxyyMxzz + mxyyPxzz) * c1o2;
+        m_200 = c1o3 * (mxxMyy + mxxMzz + mxxPyyPzz);
+        m_020 = c1o3 * (-c2o1 * mxxMyy + mxxMzz + mxxPyyPzz);
+        m_002 = c1o3 * (mxxMyy - c2o1 * mxxMzz + mxxPyyPzz);
+
+        m_210 = ( mxxyMyzz + mxxyPyzz) * c1o2;
+        m_012 = (-mxxyMyzz + mxxyPyzz) * c1o2;
+        m_201 = ( mxxzMyyz + mxxzPyyz) * c1o2;
+        m_021 = (-mxxzMyyz + mxxzPyyz) * c1o2;
+        m_120 = ( mxyyMxzz + mxyyPxzz) * c1o2;
+        m_102 = (-mxyyMxzz + mxyyPxzz) * c1o2;
         //////////////////////////////////////////////////////////////////////////
 
         //////////////////////////////////////////////////////////////////////////
@@ -494,22 +521,23 @@ extern "C" __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim(
         //! to Eq. (43)-(48) <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
         //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
         //!
-        CUMacc = -O4 * (c1o1 / omega - c1o2) * (dyuy + dzuz) * c2o3 * factorA + (c1o1 - O4) * (CUMacc);
-        CUMcac = -O4 * (c1o1 / omega - c1o2) * (dxux + dzuz) * c2o3 * factorA + (c1o1 - O4) * (CUMcac);
-        CUMcca = -O4 * (c1o1 / omega - c1o2) * (dyuy + dxux) * c2o3 * factorA + (c1o1 - O4) * (CUMcca);
-        CUMbbc = -O4 * (c1o1 / omega - c1o2) * Dxy * c1o3 * factorB + (c1o1 - O4) * (CUMbbc);
-        CUMbcb = -O4 * (c1o1 / omega - c1o2) * Dxz * c1o3 * factorB + (c1o1 - O4) * (CUMbcb);
-        CUMcbb = -O4 * (c1o1 / omega - c1o2) * Dyz * c1o3 * factorB + (c1o1 - O4) * (CUMcbb);
+        c_022 = -O4 * (c1o1 / omega - c1o2) * (dyuy + dzuz) * c2o3 * factorA + (c1o1 - O4) * (c_022);
+        c_202 = -O4 * (c1o1 / omega - c1o2) * (dxux + dzuz) * c2o3 * factorA + (c1o1 - O4) * (c_202);
+        c_220 = -O4 * (c1o1 / omega - c1o2) * (dyuy + dxux) * c2o3 * factorA + (c1o1 - O4) * (c_220);
+        c_112 = -O4 * (c1o1 / omega - c1o2) * Dxy           * c1o3 * factorB + (c1o1 - O4) * (c_112);
+        c_121 = -O4 * (c1o1 / omega - c1o2) * Dxz           * c1o3 * factorB + (c1o1 - O4) * (c_121);
+        c_211 = -O4 * (c1o1 / omega - c1o2) * Dyz           * c1o3 * factorB + (c1o1 - O4) * (c_211);
+
 
         //////////////////////////////////////////////////////////////////////////
         // 5.
-        CUMbcc += O5 * (-CUMbcc);
-        CUMcbc += O5 * (-CUMcbc);
-        CUMccb += O5 * (-CUMccb);
+        c_122 += O5 * (-c_122);
+        c_212 += O5 * (-c_212);
+        c_221 += O5 * (-c_221);
 
         //////////////////////////////////////////////////////////////////////////
         // 6.
-        CUMccc += O6 * (-CUMccc);
+        c_222 += O6 * (-c_222);
 
         ////////////////////////////////////////////////////////////////////////////////////
         //! - Compute central moments from post collision cumulants according to Eq. (53)-(56) in
@@ -519,68 +547,58 @@ extern "C" __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim(
 
         //////////////////////////////////////////////////////////////////////////
         // 4.
-        mfcbb = CUMcbb + c1o3 * ((c3o1 * mfcaa + c1o1) * mfabb + c6o1 * mfbba * mfbab) * OOrho;
-        mfbcb = CUMbcb + c1o3 * ((c3o1 * mfaca + c1o1) * mfbab + c6o1 * mfbba * mfabb) * OOrho;
-        mfbbc = CUMbbc + c1o3 * ((c3o1 * mfaac + c1o1) * mfbba + c6o1 * mfbab * mfabb) * OOrho;
+        m_211 = c_211 + c1o3 * ((c3o1 * m_200 + c1o1) * m_011 + c6o1 * m_110 * m_101) * oneOverRho;
+        m_121 = c_121 + c1o3 * ((c3o1 * m_020 + c1o1) * m_101 + c6o1 * m_110 * m_011) * oneOverRho;
+        m_112 = c_112 + c1o3 * ((c3o1 * m_002 + c1o1) * m_110 + c6o1 * m_101 * m_011) * oneOverRho;
 
-        mfcca =
-            CUMcca +
-            (((mfcaa * mfaca + c2o1 * mfbba * mfbba) * c9o1 + c3o1 * (mfcaa + mfaca)) * OOrho - (drho * OOrho)) * c1o9;
-        mfcac =
-            CUMcac +
-            (((mfcaa * mfaac + c2o1 * mfbab * mfbab) * c9o1 + c3o1 * (mfcaa + mfaac)) * OOrho - (drho * OOrho)) * c1o9;
-        mfacc =
-            CUMacc +
-            (((mfaac * mfaca + c2o1 * mfabb * mfabb) * c9o1 + c3o1 * (mfaac + mfaca)) * OOrho - (drho * OOrho)) * c1o9;
+        m_220 =
+            c_220 + (((m_200 * m_020 + c2o1 * m_110 * m_110) * c9o1 + c3o1 * (m_200 + m_020)) * oneOverRho - (drho * oneOverRho)) * c1o9;
+        m_202 =
+            c_202 + (((m_200 * m_002 + c2o1 * m_101 * m_101) * c9o1 + c3o1 * (m_200 + m_002)) * oneOverRho - (drho * oneOverRho)) * c1o9;
+        m_022 =
+            c_022 + (((m_002 * m_020 + c2o1 * m_011 * m_011) * c9o1 + c3o1 * (m_002 + m_020)) * oneOverRho - (drho * oneOverRho)) * c1o9;
 
         //////////////////////////////////////////////////////////////////////////
         // 5.
-        mfbcc = CUMbcc + c1o3 *
-                             (c3o1 * (mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb +
-                                      c2o1 * (mfbab * mfacb + mfbba * mfabc)) +
-                              (mfbca + mfbac)) *
-                             OOrho;
-        mfcbc = CUMcbc + c1o3 *
-                             (c3o1 * (mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb +
-                                      c2o1 * (mfabb * mfcab + mfbba * mfbac)) +
-                              (mfcba + mfabc)) *
-                             OOrho;
-        mfccb = CUMccb + c1o3 *
-                             (c3o1 * (mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb +
-                                      c2o1 * (mfbab * mfbca + mfabb * mfcba)) +
-                              (mfacb + mfcab)) *
-                             OOrho;
+        m_122 = c_122 + c1o3 *
+                (c3o1 * (m_002 * m_120 + m_020 * m_102 + c4o1 * m_011 * m_111 + c2o1 * (m_101 * m_021 + m_110 * m_012)) +
+                (m_120 + m_102)) * oneOverRho;
+        m_212 = c_212 + c1o3 *
+                (c3o1 * (m_002 * m_210 + m_200 * m_012 + c4o1 * m_101 * m_111 + c2o1 * (m_011 * m_201 + m_110 * m_102)) +
+                (m_210 + m_012)) * oneOverRho;
+        m_221 = c_221 + c1o3 *
+                (c3o1 * (m_200 * m_021 + m_020 * m_201 + c4o1 * m_110 * m_111 + c2o1 * (m_101 * m_120 + m_011 * m_210)) +
+                (m_021 + m_201)) * oneOverRho;
 
         //////////////////////////////////////////////////////////////////////////
         // 6.
-        mfccc = CUMccc - ((-c4o1 * mfbbb * mfbbb - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca) -
-                           c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc) -
-                           c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) *
-                              OOrho +
-                          (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac) +
-                           c2o1 * (mfcaa * mfaca * mfaac) + c16o1 * mfbba * mfbab * mfabb) *
-                              OOrho * OOrho -
-                          c1o3 * (mfacc + mfcac + mfcca) * OOrho - c1o9 * (mfcaa + mfaca + mfaac) * OOrho +
-                          (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba) +
-                           (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 * (mfaac + mfaca + mfcaa)) *
-                              OOrho * OOrho * c2o3 +
-                          c1o27 * ((drho * drho - drho) * OOrho * OOrho));
+        m_222 = c_222 - ((-c4o1 * m_111 * m_111 - (m_200 * m_022 + m_020 * m_202 + m_002 * m_220) -
+                        c4o1 * (m_011 * m_211 + m_101 * m_121 + m_110 * m_112) -
+                        c2o1 * (m_120 * m_102 + m_210 * m_012 + m_201 * m_021)) *
+                        oneOverRho +
+                        (c4o1 * (m_101 * m_101 * m_020 + m_011 * m_011 * m_200 + m_110 * m_110 * m_002) +
+                        c2o1 * (m_200 * m_020 * m_002) + c16o1 * m_110 * m_101 * m_011) *
+                        oneOverRho * oneOverRho -
+                        c1o3 * (m_022 + m_202 + m_220) * oneOverRho - c1o9 * (m_200 + m_020 + m_002) * oneOverRho +
+                        (c2o1 * (m_101 * m_101 + m_011 * m_011 + m_110 * m_110) +
+                        (m_002 * m_020 + m_002 * m_200 + m_020 * m_200) + c1o3 * (m_002 + m_020 + m_200)) *
+                        oneOverRho * oneOverRho * c2o3 +
+                        c1o27 * ((drho * drho - drho) * oneOverRho * oneOverRho));
 
         ////////////////////////////////////////////////////////////////////////////////////
         //! -  Add acceleration (body force) to first order cumulants according to Eq. (85)-(87) in
         //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015),
         //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
         //!
-        mfbaa = -mfbaa;
-        mfaba = -mfaba;
-        mfaab = -mfaab;
-
+        m_100 = -m_100;
+        m_010 = -m_010;
+        m_001 = -m_001;
 
         //Write to array here to distribute read/write
-        rho[k] = drho;
-        vx[k] = vvx;
-        vy[k] = vvy;
-        vz[k] = vvz;
+        rho[k_000] = drho;
+        vx[k_000] = vvx;
+        vy[k_000] = vvy;
+        vz[k_000] = vvz;
 
         ////////////////////////////////////////////////////////////////////////////////////
         //! - Chimera transform from central moments to well conditioned distributions as defined in Appendix J in
@@ -591,39 +609,39 @@ extern "C" __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim(
         //!
         ////////////////////////////////////////////////////////////////////////////////////
         // X - Dir
-        backwardInverseChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1o1, c1o1);
-        backwardChimera(mfaba, mfbba, mfcba, vvx, vx2);
-        backwardInverseChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c3o1, c1o3);
-        backwardChimera(mfaab, mfbab, mfcab, vvx, vx2);
-        backwardChimera(mfabb, mfbbb, mfcbb, vvx, vx2);
-        backwardChimera(mfacb, mfbcb, mfccb, vvx, vx2);
-        backwardInverseChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c3o1, c1o3);
-        backwardChimera(mfabc, mfbbc, mfcbc, vvx, vx2);
-        backwardInverseChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c9o1, c1o9);
+        backwardInverseChimeraWithK(m_000, m_100, m_200, vvx, vx2, c1o1, c1o1);
+        backwardChimera(            m_010, m_110, m_210, vvx, vx2);
+        backwardInverseChimeraWithK(m_020, m_120, m_220, vvx, vx2, c3o1, c1o3);
+        backwardChimera(            m_001, m_101, m_201, vvx, vx2);
+        backwardChimera(            m_011, m_111, m_211, vvx, vx2);
+        backwardChimera(            m_021, m_121, m_221, vvx, vx2);
+        backwardInverseChimeraWithK(m_002, m_102, m_202, vvx, vx2, c3o1, c1o3);
+        backwardChimera(            m_012, m_112, m_212, vvx, vx2);
+        backwardInverseChimeraWithK(m_022, m_122, m_222, vvx, vx2, c9o1, c1o9);
 
         ////////////////////////////////////////////////////////////////////////////////////
         // Y - Dir
-        backwardInverseChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, c6o1, c1o6);
-        backwardChimera(mfaab, mfabb, mfacb, vvy, vy2);
-        backwardInverseChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c18o1, c1o18);
-        backwardInverseChimeraWithK(mfbaa, mfbba, mfbca, vvy, vy2, c3o2, c2o3);
-        backwardChimera(mfbab, mfbbb, mfbcb, vvy, vy2);
-        backwardInverseChimeraWithK(mfbac, mfbbc, mfbcc, vvy, vy2, c9o2, c2o9);
-        backwardInverseChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, c6o1, c1o6);
-        backwardChimera(mfcab, mfcbb, mfccb, vvy, vy2);
-        backwardInverseChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c18o1, c1o18);
+        backwardInverseChimeraWithK(m_000, m_010, m_020, vvy, vy2, c6o1, c1o6);
+        backwardChimera(            m_001, m_011, m_021, vvy, vy2);
+        backwardInverseChimeraWithK(m_002, m_012, m_022, vvy, vy2, c18o1, c1o18);
+        backwardInverseChimeraWithK(m_100, m_110, m_120, vvy, vy2, c3o2, c2o3);
+        backwardChimera(            m_101, m_111, m_121, vvy, vy2);
+        backwardInverseChimeraWithK(m_102, m_112, m_122, vvy, vy2, c9o2, c2o9);
+        backwardInverseChimeraWithK(m_200, m_210, m_220, vvy, vy2, c6o1, c1o6);
+        backwardChimera(            m_201, m_211, m_221, vvy, vy2);
+        backwardInverseChimeraWithK(m_202, m_212, m_222, vvy, vy2, c18o1, c1o18);
 
         ////////////////////////////////////////////////////////////////////////////////////
         // Z - Dir
-        backwardInverseChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, c36o1, c1o36);
-        backwardInverseChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2, c9o1, c1o9);
-        backwardInverseChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c36o1, c1o36);
-        backwardInverseChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2, c9o1, c1o9);
-        backwardInverseChimeraWithK(mfbba, mfbbb, mfbbc, vvz, vz2, c9o4, c4o9);
-        backwardInverseChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2, c9o1, c1o9);
-        backwardInverseChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, c36o1, c1o36);
-        backwardInverseChimeraWithK(mfcba, mfcbb, mfcbc, vvz, vz2, c9o1, c1o9);
-        backwardInverseChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c36o1, c1o36);
+        backwardInverseChimeraWithK(m_000, m_001, m_002, vvz, vz2, c36o1, c1o36);
+        backwardInverseChimeraWithK(m_010, m_011, m_012, vvz, vz2, c9o1, c1o9);
+        backwardInverseChimeraWithK(m_020, m_021, m_022, vvz, vz2, c36o1, c1o36);
+        backwardInverseChimeraWithK(m_100, m_101, m_102, vvz, vz2, c9o1, c1o9);
+        backwardInverseChimeraWithK(m_110, m_111, m_112, vvz, vz2, c9o4, c4o9);
+        backwardInverseChimeraWithK(m_120, m_121, m_122, vvz, vz2, c9o1, c1o9);
+        backwardInverseChimeraWithK(m_200, m_201, m_202, vvz, vz2, c36o1, c1o36);
+        backwardInverseChimeraWithK(m_210, m_211, m_212, vvz, vz2, c9o1, c1o9);
+        backwardInverseChimeraWithK(m_220, m_221, m_222, vvz, vz2, c36o1, c1o36);
 
         ////////////////////////////////////////////////////////////////////////////////////
         //! - Write distributions: style of reading and writing the distributions from/to
@@ -631,905 +649,38 @@ extern "C" __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim(
         //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017),
         //! DOI:10.3390/computation5020019 ]</b></a>
         //!
-        (dist.f[E])[k]      = mfabb;
-        (dist.f[W])[kw]     = mfcbb;
-        (dist.f[N])[k]      = mfbab;
-        (dist.f[S])[ks]     = mfbcb;
-        (dist.f[T])[k]      = mfbba;
-        (dist.f[B])[kb]     = mfbbc;
-        (dist.f[NE])[k]     = mfaab;
-        (dist.f[SW])[ksw]   = mfccb;
-        (dist.f[SE])[ks]    = mfacb;
-        (dist.f[NW])[kw]    = mfcab;
-        (dist.f[TE])[k]     = mfaba;
-        (dist.f[BW])[kbw]   = mfcbc;
-        (dist.f[BE])[kb]    = mfabc;
-        (dist.f[TW])[kw]    = mfcba;
-        (dist.f[TN])[k]     = mfbaa;
-        (dist.f[BS])[kbs]   = mfbcc;
-        (dist.f[BN])[kb]    = mfbac;
-        (dist.f[TS])[ks]    = mfbca;
-        (dist.f[REST])[k]   = mfbbb;
-        (dist.f[TNE])[k]    = mfaaa;
-        (dist.f[TSE])[ks]   = mfaca;
-        (dist.f[BNE])[kb]   = mfaac;
-        (dist.f[BSE])[kbs]  = mfacc;
-        (dist.f[TNW])[kw]   = mfcaa;
-        (dist.f[TSW])[ksw]  = mfcca;
-        (dist.f[BNW])[kbw]  = mfcac;
-        (dist.f[BSW])[kbsw] = mfccc;
-
-
+        (dist.f[DIR_P00])[k_000]    = f_M00;
+        (dist.f[DIR_M00])[k_M00]    = f_P00;
+        (dist.f[DIR_0P0])[k_000]    = f_0M0;
+        (dist.f[DIR_0M0])[k_0M0]    = f_0P0;
+        (dist.f[DIR_00P])[k_000]    = f_00M;
+        (dist.f[DIR_00M])[k_00M]    = f_00P;
+        (dist.f[DIR_PP0])[k_000]   = f_MM0;
+        (dist.f[DIR_MM0])[k_MM0]   = f_PP0;
+        (dist.f[DIR_PM0])[k_0M0]   = f_MP0;
+        (dist.f[DIR_MP0])[k_M00]   = f_PM0;
+        (dist.f[DIR_P0P])[k_000]   = f_M0M;
+        (dist.f[DIR_M0M])[k_M0M]   = f_P0P;
+        (dist.f[DIR_P0M])[k_00M]   = f_M0P;
+        (dist.f[DIR_M0P])[k_M00]   = f_P0M;
+        (dist.f[DIR_0PP])[k_000]   = f_0MM;
+        (dist.f[DIR_0MM])[k_0MM]   = f_0PP;
+        (dist.f[DIR_0PM])[k_00M]   = f_0MP;
+        (dist.f[DIR_0MP])[k_0M0]   = f_0PM;
+        (dist.f[DIR_000])[k_000] = f_000;
+        (dist.f[DIR_PPP])[k_000]  = f_MMM;
+        (dist.f[DIR_PMP])[k_0M0]  = f_MPM;
+        (dist.f[DIR_PPM])[k_00M]  = f_MMP;
+        (dist.f[DIR_PMM])[k_0MM]  = f_MPP;
+        (dist.f[DIR_MPP])[k_M00]  = f_PMM;
+        (dist.f[DIR_MMP])[k_MM0]  = f_PPM;
+        (dist.f[DIR_MPM])[k_M0M]  = f_PMP;
+        (dist.f[DIR_MMM])[k_MMM]  = f_PPP;
     }
 }
 
+template __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim < TurbulenceModel::AMD > ( real omega_in, uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, bool bodyForce, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep); // explicit instantiation of the kernel template for TurbulenceModel::AMD
 
+template __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim < TurbulenceModel::Smagorinsky > ( real omega_in, uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, bool bodyForce, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep); // explicit instantiation of the kernel template for TurbulenceModel::Smagorinsky
 
-
-//WORK IN PROGRESS: Incorporating DistributionWrapper in kernel.....
-
-// //=======================================================================================
-// // ____          ____    __    ______     __________   __      __       __        __         
-// // \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |        
-// //  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |        
-// //   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |        
-// //    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____    
-// //     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|   
-// //      \    \  |    |   ________________________________________________________________    
-// //       \    \ |    |  |  ______________________________________________________________|   
-// //        \    \|    |  |  |         __          __     __     __     ______      _______    
-// //         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)   
-// //          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______    
-// //           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
-// //            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/   
-// //
-// //  This file is part of VirtualFluids. VirtualFluids is free software: you can 
-// //  redistribute it and/or modify it under the terms of the GNU General Public
-// //  License as published by the Free Software Foundation, either version 3 of 
-// //  the License, or (at your option) any later version.
-// //  
-// //  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT 
-// //  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
-// //  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
-// //  for more details.
-// //  
-// //  You should have received a copy of the GNU General Public License along
-// //  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-// //
-// //! \file TurbulentViscosityCumulantK17CompChim_Device.cu
-// //! \author Henry Korb, Henrik Asmuth
-// //! \date 16/05/2022
-// //! \brief CumulantK17CompChim kernel by Martin Schönherr that inlcudes turbulent viscosity and other small mods.
-// //!
-// //! Additions to CumulantK17CompChim:
-// //!     - can incorporate local body force 
-// //!     - when applying a local body force, the total round of error of forcing+bodyforce is saved and added in next time step
-// //!     - uses turbulent viscosity that is computed in separate kernel (as of now AMD)
-// //!     - saves macroscopic values (needed for instance for probes, AMD, and actuator models)
-// //!
-// //=======================================================================================
-// /* Device code */
-// #include "LBM/LB.h" 
-// #include "lbm/constants/D3Q27.h"
-// #include <lbm/constants/NumericConstants.h>
-
-// using namespace vf::lbm::constant;
-// #include "Kernel/ChimeraTransformation.h"
-
-// #include "Kernel/Utilities/DistributionHelper.cuh"
-
-// #include "lbm/MacroscopicQuantities.h"
-
-// ////////////////////////////////////////////////////////////////////////////////
-// extern "C" __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim(
-// 	real omega_in,
-// 	uint* typeOfGridNode,
-// 	uint* neighborX,
-// 	uint* neighborY,
-// 	uint* neighborZ,
-// 	real* distributions,
-//     real* rho,
-//     real* vx,
-//     real* vy,
-//     real* vz,
-//     real* turbulentViscosity,
-// 	unsigned long size_Mat,
-// 	int level,
-//     bool bodyForce,
-// 	real* forces,
-//     real* bodyForceX,
-//     real* bodyForceY,
-//     real* bodyForceZ,
-// 	real* quadricLimiters,
-// 	bool isEvenTimestep)
-// {
-//     //////////////////////////////////////////////////////////////////////////
-//     //! Cumulant K17 Kernel is based on \ref
-//     //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040
-//     //! ]</b></a> and \ref <a href="https://doi.org/10.1016/j.jcp.2017.07.004"><b>[ M. Geier et al. (2017),
-//     //! DOI:10.1016/j.jcp.2017.07.004 ]</b></a>
-//     //!
-//     //! The cumulant kernel is executed in the following steps
-//     //!
-//     ////////////////////////////////////////////////////////////////////////////////
-//     //! - Get node index coordinates from threadIdx, blockIdx, blockDim and gridDim.
-//     //!
-
-//     // const unsigned x = threadIdx.x;
-//     // const unsigned y = blockIdx.x;
-//     // const unsigned z = blockIdx.y;
-
-//     // const unsigned nx = blockDim.x;
-//     // const unsigned ny = gridDim.x;
-
-//     // const unsigned k = nx * (ny * z + y) + x;
-//     const unsigned k = vf::gpu::getNodeIndex();
-//     //////////////////////////////////////////////////////////////////////////
-//     // run for all indices in size_Mat and fluid nodes
-//     // if ((k < size_Mat) && (typeOfGridNode[k] == GEO_FLUID)) {
-//     if ((k < size_Mat) && vf::gpu::isValidFluidNode(typeOfGridNode[k])) {
-//         //////////////////////////////////////////////////////////////////////////
-//         //! - Read distributions: style of reading and writing the distributions from/to stored arrays dependent on
-//         //! timestep is based on the esoteric twist algorithm \ref <a
-//         //! href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017),
-//         //! DOI:10.3390/computation5020019 ]</b></a>
-//         //!
-
-//         vf::gpu::DistributionWrapper distr_wrapper( distributions, size_Mat, 
-//                                                     isEvenTimestep, k, 
-//                                                     neighborX, neighborY, neighborZ);
-
-//         Distributions27 dist;
-//         if (isEvenTimestep) {
-//             dist.f[E]    = &distributions[E * size_Mat];
-//             dist.f[W]    = &distributions[W * size_Mat];
-//             dist.f[N]    = &distributions[N * size_Mat];
-//             dist.f[S]    = &distributions[S * size_Mat];
-//             dist.f[T]    = &distributions[T * size_Mat];
-//             dist.f[B]    = &distributions[B * size_Mat];
-//             dist.f[NE]   = &distributions[NE * size_Mat];
-//             dist.f[SW]   = &distributions[SW * size_Mat];
-//             dist.f[SE]   = &distributions[SE * size_Mat];
-//             dist.f[NW]   = &distributions[NW * size_Mat];
-//             dist.f[TE]   = &distributions[TE * size_Mat];
-//             dist.f[BW]   = &distributions[BW * size_Mat];
-//             dist.f[BE]   = &distributions[BE * size_Mat];
-//             dist.f[TW]   = &distributions[TW * size_Mat];
-//             dist.f[TN]   = &distributions[TN * size_Mat];
-//             dist.f[BS]   = &distributions[BS * size_Mat];
-//             dist.f[BN]   = &distributions[BN * size_Mat];
-//             dist.f[TS]   = &distributions[TS * size_Mat];
-//             dist.f[REST] = &distributions[REST * size_Mat];
-//             dist.f[TNE]  = &distributions[TNE * size_Mat];
-//             dist.f[TSW]  = &distributions[TSW * size_Mat];
-//             dist.f[TSE]  = &distributions[TSE * size_Mat];
-//             dist.f[TNW]  = &distributions[TNW * size_Mat];
-//             dist.f[BNE]  = &distributions[BNE * size_Mat];
-//             dist.f[BSW]  = &distributions[BSW * size_Mat];
-//             dist.f[BSE]  = &distributions[BSE * size_Mat];
-//             dist.f[BNW]  = &distributions[BNW * size_Mat];
-//         } else {
-//             dist.f[W]    = &distributions[E * size_Mat];
-//             dist.f[E]    = &distributions[W * size_Mat];
-//             dist.f[S]    = &distributions[N * size_Mat];
-//             dist.f[N]    = &distributions[S * size_Mat];
-//             dist.f[B]    = &distributions[T * size_Mat];
-//             dist.f[T]    = &distributions[B * size_Mat];
-//             dist.f[SW]   = &distributions[NE * size_Mat];
-//             dist.f[NE]   = &distributions[SW * size_Mat];
-//             dist.f[NW]   = &distributions[SE * size_Mat];
-//             dist.f[SE]   = &distributions[NW * size_Mat];
-//             dist.f[BW]   = &distributions[TE * size_Mat];
-//             dist.f[TE]   = &distributions[BW * size_Mat];
-//             dist.f[TW]   = &distributions[BE * size_Mat];
-//             dist.f[BE]   = &distributions[TW * size_Mat];
-//             dist.f[BS]   = &distributions[TN * size_Mat];
-//             dist.f[TN]   = &distributions[BS * size_Mat];
-//             dist.f[TS]   = &distributions[BN * size_Mat];
-//             dist.f[BN]   = &distributions[TS * size_Mat];
-//             dist.f[REST] = &distributions[REST * size_Mat];
-//             dist.f[BSW]  = &distributions[TNE * size_Mat];
-//             dist.f[BNE]  = &distributions[TSW * size_Mat];
-//             dist.f[BNW]  = &distributions[TSE * size_Mat];
-//             dist.f[BSE]  = &distributions[TNW * size_Mat];
-//             dist.f[TSW]  = &distributions[BNE * size_Mat];
-//             dist.f[TNE]  = &distributions[BSW * size_Mat];
-//             dist.f[TNW]  = &distributions[BSE * size_Mat];
-//             dist.f[TSE]  = &distributions[BNW * size_Mat];
-//         }
-//         ////////////////////////////////////////////////////////////////////////////////
-//         //! - Set neighbor indices (necessary for indirect addressing)
-//         uint kw   = neighborX[k];
-//         uint ks   = neighborY[k];
-//         uint kb   = neighborZ[k];
-//         uint ksw  = neighborY[kw];
-//         uint kbw  = neighborZ[kw];
-//         uint kbs  = neighborZ[ks];
-//         uint kbsw = neighborZ[ksw];
-//         ////////////////////////////////////////////////////////////////////////////////////
-//         //! - Set local distributions
-//         //!
-
-//         // real mfcbb = distr_wrapper.distribution.f[E];
-//         // real mfabb = distr_wrapper.distribution.f[W];
-//         // real mfbcb = distr_wrapper.distribution.f[N];
-//         // real mfbab = distr_wrapper.distribution.f[S];
-//         // real mfbbc = distr_wrapper.distribution.f[T];
-//         // real mfbba = distr_wrapper.distribution.f[B];
-//         // real mfccb = distr_wrapper.distribution.f[NE];
-//         // real mfaab = distr_wrapper.distribution.f[SW];
-//         // real mfcab = distr_wrapper.distribution.f[SE];
-//         // real mfacb = distr_wrapper.distribution.f[NW];
-//         // real mfcbc = distr_wrapper.distribution.f[TE];
-//         // real mfaba = distr_wrapper.distribution.f[BW];
-//         // real mfcba = distr_wrapper.distribution.f[BE];
-//         // real mfabc = distr_wrapper.distribution.f[TW];
-//         // real mfbcc = distr_wrapper.distribution.f[TN];
-//         // real mfbaa = distr_wrapper.distribution.f[BS];
-//         // real mfbca = distr_wrapper.distribution.f[BN];
-//         // real mfbac = distr_wrapper.distribution.f[TS];
-//         // real mfbbb = distr_wrapper.distribution.f[REST];
-//         // real mfccc = distr_wrapper.distribution.f[TNE];
-//         // real mfaac = distr_wrapper.distribution.f[TSW];
-//         // real mfcac = distr_wrapper.distribution.f[TSE];
-//         // real mfacc = distr_wrapper.distribution.f[TNW];
-//         // real mfcca = distr_wrapper.distribution.f[BNE];
-//         // real mfaaa = distr_wrapper.distribution.f[BSW];
-//         // real mfcaa = distr_wrapper.distribution.f[BSE];
-//         // real mfaca = distr_wrapper.distribution.f[BNW];
-
-        
-//         real mfcbb = (dist.f[E])[k];
-//         real mfabb = (dist.f[W])[kw];
-//         real mfbcb = (dist.f[N])[k];
-//         real mfbab = (dist.f[S])[ks];
-//         real mfbbc = (dist.f[T])[k];
-//         real mfbba = (dist.f[B])[kb];
-//         real mfccb = (dist.f[NE])[k];
-//         real mfaab = (dist.f[SW])[ksw];
-//         real mfcab = (dist.f[SE])[ks];
-//         real mfacb = (dist.f[NW])[kw];
-//         real mfcbc = (dist.f[TE])[k];
-//         real mfaba = (dist.f[BW])[kbw];
-//         real mfcba = (dist.f[BE])[kb];
-//         real mfabc = (dist.f[TW])[kw];
-//         real mfbcc = (dist.f[TN])[k];
-//         real mfbaa = (dist.f[BS])[kbs];
-//         real mfbca = (dist.f[BN])[kb];
-//         real mfbac = (dist.f[TS])[ks];
-//         real mfbbb = (dist.f[REST])[k];
-//         real mfccc = (dist.f[TNE])[k];
-//         real mfaac = (dist.f[TSW])[ksw];
-//         real mfcac = (dist.f[TSE])[ks];
-//         real mfacc = (dist.f[TNW])[kw];
-//         real mfcca = (dist.f[BNE])[kb];
-//         real mfaaa = (dist.f[BSW])[kbsw];
-//         real mfcaa = (dist.f[BSE])[kbs];
-//         real mfaca = (dist.f[BNW])[kbw];
-        
-//         //////////////////////////////////////////////////////(unsigned long)//////////////////////////////
-//         //! - Calculate density and velocity using pyramid summation for low round-off errors as in Eq. (J1)-(J3) \ref
-//         //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015),
-//         //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
-//         //!
-//         // real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
-//         //              (((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) +
-//         //               ((mfacb + mfcab) + (mfaab + mfccb))) +
-//         //              ((mfabb + mfcbb) + (mfbab + mfbcb) + (mfbba + mfbbc))) +
-//         //             mfbbb;
-//         real drho = vf::lbm::getDensity(distr_wrapper.distribution.f);
-
-//         real rrho   = c1o1 + drho;
-//         real OOrho = c1o1 / rrho;
-
-//         // real vvx = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfcaa - mfacc) + (mfcca - mfaac))) +
-//         //             (((mfcba - mfabc) + (mfcbc - mfaba)) + ((mfcab - mfacb) + (mfccb - mfaab))) + (mfcbb - mfabb)) *
-//         //            OOrho;
-//         real vvx = vf::lbm::getCompressibleVelocityX1(distr_wrapper.distribution.f, drho);
-//         // real vvy = ((((mfccc - mfaaa) + (mfaca - mfcac)) + ((mfacc - mfcaa) + (mfcca - mfaac))) +
-//         //             (((mfbca - mfbac) + (mfbcc - mfbaa)) + ((mfacb - mfcab) + (mfccb - mfaab))) + (mfbcb - mfbab)) *
-//         //            OOrho;
-//         real vvy = vf::lbm::getCompressibleVelocityX2(distr_wrapper.distribution.f, drho);
-//         // real vvz = ((((mfccc - mfaaa) + (mfcac - mfaca)) + ((mfacc - mfcaa) + (mfaac - mfcca))) +
-//         //             (((mfbac - mfbca) + (mfbcc - mfbaa)) + ((mfabc - mfcba) + (mfcbc - mfaba))) + (mfbbc - mfbba)) *
-//         //            OOrho;
-//         real vvz = vf::lbm::getCompressibleVelocityX3(distr_wrapper.distribution.f, drho);
-//         // if(k==100000){printf("%f \t %f \t%f \t%f \n\n", drho, vvx, vvz, vvy);}
-//         ////////////////////////////////////////////////////////////////////////////////////
-//         //! - Add half of the acceleration (body force) to the velocity as in Eq. (42) \ref
-//         //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015),
-//         //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
-//         //!
-//         real factor = c1o1;
-//         for (size_t i = 1; i <= level; i++) {
-//             factor *= c2o1;
-//         }
-        
-//         real fx = forces[0];
-//         real fy = forces[1];
-//         real fz = forces[2];
-
-//         if( bodyForce ){
-//             fx += bodyForceX[k]; 
-//             fy += bodyForceY[k];
-//             fz += bodyForceZ[k];
-
-//             real vx = vvx;
-//             real vy = vvy;
-//             real vz = vvz;
-//             real acc_x = fx * c1o2 / factor;
-//             real acc_y = fy * c1o2 / factor;
-//             real acc_z = fz * c1o2 / factor;
-
-//             vvx += acc_x;
-//             vvy += acc_y;
-//             vvz += acc_z;
-            
-//         //    // Reset body force. To be used when not using round-off correction.
-//         // bodyForceX[k] = 0.0f;
-//         // bodyForceY[k] = 0.0f;
-//         // bodyForceZ[k] = 0.0f;
-
-//             ////////////////////////////////////////////////////////////////////////////////////
-//             //!> Round-off correction
-//             //!
-//             //!> Similar to Kahan summation algorithm (https://en.wikipedia.org/wiki/Kahan_summation_algorithm)
-//             //!> Essentially computes the round-off error of the applied force and adds it in the next time step as a compensation.
-//             //!> Seems to be necesseary at very high Re boundary layers, where the forcing and velocity can  
-//             //!> differ by several orders of magnitude.
-//             //!> \note 16/05/2022: Testing, still ongoing! 
-//             //!
-//             bodyForceX[k] = (acc_x-(double)(vvx-vx))*factor*c2o1;
-//             bodyForceY[k] = (acc_y-(double)(vvy-vy))*factor*c2o1;
-//             bodyForceZ[k] = (acc_z-(double)(vvz-vz))*factor*c2o1;
-
-//         }
-//         else{
-//             vvx += fx * c1o2 / factor;
-//             vvy += fy * c1o2 / factor;
-//             vvz += fz * c1o2 / factor;
-//         }
-        
-
-//         ////////////////////////////////////////////////////////////////////////////////////
-//         // calculate the square of velocities for this lattice node
-//         real vx2 = vvx * vvx;
-//         real vy2 = vvy * vvy;
-//         real vz2 = vvz * vvz;
-//         ////////////////////////////////////////////////////////////////////////////////////
-//         //! - Set relaxation limiters for third order cumulants to default value \f$ \lambda=0.001 \f$ according to
-//         //! section 6 in \ref <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
-//         //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
-//         //!
-//         real wadjust;
-//         real qudricLimitP = quadricLimiters[0];
-//         real qudricLimitM = quadricLimiters[1];
-//         real qudricLimitD = quadricLimiters[2];
-//         ////////////////////////////////////////////////////////////////////////////////////
-//         //! - Chimera transform from well conditioned distributions to central moments as defined in Appendix J in \ref
-//         //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015),
-//         //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a> see also Eq. (6)-(14) in \ref <a
-//         //! href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040
-//         //! ]</b></a>
-//         //!
-//         ////////////////////////////////////////////////////////////////////////////////////
-//         // Z - Dir
-//         forwardInverseChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, c36o1, c1o36);
-//         forwardInverseChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2, c9o1, c1o9);
-//         forwardInverseChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c36o1, c1o36);
-//         forwardInverseChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2, c9o1, c1o9);
-//         forwardInverseChimeraWithK(mfbba, mfbbb, mfbbc, vvz, vz2, c9o4, c4o9);
-//         forwardInverseChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2, c9o1, c1o9);
-//         forwardInverseChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, c36o1, c1o36);
-//         forwardInverseChimeraWithK(mfcba, mfcbb, mfcbc, vvz, vz2, c9o1, c1o9);
-//         forwardInverseChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c36o1, c1o36);
-
-//         ////////////////////////////////////////////////////////////////////////////////////
-//         // Y - Dir
-//         forwardInverseChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, c6o1, c1o6);
-//         forwardChimera(mfaab, mfabb, mfacb, vvy, vy2);
-//         forwardInverseChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c18o1, c1o18);
-//         forwardInverseChimeraWithK(mfbaa, mfbba, mfbca, vvy, vy2, c3o2, c2o3);
-//         forwardChimera(mfbab, mfbbb, mfbcb, vvy, vy2);
-//         forwardInverseChimeraWithK(mfbac, mfbbc, mfbcc, vvy, vy2, c9o2, c2o9);
-//         forwardInverseChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, c6o1, c1o6);
-//         forwardChimera(mfcab, mfcbb, mfccb, vvy, vy2);
-//         forwardInverseChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c18o1, c1o18);
-
-//         ////////////////////////////////////////////////////////////////////////////////////
-//         // X - Dir
-//         forwardInverseChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1o1, c1o1);
-//         forwardChimera(mfaba, mfbba, mfcba, vvx, vx2);
-//         forwardInverseChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c3o1, c1o3);
-//         forwardChimera(mfaab, mfbab, mfcab, vvx, vx2);
-//         forwardChimera(mfabb, mfbbb, mfcbb, vvx, vx2);
-//         forwardChimera(mfacb, mfbcb, mfccb, vvx, vx2);
-//         forwardInverseChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c3o1, c1o3);
-//         forwardChimera(mfabc, mfbbc, mfcbc, vvx, vx2);
-//         forwardInverseChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c3o1, c1o9);
-
-//         ////////////////////////////////////////////////////////////////////////////////////
-//         //! - Setting relaxation rates for non-hydrodynamic cumulants (default values). Variable names and equations
-//         //! according to <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
-//         //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
-//         //!  => [NAME IN PAPER]=[NAME IN CODE]=[DEFAULT VALUE].
-//         //!  - Trace of second order cumulants \f$ C_{200}+C_{020}+C_{002} \f$ used to adjust bulk
-//         //!  viscosity:\f$\omega_2=OxxPyyPzz=1.0 \f$.
-//         //!  - Third order cumulants \f$ C_{120}+C_{102}, C_{210}+C_{012}, C_{201}+C_{021} \f$: \f$ \omega_3=OxyyPxzz
-//         //!  \f$ set according to Eq. (111) with simplifications assuming \f$ \omega_2=1.0\f$.
-//         //!  - Third order cumulants \f$ C_{120}-C_{102}, C_{210}-C_{012}, C_{201}-C_{021} \f$: \f$ \omega_4 = OxyyMxzz
-//         //!  \f$ set according to Eq. (112) with simplifications assuming \f$ \omega_2 = 1.0\f$.
-//         //!  - Third order cumulants \f$ C_{111} \f$: \f$ \omega_5 = Oxyz \f$ set according to Eq. (113) with
-//         //!  simplifications assuming \f$ \omega_2 = 1.0\f$  (modify for different bulk viscosity).
-//         //!  - Fourth order cumulants \f$ C_{220}, C_{202}, C_{022}, C_{211}, C_{121}, C_{112} \f$: for simplification
-//         //!  all set to the same default value \f$ \omega_6=\omega_7=\omega_8=O4=1.0 \f$.
-//         //!  - Fifth order cumulants \f$ C_{221}, C_{212}, C_{122}\f$: \f$\omega_9=O5=1.0\f$.
-//         //!  - Sixth order cumulant \f$ C_{222}\f$: \f$\omega_{10}=O6=1.0\f$.
-//         //!
-//         ////////////////////////////////////////////////////////////////////////////////////
-//         //! - Calculate modified omega with turbulent viscosity
-//         //!
-//         real omega = omega_in / (c1o1 + c3o1*omega_in*turbulentViscosity[k]);
-//         ////////////////////////////////////////////////////////////
-//         // 2.
-//         real OxxPyyPzz = c1o1;
-//         ////////////////////////////////////////////////////////////
-//         // 3.
-//         real OxyyPxzz = c8o1 * (-c2o1 + omega) * (c1o1 + c2o1 * omega) / (-c8o1 - c14o1 * omega + c7o1 * omega * omega);
-//         real OxyyMxzz =
-//             c8o1 * (-c2o1 + omega) * (-c7o1 + c4o1 * omega) / (c56o1 - c50o1 * omega + c9o1 * omega * omega);
-//         real Oxyz = c24o1 * (-c2o1 + omega) * (-c2o1 - c7o1 * omega + c3o1 * omega * omega) /
-//                     (c48o1 + c152o1 * omega - c130o1 * omega * omega + c29o1 * omega * omega * omega);
-//         ////////////////////////////////////////////////////////////
-//         // 4.
-//         real O4 = c1o1;
-//         ////////////////////////////////////////////////////////////
-//         // 5.
-//         real O5 = c1o1;
-//         ////////////////////////////////////////////////////////////
-//         // 6.
-//         real O6 = c1o1;
-
-//         ////////////////////////////////////////////////////////////////////////////////////
-//         //! - A and B: parameters for fourth order convergence of the diffusion term according to Eq. (115) and (116)
-//         //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
-//         //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a> with simplifications assuming \f$ \omega_2 = 1.0 \f$ (modify for
-//         //! different bulk viscosity).
-//         //!
-//         real A = (c4o1 + c2o1 * omega - c3o1 * omega * omega) / (c2o1 - c7o1 * omega + c5o1 * omega * omega);
-//         real B = (c4o1 + c28o1 * omega - c14o1 * omega * omega) / (c6o1 - c21o1 * omega + c15o1 * omega * omega);
-
-//         ////////////////////////////////////////////////////////////////////////////////////
-//         //! - Compute cumulants from central moments according to Eq. (20)-(23) in
-//         //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
-//         //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
-//         //!
-//         ////////////////////////////////////////////////////////////
-//         // 4.
-//         real CUMcbb = mfcbb - ((mfcaa + c1o3) * mfabb + c2o1 * mfbba * mfbab) * OOrho;
-//         real CUMbcb = mfbcb - ((mfaca + c1o3) * mfbab + c2o1 * mfbba * mfabb) * OOrho;
-//         real CUMbbc = mfbbc - ((mfaac + c1o3) * mfbba + c2o1 * mfbab * mfabb) * OOrho;
-
-//         real CUMcca =
-//             mfcca - (((mfcaa * mfaca + c2o1 * mfbba * mfbba) + c1o3 * (mfcaa + mfaca)) * OOrho - c1o9 * (drho * OOrho));
-//         real CUMcac =
-//             mfcac - (((mfcaa * mfaac + c2o1 * mfbab * mfbab) + c1o3 * (mfcaa + mfaac)) * OOrho - c1o9 * (drho * OOrho));
-//         real CUMacc =
-//             mfacc - (((mfaac * mfaca + c2o1 * mfabb * mfabb) + c1o3 * (mfaac + mfaca)) * OOrho - c1o9 * (drho * OOrho));
-//         ////////////////////////////////////////////////////////////
-//         // 5.
-//         real CUMbcc =
-//             mfbcc - ((mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb + c2o1 * (mfbab * mfacb + mfbba * mfabc)) +
-//                      c1o3 * (mfbca + mfbac)) *
-//                         OOrho;
-//         real CUMcbc =
-//             mfcbc - ((mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb + c2o1 * (mfabb * mfcab + mfbba * mfbac)) +
-//                      c1o3 * (mfcba + mfabc)) *
-//                         OOrho;
-//         real CUMccb =
-//             mfccb - ((mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb + c2o1 * (mfbab * mfbca + mfabb * mfcba)) +
-//                      c1o3 * (mfacb + mfcab)) *
-//                         OOrho;
-//         ////////////////////////////////////////////////////////////
-//         // 6.
-//         real CUMccc = mfccc + ((-c4o1 * mfbbb * mfbbb - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca) -
-//                                 c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc) -
-//                                 c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) *
-//                                    OOrho +
-//                                (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac) +
-//                                 c2o1 * (mfcaa * mfaca * mfaac) + c16o1 * mfbba * mfbab * mfabb) *
-//                                    OOrho * OOrho -
-//                                c1o3 * (mfacc + mfcac + mfcca) * OOrho - c1o9 * (mfcaa + mfaca + mfaac) * OOrho +
-//                                (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba) +
-//                                 (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 * (mfaac + mfaca + mfcaa)) *
-//                                    OOrho * OOrho * c2o3 +
-//                                c1o27 * ((drho * drho - drho) * OOrho * OOrho));
-
-//         ////////////////////////////////////////////////////////////////////////////////////
-//         //! - Compute linear combinations of second and third order cumulants
-//         //!
-//         ////////////////////////////////////////////////////////////
-//         // 2.
-//         real mxxPyyPzz = mfcaa + mfaca + mfaac;
-//         real mxxMyy    = mfcaa - mfaca;
-//         real mxxMzz    = mfcaa - mfaac;
-//         ////////////////////////////////////////////////////////////
-//         // 3.
-//         real mxxyPyzz = mfcba + mfabc;
-//         real mxxyMyzz = mfcba - mfabc;
-
-//         real mxxzPyyz = mfcab + mfacb;
-//         real mxxzMyyz = mfcab - mfacb;
-
-//         real mxyyPxzz = mfbca + mfbac;
-//         real mxyyMxzz = mfbca - mfbac;
-
-//         ////////////////////////////////////////////////////////////////////////////////////
-//         // incl. correction
-//         ////////////////////////////////////////////////////////////
-//         //! - Compute velocity  gradients from second order cumulants according to Eq. (27)-(32)
-//         //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
-//         //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a> Further explanations of the correction in viscosity in Appendix H of
-//         //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015),
-//         //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a> Note that the division by rho is omitted here as we need rho times
-//         //! the gradients later.
-//         //!
-//         real Dxy  = -c3o1 * omega * mfbba;
-//         real Dxz  = -c3o1 * omega * mfbab;
-//         real Dyz  = -c3o1 * omega * mfabb;
-//         real dxux = c1o2 * (-omega) * (mxxMyy + mxxMzz) + c1o2 * OxxPyyPzz * (mfaaa - mxxPyyPzz);
-//         real dyuy = dxux + omega * c3o2 * mxxMyy;
-//         real dzuz = dxux + omega * c3o2 * mxxMzz;
-
-//         //Smagorinsky for debugging
-//         // if(true)
-//         // {   
-//             // if(false && k==99976)
-//             // {
-//             //     printf("dudz+dwdu: \t %1.14f \n", Dxz );
-//             //     printf("dvdz+dudy: \t %1.14f \n", Dxy );  
-//             //     printf("dwdy+dvdz: \t %1.14f \n", Dyz );  
-//             //     printf("nu_t * dudz+dwdu: \t %1.14f \n", turbulentViscosity[k]*Dxz );
-//             //     printf("nu_t * dvdz+dudy: \t %1.14f \n", turbulentViscosity[k]*Dxy );  
-//             //     printf("nu_t * dwdy+dvdz: \t %1.14f \n", turbulentViscosity[k]*Dyz );      
-//             // } 
-//         //     real Sbar = sqrt(c2o1*(dxux*dxux+dyuy*dyuy+dzuz*dzuz)+Dxy*Dxy+Dxz*Dxz+Dyz*Dyz);
-//         //     real Cs = 0.08f;
-//         //     turbulentViscosity[k] = Cs*Cs*Sbar;
-//         // }
-
-//         ////////////////////////////////////////////////////////////
-//         //! - Relaxation of second order cumulants with correction terms according to Eq. (33)-(35) in
-//         //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
-//         //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
-//         //!
-//         mxxPyyPzz +=
-//             OxxPyyPzz * (mfaaa - mxxPyyPzz) - c3o1 * (c1o1 - c1o2 * OxxPyyPzz) * (vx2 * dxux + vy2 * dyuy + vz2 * dzuz);
-//         mxxMyy += omega * (-mxxMyy) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vy2 * dyuy);
-//         mxxMzz += omega * (-mxxMzz) - c3o1 * (c1o1 + c1o2 * (-omega)) * (vx2 * dxux - vz2 * dzuz);
-
-//         ////////////////////////////////////////////////////////////////////////////////////
-//         ////no correction
-//         // mxxPyyPzz += OxxPyyPzz*(mfaaa - mxxPyyPzz);
-//         // mxxMyy += -(-omega) * (-mxxMyy);
-//         // mxxMzz += -(-omega) * (-mxxMzz);
-//         //////////////////////////////////////////////////////////////////////////
-//         mfabb += omega * (-mfabb);
-//         mfbab += omega * (-mfbab);
-//         mfbba += omega * (-mfbba);
-
-//         ////////////////////////////////////////////////////////////////////////////////////
-//         // relax
-//         //////////////////////////////////////////////////////////////////////////
-//         // incl. limiter
-//         //! - Relaxation of third order cumulants including limiter according to Eq. (116)-(123)
-//         //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
-//         //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
-//         //!
-//         wadjust = Oxyz + (c1o1 - Oxyz) * abs(mfbbb) / (abs(mfbbb) + qudricLimitD);
-//         mfbbb += wadjust * (-mfbbb);
-//         wadjust = OxyyPxzz + (c1o1 - OxyyPxzz) * abs(mxxyPyzz) / (abs(mxxyPyzz) + qudricLimitP);
-//         mxxyPyzz += wadjust * (-mxxyPyzz);
-//         wadjust = OxyyMxzz + (c1o1 - OxyyMxzz) * abs(mxxyMyzz) / (abs(mxxyMyzz) + qudricLimitM);
-//         mxxyMyzz += wadjust * (-mxxyMyzz);
-//         wadjust = OxyyPxzz + (c1o1 - OxyyPxzz) * abs(mxxzPyyz) / (abs(mxxzPyyz) + qudricLimitP);
-//         mxxzPyyz += wadjust * (-mxxzPyyz);
-//         wadjust = OxyyMxzz + (c1o1 - OxyyMxzz) * abs(mxxzMyyz) / (abs(mxxzMyyz) + qudricLimitM);
-//         mxxzMyyz += wadjust * (-mxxzMyyz);
-//         wadjust = OxyyPxzz + (c1o1 - OxyyPxzz) * abs(mxyyPxzz) / (abs(mxyyPxzz) + qudricLimitP);
-//         mxyyPxzz += wadjust * (-mxyyPxzz);
-//         wadjust = OxyyMxzz + (c1o1 - OxyyMxzz) * abs(mxyyMxzz) / (abs(mxyyMxzz) + qudricLimitM);
-//         mxyyMxzz += wadjust * (-mxyyMxzz);
-//         //////////////////////////////////////////////////////////////////////////
-//         // no limiter
-//         // mfbbb += OxyyMxzz * (-mfbbb);
-//         // mxxyPyzz += OxyyPxzz * (-mxxyPyzz);
-//         // mxxyMyzz += OxyyMxzz * (-mxxyMyzz);
-//         // mxxzPyyz += OxyyPxzz * (-mxxzPyyz);
-//         // mxxzMyyz += OxyyMxzz * (-mxxzMyyz);
-//         // mxyyPxzz += OxyyPxzz * (-mxyyPxzz);
-//         // mxyyMxzz += OxyyMxzz * (-mxyyMxzz);
-
-//         ////////////////////////////////////////////////////////////////////////////////////
-//         //! - Compute inverse linear combinations of second and third order cumulants
-//         //!
-//         mfcaa = c1o3 * (mxxMyy + mxxMzz + mxxPyyPzz);
-//         mfaca = c1o3 * (-c2o1 * mxxMyy + mxxMzz + mxxPyyPzz);
-//         mfaac = c1o3 * (mxxMyy - c2o1 * mxxMzz + mxxPyyPzz);
-
-//         mfcba = (mxxyMyzz + mxxyPyzz) * c1o2;
-//         mfabc = (-mxxyMyzz + mxxyPyzz) * c1o2;
-//         mfcab = (mxxzMyyz + mxxzPyyz) * c1o2;
-//         mfacb = (-mxxzMyyz + mxxzPyyz) * c1o2;
-//         mfbca = (mxyyMxzz + mxyyPxzz) * c1o2;
-//         mfbac = (-mxyyMxzz + mxyyPxzz) * c1o2;
-//         //////////////////////////////////////////////////////////////////////////
-
-//         //////////////////////////////////////////////////////////////////////////
-//         // 4.
-//         // no limiter
-//         //! - Relax fourth order cumulants to modified equilibrium for fourth order convergence of diffusion according
-//         //! to Eq. (43)-(48) <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
-//         //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
-//         //!
-//         CUMacc = -O4 * (c1o1 / omega - c1o2) * (dyuy + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMacc);
-//         CUMcac = -O4 * (c1o1 / omega - c1o2) * (dxux + dzuz) * c2o3 * A + (c1o1 - O4) * (CUMcac);
-//         CUMcca = -O4 * (c1o1 / omega - c1o2) * (dyuy + dxux) * c2o3 * A + (c1o1 - O4) * (CUMcca);
-//         CUMbbc = -O4 * (c1o1 / omega - c1o2) * Dxy * c1o3 * B + (c1o1 - O4) * (CUMbbc);
-//         CUMbcb = -O4 * (c1o1 / omega - c1o2) * Dxz * c1o3 * B + (c1o1 - O4) * (CUMbcb);
-//         CUMcbb = -O4 * (c1o1 / omega - c1o2) * Dyz * c1o3 * B + (c1o1 - O4) * (CUMcbb);
-
-//         //////////////////////////////////////////////////////////////////////////
-//         // 5.
-//         CUMbcc += O5 * (-CUMbcc);
-//         CUMcbc += O5 * (-CUMcbc);
-//         CUMccb += O5 * (-CUMccb);
-
-//         //////////////////////////////////////////////////////////////////////////
-//         // 6.
-//         CUMccc += O6 * (-CUMccc);
-
-//         ////////////////////////////////////////////////////////////////////////////////////
-//         //! - Compute central moments from post collision cumulants according to Eq. (53)-(56) in
-//         //! <a href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017),
-//         //! DOI:10.1016/j.jcp.2017.05.040 ]</b></a>
-//         //!
-
-//         //////////////////////////////////////////////////////////////////////////
-//         // 4.
-//         mfcbb = CUMcbb + c1o3 * ((c3o1 * mfcaa + c1o1) * mfabb + c6o1 * mfbba * mfbab) * OOrho;
-//         mfbcb = CUMbcb + c1o3 * ((c3o1 * mfaca + c1o1) * mfbab + c6o1 * mfbba * mfabb) * OOrho;
-//         mfbbc = CUMbbc + c1o3 * ((c3o1 * mfaac + c1o1) * mfbba + c6o1 * mfbab * mfabb) * OOrho;
-
-//         mfcca =
-//             CUMcca +
-//             (((mfcaa * mfaca + c2o1 * mfbba * mfbba) * c9o1 + c3o1 * (mfcaa + mfaca)) * OOrho - (drho * OOrho)) * c1o9;
-//         mfcac =
-//             CUMcac +
-//             (((mfcaa * mfaac + c2o1 * mfbab * mfbab) * c9o1 + c3o1 * (mfcaa + mfaac)) * OOrho - (drho * OOrho)) * c1o9;
-//         mfacc =
-//             CUMacc +
-//             (((mfaac * mfaca + c2o1 * mfabb * mfabb) * c9o1 + c3o1 * (mfaac + mfaca)) * OOrho - (drho * OOrho)) * c1o9;
-
-//         //////////////////////////////////////////////////////////////////////////
-//         // 5.
-//         mfbcc = CUMbcc + c1o3 *
-//                              (c3o1 * (mfaac * mfbca + mfaca * mfbac + c4o1 * mfabb * mfbbb +
-//                                       c2o1 * (mfbab * mfacb + mfbba * mfabc)) +
-//                               (mfbca + mfbac)) *
-//                              OOrho;
-//         mfcbc = CUMcbc + c1o3 *
-//                              (c3o1 * (mfaac * mfcba + mfcaa * mfabc + c4o1 * mfbab * mfbbb +
-//                                       c2o1 * (mfabb * mfcab + mfbba * mfbac)) +
-//                               (mfcba + mfabc)) *
-//                              OOrho;
-//         mfccb = CUMccb + c1o3 *
-//                              (c3o1 * (mfcaa * mfacb + mfaca * mfcab + c4o1 * mfbba * mfbbb +
-//                                       c2o1 * (mfbab * mfbca + mfabb * mfcba)) +
-//                               (mfacb + mfcab)) *
-//                              OOrho;
-
-//         //////////////////////////////////////////////////////////////////////////
-//         // 6.
-//         mfccc = CUMccc - ((-c4o1 * mfbbb * mfbbb - (mfcaa * mfacc + mfaca * mfcac + mfaac * mfcca) -
-//                            c4o1 * (mfabb * mfcbb + mfbab * mfbcb + mfbba * mfbbc) -
-//                            c2o1 * (mfbca * mfbac + mfcba * mfabc + mfcab * mfacb)) *
-//                               OOrho +
-//                           (c4o1 * (mfbab * mfbab * mfaca + mfabb * mfabb * mfcaa + mfbba * mfbba * mfaac) +
-//                            c2o1 * (mfcaa * mfaca * mfaac) + c16o1 * mfbba * mfbab * mfabb) *
-//                               OOrho * OOrho -
-//                           c1o3 * (mfacc + mfcac + mfcca) * OOrho - c1o9 * (mfcaa + mfaca + mfaac) * OOrho +
-//                           (c2o1 * (mfbab * mfbab + mfabb * mfabb + mfbba * mfbba) +
-//                            (mfaac * mfaca + mfaac * mfcaa + mfaca * mfcaa) + c1o3 * (mfaac + mfaca + mfcaa)) *
-//                               OOrho * OOrho * c2o3 +
-//                           c1o27 * ((drho * drho - drho) * OOrho * OOrho));
-
-//         ////////////////////////////////////////////////////////////////////////////////////
-//         //! -  Add acceleration (body force) to first order cumulants according to Eq. (85)-(87) in
-//         //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015),
-//         //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a>
-//         //!
-//         mfbaa = -mfbaa;
-//         mfaba = -mfaba;
-//         mfaab = -mfaab;
-
-
-//         //Write to array here to distribute read/write
-//         rho[k] = drho;
-//         vx[k] = vvx;
-//         vy[k] = vvy;
-//         vz[k] = vvz;
-
-//         ////////////////////////////////////////////////////////////////////////////////////
-//         //! - Chimera transform from central moments to well conditioned distributions as defined in Appendix J in
-//         //! <a href="https://doi.org/10.1016/j.camwa.2015.05.001"><b>[ M. Geier et al. (2015),
-//         //! DOI:10.1016/j.camwa.2015.05.001 ]</b></a> see also Eq. (88)-(96) in <a
-//         //! href="https://doi.org/10.1016/j.jcp.2017.05.040"><b>[ M. Geier et al. (2017), DOI:10.1016/j.jcp.2017.05.040
-//         //! ]</b></a>
-//         //!
-//         ////////////////////////////////////////////////////////////////////////////////////
-//         // X - Dir
-//         backwardInverseChimeraWithK(mfaaa, mfbaa, mfcaa, vvx, vx2, c1o1, c1o1);
-//         backwardChimera(mfaba, mfbba, mfcba, vvx, vx2);
-//         backwardInverseChimeraWithK(mfaca, mfbca, mfcca, vvx, vx2, c3o1, c1o3);
-//         backwardChimera(mfaab, mfbab, mfcab, vvx, vx2);
-//         backwardChimera(mfabb, mfbbb, mfcbb, vvx, vx2);
-//         backwardChimera(mfacb, mfbcb, mfccb, vvx, vx2);
-//         backwardInverseChimeraWithK(mfaac, mfbac, mfcac, vvx, vx2, c3o1, c1o3);
-//         backwardChimera(mfabc, mfbbc, mfcbc, vvx, vx2);
-//         backwardInverseChimeraWithK(mfacc, mfbcc, mfccc, vvx, vx2, c9o1, c1o9);
-
-//         ////////////////////////////////////////////////////////////////////////////////////
-//         // Y - Dir
-//         backwardInverseChimeraWithK(mfaaa, mfaba, mfaca, vvy, vy2, c6o1, c1o6);
-//         backwardChimera(mfaab, mfabb, mfacb, vvy, vy2);
-//         backwardInverseChimeraWithK(mfaac, mfabc, mfacc, vvy, vy2, c18o1, c1o18);
-//         backwardInverseChimeraWithK(mfbaa, mfbba, mfbca, vvy, vy2, c3o2, c2o3);
-//         backwardChimera(mfbab, mfbbb, mfbcb, vvy, vy2);
-//         backwardInverseChimeraWithK(mfbac, mfbbc, mfbcc, vvy, vy2, c9o2, c2o9);
-//         backwardInverseChimeraWithK(mfcaa, mfcba, mfcca, vvy, vy2, c6o1, c1o6);
-//         backwardChimera(mfcab, mfcbb, mfccb, vvy, vy2);
-//         backwardInverseChimeraWithK(mfcac, mfcbc, mfccc, vvy, vy2, c18o1, c1o18);
-
-//         ////////////////////////////////////////////////////////////////////////////////////
-//         // Z - Dir
-//         backwardInverseChimeraWithK(mfaaa, mfaab, mfaac, vvz, vz2, c36o1, c1o36);
-//         backwardInverseChimeraWithK(mfaba, mfabb, mfabc, vvz, vz2, c9o1, c1o9);
-//         backwardInverseChimeraWithK(mfaca, mfacb, mfacc, vvz, vz2, c36o1, c1o36);
-//         backwardInverseChimeraWithK(mfbaa, mfbab, mfbac, vvz, vz2, c9o1, c1o9);
-//         backwardInverseChimeraWithK(mfbba, mfbbb, mfbbc, vvz, vz2, c9o4, c4o9);
-//         backwardInverseChimeraWithK(mfbca, mfbcb, mfbcc, vvz, vz2, c9o1, c1o9);
-//         backwardInverseChimeraWithK(mfcaa, mfcab, mfcac, vvz, vz2, c36o1, c1o36);
-//         backwardInverseChimeraWithK(mfcba, mfcbb, mfcbc, vvz, vz2, c9o1, c1o9);
-//         backwardInverseChimeraWithK(mfcca, mfccb, mfccc, vvz, vz2, c36o1, c1o36);
-
-//         ////////////////////////////////////////////////////////////////////////////////////
-//         //! - Write distributions: style of reading and writing the distributions from/to
-//         //! stored arrays dependent on timestep is based on the esoteric twist algorithm
-//         //! <a href="https://doi.org/10.3390/computation5020019"><b>[ M. Geier et al. (2017),
-//         //! DOI:10.3390/computation5020019 ]</b></a>
-//         //!
-
-
-//         distr_wrapper.distribution.f[E]      = mfabb;
-//         distr_wrapper.distribution.f[W]      = mfcbb;
-//         distr_wrapper.distribution.f[N]      = mfbab;
-//         distr_wrapper.distribution.f[S]      = mfbcb;
-//         distr_wrapper.distribution.f[T]      = mfbba;
-//         distr_wrapper.distribution.f[B]      = mfbbc;
-//         distr_wrapper.distribution.f[NE]     = mfaab;
-//         distr_wrapper.distribution.f[SW]     = mfccb;
-//         distr_wrapper.distribution.f[SE]     = mfacb;
-//         distr_wrapper.distribution.f[NW]     = mfcab;
-//         distr_wrapper.distribution.f[TE]     = mfaba;
-//         distr_wrapper.distribution.f[BW]     = mfcbc;
-//         distr_wrapper.distribution.f[BE]     = mfabc;
-//         distr_wrapper.distribution.f[TW]     = mfcba;
-//         distr_wrapper.distribution.f[TN]     = mfbaa;
-//         distr_wrapper.distribution.f[BS]     = mfbcc;
-//         distr_wrapper.distribution.f[BN]     = mfbac;
-//         distr_wrapper.distribution.f[TS]     = mfbca;
-//         distr_wrapper.distribution.f[REST]   = mfbbb;
-//         distr_wrapper.distribution.f[TNE]    = mfaaa;
-//         distr_wrapper.distribution.f[TSW]    = mfaca;
-//         distr_wrapper.distribution.f[TSE]    = mfaac;
-//         distr_wrapper.distribution.f[TNW]    = mfacc;
-//         distr_wrapper.distribution.f[BNE]    = mfcaa;
-//         distr_wrapper.distribution.f[BSW]    = mfcca;
-//         distr_wrapper.distribution.f[BSE]    = mfcac;
-//         distr_wrapper.distribution.f[BNW]    = mfccc;
-
-//         distr_wrapper.write();
-//         if(k==100000)
-//         {
-//             printf("mfcbb \t %f \t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f \n\n", 
-//                                                 (dist.f[E])[k]                  ,        
-//                                                 (dist.f[N])[k]       ,
-//                                                 (dist.f[S])[ks]      ,
-//                                                 (dist.f[T])[k]       ,
-//                                                 (dist.f[B])[kb]      ,
-//                                                 (dist.f[NE])[k]      ,
-//                                                 (dist.f[SW])[ksw]    ,
-//                                                 (dist.f[SE])[ks]     ,
-//                                                 (dist.f[NW])[kw]   ,
-//                                                 (dist.f[W])[kw]    ,
-//                                                 (dist.f[TE])[k]    ,
-//                                                 (dist.f[BW])[kbw]  ,
-//                                                 (dist.f[BE])[kb]   ,
-//                                                 (dist.f[TW])[kw]   ,
-//                                                 (dist.f[TN])[k]    ,
-//                                                 (dist.f[BS])[kbs]  ,
-//                                                 (dist.f[BN])[kb]   ,
-//                                                 (dist.f[TS])[ks]   ,
-//                                                 (dist.f[REST])[k]  ,
-//                                                 (dist.f[TNE])[k]   ,
-//                                                 (dist.f[TSE])[ks]  ,
-//                                                 (dist.f[BNE])[kb]  ,
-//                                                 (dist.f[BSE])[kbs] ,
-//                                                 (dist.f[TNW])[kw]  ,
-//                                                 (dist.f[TSW])[ksw] ,
-//                                                 (dist.f[BNW])[kbw] ,
-//                                                 (dist.f[BSW])[kbsw]);
-//         }
-
-//         (dist.f[E])[k]      = mfabb;
-//         (dist.f[W])[kw]     = mfcbb;
-//         (dist.f[N])[k]      = mfbab;
-//         (dist.f[S])[ks]     = mfbcb;
-//         (dist.f[T])[k]      = mfbba;
-//         (dist.f[B])[kb]     = mfbbc;
-//         (dist.f[NE])[k]     = mfaab;
-//         (dist.f[SW])[ksw]   = mfccb;
-//         (dist.f[SE])[ks]    = mfacb;
-//         (dist.f[NW])[kw]    = mfcab;
-//         (dist.f[TE])[k]     = mfaba;
-//         (dist.f[BW])[kbw]   = mfcbc;
-//         (dist.f[BE])[kb]    = mfabc;
-//         (dist.f[TW])[kw]    = mfcba;
-//         (dist.f[TN])[k]     = mfbaa;
-//         (dist.f[BS])[kbs]   = mfbcc;
-//         (dist.f[BN])[kb]    = mfbac;
-//         (dist.f[TS])[ks]    = mfbca;
-//         (dist.f[REST])[k]   = mfbbb;
-//         (dist.f[TNE])[k]    = mfaaa;
-//         (dist.f[TSE])[ks]   = mfaca;
-//         (dist.f[BNE])[kb]   = mfaac;
-//         (dist.f[BSE])[kbs]  = mfacc;
-//         (dist.f[TNW])[kw]   = mfcaa;
-//         (dist.f[TSW])[ksw]  = mfcca;
-//         (dist.f[BNW])[kbw]  = mfcac;
-//         (dist.f[BSW])[kbsw] = mfccc;
-        
-//         if(k==100000)
-//         {
-//             printf("mfcbb \t %f \t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f \n\n\n", 
-//                                                 (dist.f[E])[k]                  ,        
-//                                                 (dist.f[N])[k]       ,
-//                                                 (dist.f[S])[ks]      ,
-//                                                 (dist.f[T])[k]       ,
-//                                                 (dist.f[B])[kb]      ,
-//                                                 (dist.f[NE])[k]      ,
-//                                                 (dist.f[SW])[ksw]    ,
-//                                                 (dist.f[SE])[ks]     ,
-//                                                 (dist.f[NW])[kw]   ,
-//                                                 (dist.f[W])[kw]   ,
-//                                                 (dist.f[TE])[k]    ,
-//                                                 (dist.f[BW])[kbw]  ,
-//                                                 (dist.f[BE])[kb]   ,
-//                                                 (dist.f[TW])[kw]   ,
-//                                                 (dist.f[TN])[k]    ,
-//                                                 (dist.f[BS])[kbs]  ,
-//                                                 (dist.f[BN])[kb]   ,
-//                                                 (dist.f[TS])[ks]   ,
-//                                                 (dist.f[REST])[k]  ,
-//                                                 (dist.f[TNE])[k]   ,
-//                                                 (dist.f[TSE])[ks]  ,
-//                                                 (dist.f[BNE])[kb]  ,
-//                                                 (dist.f[BSE])[kbs] ,
-//                                                 (dist.f[TNW])[kw]  ,
-//                                                 (dist.f[TSW])[ksw] ,
-//                                                 (dist.f[BNW])[kbw] ,
-//                                                 (dist.f[BSW])[kbsw]);
-//         }
-//     }
-// }
\ No newline at end of file
+template __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim < TurbulenceModel::QR > ( real omega_in, uint* typeOfGridNode, uint* neighborX, uint* neighborY, uint* neighborZ, real* distributions, real* rho, real* vx, real* vy, real* vz, real* turbulentViscosity, real SGSconstant, unsigned long size_Mat, int level, bool bodyForce, real* forces, real* bodyForceX, real* bodyForceY, real* bodyForceZ, real* quadricLimiters, bool isEvenTimestep);
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim_Device.cuh
index 7f6738a9b6e39d63775a6490c1248f020fb4ccca..5ef37557399f263d25edf03b02b00f6a03c6e1cb 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/TurbulentViscosityKernels/FluidFlow/Compressible/CumulantK17chim/TurbulentViscosityCumulantK17CompChim_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim(
+template< TurbulenceModel turbulenceModel > __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim(
 	real omega_in,
 	uint* typeOfGridNode,
 	uint* neighborX,
@@ -16,6 +16,7 @@ extern "C" __global__ void LB_Kernel_TurbulentViscosityCumulantK17CompChim(
     real* vy,
     real* vz,
 	real* turbulentViscosity,
+	real SGSconstant,
 	unsigned long size_Mat,
 	int level,
 	bool bodyForce,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15/WaleCumulantK15Comp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15/WaleCumulantK15Comp_Device.cu
index fa3f22d021518553df0dee79079868e01f4a2427..3da25060e6c82ea685a1659fecc8cf66eeaf44c4 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15/WaleCumulantK15Comp_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15/WaleCumulantK15Comp_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_WaleCumulantK15Comp(
+__global__ void LB_Kernel_WaleCumulantK15Comp(
 	real omega_in,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
@@ -46,63 +46,63 @@ extern "C" __global__ void LB_Kernel_WaleCumulantK15Comp(
 			Distributions27 D;
 			if (EvenOrOdd==true)
 			{
-				D.f[E   ] = &DDStart[E   *size_Mat];
-				D.f[W   ] = &DDStart[W   *size_Mat];
-				D.f[N   ] = &DDStart[N   *size_Mat];
-				D.f[S   ] = &DDStart[S   *size_Mat];
-				D.f[T   ] = &DDStart[T   *size_Mat];
-				D.f[B   ] = &DDStart[B   *size_Mat];
-				D.f[NE  ] = &DDStart[NE  *size_Mat];
-				D.f[SW  ] = &DDStart[SW  *size_Mat];
-				D.f[SE  ] = &DDStart[SE  *size_Mat];
-				D.f[NW  ] = &DDStart[NW  *size_Mat];
-				D.f[TE  ] = &DDStart[TE  *size_Mat];
-				D.f[BW  ] = &DDStart[BW  *size_Mat];
-				D.f[BE  ] = &DDStart[BE  *size_Mat];
-				D.f[TW  ] = &DDStart[TW  *size_Mat];
-				D.f[TN  ] = &DDStart[TN  *size_Mat];
-				D.f[BS  ] = &DDStart[BS  *size_Mat];
-				D.f[BN  ] = &DDStart[BN  *size_Mat];
-				D.f[TS  ] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[TNE ] = &DDStart[TNE *size_Mat];
-				D.f[TSW ] = &DDStart[TSW *size_Mat];
-				D.f[TSE ] = &DDStart[TSE *size_Mat];
-				D.f[TNW ] = &DDStart[TNW *size_Mat];
-				D.f[BNE ] = &DDStart[BNE *size_Mat];
-				D.f[BSW ] = &DDStart[BSW *size_Mat];
-				D.f[BSE ] = &DDStart[BSE *size_Mat];
-				D.f[BNW ] = &DDStart[BNW *size_Mat];
+				D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
 			}
 			else
 			{
-				D.f[W   ] = &DDStart[E   *size_Mat];
-				D.f[E   ] = &DDStart[W   *size_Mat];
-				D.f[S   ] = &DDStart[N   *size_Mat];
-				D.f[N   ] = &DDStart[S   *size_Mat];
-				D.f[B   ] = &DDStart[T   *size_Mat];
-				D.f[T   ] = &DDStart[B   *size_Mat];
-				D.f[SW  ] = &DDStart[NE  *size_Mat];
-				D.f[NE  ] = &DDStart[SW  *size_Mat];
-				D.f[NW  ] = &DDStart[SE  *size_Mat];
-				D.f[SE  ] = &DDStart[NW  *size_Mat];
-				D.f[BW  ] = &DDStart[TE  *size_Mat];
-				D.f[TE  ] = &DDStart[BW  *size_Mat];
-				D.f[TW  ] = &DDStart[BE  *size_Mat];
-				D.f[BE  ] = &DDStart[TW  *size_Mat];
-				D.f[BS  ] = &DDStart[TN  *size_Mat];
-				D.f[TN  ] = &DDStart[BS  *size_Mat];
-				D.f[TS  ] = &DDStart[BN  *size_Mat];
-				D.f[BN  ] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[BSW ] = &DDStart[TNE *size_Mat];
-				D.f[BNE ] = &DDStart[TSW *size_Mat];
-				D.f[BNW ] = &DDStart[TSE *size_Mat];
-				D.f[BSE ] = &DDStart[TNW *size_Mat];
-				D.f[TSW ] = &DDStart[BNE *size_Mat];
-				D.f[TNE ] = &DDStart[BSW *size_Mat];
-				D.f[TNW ] = &DDStart[BSE *size_Mat];
-				D.f[TSE ] = &DDStart[BNW *size_Mat];
+				D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -136,33 +136,33 @@ extern "C" __global__ void LB_Kernel_WaleCumulantK15Comp(
 			unsigned int kbsw = neighborZ[ksw];
 
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[E   ])[k  ];
-			real mfabb = (D.f[W   ])[kw ];
-			real mfbcb = (D.f[N   ])[k  ];
-			real mfbab = (D.f[S   ])[ks ];
-			real mfbbc = (D.f[T   ])[k  ];
-			real mfbba = (D.f[B   ])[kb ];
-			real mfccb = (D.f[NE  ])[k  ];
-			real mfaab = (D.f[SW  ])[ksw];
-			real mfcab = (D.f[SE  ])[ks ];
-			real mfacb = (D.f[NW  ])[kw ];
-			real mfcbc = (D.f[TE  ])[k  ];
-			real mfaba = (D.f[BW  ])[kbw];
-			real mfcba = (D.f[BE  ])[kb ];
-			real mfabc = (D.f[TW  ])[kw ];
-			real mfbcc = (D.f[TN  ])[k  ];
-			real mfbaa = (D.f[BS  ])[kbs];
-			real mfbca = (D.f[BN  ])[kb ];
-			real mfbac = (D.f[TS  ])[ks ];
-			real mfbbb = (D.f[REST])[k  ];
-			real mfccc = (D.f[TNE ])[k  ];
-			real mfaac = (D.f[TSW ])[ksw];
-			real mfcac = (D.f[TSE ])[ks ];
-			real mfacc = (D.f[TNW ])[kw ];
-			real mfcca = (D.f[BNE ])[kb ];
-			real mfaaa = (D.f[BSW])[kbsw];
-			real mfcaa = (D.f[BSE ])[kbs];
-			real mfaca = (D.f[BNW ])[kbw];
+			real mfcbb = (D.f[DIR_P00   ])[k  ];
+			real mfabb = (D.f[DIR_M00   ])[kw ];
+			real mfbcb = (D.f[DIR_0P0   ])[k  ];
+			real mfbab = (D.f[DIR_0M0   ])[ks ];
+			real mfbbc = (D.f[DIR_00P   ])[k  ];
+			real mfbba = (D.f[DIR_00M   ])[kb ];
+			real mfccb = (D.f[DIR_PP0  ])[k  ];
+			real mfaab = (D.f[DIR_MM0  ])[ksw];
+			real mfcab = (D.f[DIR_PM0  ])[ks ];
+			real mfacb = (D.f[DIR_MP0  ])[kw ];
+			real mfcbc = (D.f[DIR_P0P  ])[k  ];
+			real mfaba = (D.f[DIR_M0M  ])[kbw];
+			real mfcba = (D.f[DIR_P0M  ])[kb ];
+			real mfabc = (D.f[DIR_M0P  ])[kw ];
+			real mfbcc = (D.f[DIR_0PP  ])[k  ];
+			real mfbaa = (D.f[DIR_0MM  ])[kbs];
+			real mfbca = (D.f[DIR_0PM  ])[kb ];
+			real mfbac = (D.f[DIR_0MP  ])[ks ];
+			real mfbbb = (D.f[DIR_000])[k  ];
+			real mfccc = (D.f[DIR_PPP ])[k  ];
+			real mfaac = (D.f[DIR_MMP ])[ksw];
+			real mfcac = (D.f[DIR_PMP ])[ks ];
+			real mfacc = (D.f[DIR_MPP ])[kw ];
+			real mfcca = (D.f[DIR_PPM ])[kb ];
+			real mfaaa = (D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM ])[kbs];
+			real mfaca = (D.f[DIR_MPM ])[kbw];
 			////////////////////////////////////////////////////////////////////////////////////
 			real drho = ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
 							(((mfbac+mfbca) + (mfbaa+mfbcc)) + ((mfabc+mfcba) + (mfaba+mfcbc)) + ((mfacb+mfcab) + (mfaab+mfccb))) +
@@ -1180,33 +1180,33 @@ extern "C" __global__ void LB_Kernel_WaleCumulantK15Comp(
 			////////////////////////////////////////////////////////////////////////////////////
 
 			////////////////////////////////////////////////////////////////////////////////////
-			(D.f[ E   ])[k   ] = mfabb;                                                                    
-			(D.f[ W   ])[kw  ] = mfcbb;                                                                  
-			(D.f[ N   ])[k   ] = mfbab;
-			(D.f[ S   ])[ks  ] = mfbcb;
-			(D.f[ T   ])[k   ] = mfbba;
-			(D.f[ B   ])[kb  ] = mfbbc;
-			(D.f[ NE  ])[k   ] = mfaab;
-			(D.f[ SW  ])[ksw ] = mfccb;
-			(D.f[ SE  ])[ks  ] = mfacb;
-			(D.f[ NW  ])[kw  ] = mfcab;
-			(D.f[ TE  ])[k   ] = mfaba;
-			(D.f[ BW  ])[kbw ] = mfcbc;
-			(D.f[ BE  ])[kb  ] = mfabc;
-			(D.f[ TW  ])[kw  ] = mfcba;
-			(D.f[ TN  ])[k   ] = mfbaa;
-			(D.f[ BS  ])[kbs ] = mfbcc;
-			(D.f[ BN  ])[kb  ] = mfbac;
-			(D.f[ TS  ])[ks  ] = mfbca;
-			(D.f[ REST])[k   ] = mfbbb;
-			(D.f[ TNE ])[k   ] = mfaaa;
-			(D.f[ TSE ])[ks  ] = mfaca;
-			(D.f[ BNE ])[kb  ] = mfaac;
-			(D.f[ BSE ])[kbs ] = mfacc;
-			(D.f[ TNW ])[kw  ] = mfcaa;
-			(D.f[ TSW ])[ksw ] = mfcca;
-			(D.f[ BNW ])[kbw ] = mfcac;
-			(D.f[ BSW ])[kbsw] = mfccc;
+			(D.f[ DIR_P00   ])[k   ] = mfabb;                                                                    
+			(D.f[ DIR_M00   ])[kw  ] = mfcbb;                                                                  
+			(D.f[ DIR_0P0   ])[k   ] = mfbab;
+			(D.f[ DIR_0M0   ])[ks  ] = mfbcb;
+			(D.f[ DIR_00P   ])[k   ] = mfbba;
+			(D.f[ DIR_00M   ])[kb  ] = mfbbc;
+			(D.f[ DIR_PP0  ])[k   ] = mfaab;
+			(D.f[ DIR_MM0  ])[ksw ] = mfccb;
+			(D.f[ DIR_PM0  ])[ks  ] = mfacb;
+			(D.f[ DIR_MP0  ])[kw  ] = mfcab;
+			(D.f[ DIR_P0P  ])[k   ] = mfaba;
+			(D.f[ DIR_M0M  ])[kbw ] = mfcbc;
+			(D.f[ DIR_P0M  ])[kb  ] = mfabc;
+			(D.f[ DIR_M0P  ])[kw  ] = mfcba;
+			(D.f[ DIR_0PP  ])[k   ] = mfbaa;
+			(D.f[ DIR_0MM  ])[kbs ] = mfbcc;
+			(D.f[ DIR_0PM  ])[kb  ] = mfbac;
+			(D.f[ DIR_0MP  ])[ks  ] = mfbca;
+			(D.f[ DIR_000])[k   ] = mfbbb;
+			(D.f[ DIR_PPP ])[k   ] = mfaaa;
+			(D.f[ DIR_PMP ])[ks  ] = mfaca;
+			(D.f[ DIR_PPM ])[kb  ] = mfaac;
+			(D.f[ DIR_PMM ])[kbs ] = mfacc;
+			(D.f[ DIR_MPP ])[kw  ] = mfcaa;
+			(D.f[ DIR_MMP ])[ksw ] = mfcca;
+			(D.f[ DIR_MPM ])[kbw ] = mfcac;
+			(D.f[ DIR_MMM ])[kbsw] = mfccc;
 			////////////////////////////////////////////////////////////////////////////////////
 		}                                                                                                                    
 	}
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15/WaleCumulantK15Comp_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15/WaleCumulantK15Comp_Device.cuh
index 409d7d0862479299e92c93aaa50085d673bb95f0..fad3eb11434b9c3fd216a7698b9275d4af43245c 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15/WaleCumulantK15Comp_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15/WaleCumulantK15Comp_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_WaleCumulantK15Comp(real omega,
+__global__ void LB_Kernel_WaleCumulantK15Comp(real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15BySoniMalav/WaleBySoniMalavCumulantK15Comp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15BySoniMalav/WaleBySoniMalavCumulantK15Comp_Device.cu
index c705be07519225534d3532a2b752d2a29516b102..511219c352c4d156428565f718191a70b9cc6c32 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15BySoniMalav/WaleBySoniMalavCumulantK15Comp_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15BySoniMalav/WaleBySoniMalavCumulantK15Comp_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_WaleBySoniMalavCumulantK15Comp(
+__global__ void LB_Kernel_WaleBySoniMalavCumulantK15Comp(
 	real omega_in,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
@@ -45,63 +45,63 @@ extern "C" __global__ void LB_Kernel_WaleBySoniMalavCumulantK15Comp(
 			Distributions27 D;
 			if (EvenOrOdd==true)
 			{
-				D.f[E   ] = &DDStart[E   *size_Mat];
-				D.f[W   ] = &DDStart[W   *size_Mat];
-				D.f[N   ] = &DDStart[N   *size_Mat];
-				D.f[S   ] = &DDStart[S   *size_Mat];
-				D.f[T   ] = &DDStart[T   *size_Mat];
-				D.f[B   ] = &DDStart[B   *size_Mat];
-				D.f[NE  ] = &DDStart[NE  *size_Mat];
-				D.f[SW  ] = &DDStart[SW  *size_Mat];
-				D.f[SE  ] = &DDStart[SE  *size_Mat];
-				D.f[NW  ] = &DDStart[NW  *size_Mat];
-				D.f[TE  ] = &DDStart[TE  *size_Mat];
-				D.f[BW  ] = &DDStart[BW  *size_Mat];
-				D.f[BE  ] = &DDStart[BE  *size_Mat];
-				D.f[TW  ] = &DDStart[TW  *size_Mat];
-				D.f[TN  ] = &DDStart[TN  *size_Mat];
-				D.f[BS  ] = &DDStart[BS  *size_Mat];
-				D.f[BN  ] = &DDStart[BN  *size_Mat];
-				D.f[TS  ] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[TNE ] = &DDStart[TNE *size_Mat];
-				D.f[TSW ] = &DDStart[TSW *size_Mat];
-				D.f[TSE ] = &DDStart[TSE *size_Mat];
-				D.f[TNW ] = &DDStart[TNW *size_Mat];
-				D.f[BNE ] = &DDStart[BNE *size_Mat];
-				D.f[BSW ] = &DDStart[BSW *size_Mat];
-				D.f[BSE ] = &DDStart[BSE *size_Mat];
-				D.f[BNW ] = &DDStart[BNW *size_Mat];
+				D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
 			}
 			else
 			{
-				D.f[W   ] = &DDStart[E   *size_Mat];
-				D.f[E   ] = &DDStart[W   *size_Mat];
-				D.f[S   ] = &DDStart[N   *size_Mat];
-				D.f[N   ] = &DDStart[S   *size_Mat];
-				D.f[B   ] = &DDStart[T   *size_Mat];
-				D.f[T   ] = &DDStart[B   *size_Mat];
-				D.f[SW  ] = &DDStart[NE  *size_Mat];
-				D.f[NE  ] = &DDStart[SW  *size_Mat];
-				D.f[NW  ] = &DDStart[SE  *size_Mat];
-				D.f[SE  ] = &DDStart[NW  *size_Mat];
-				D.f[BW  ] = &DDStart[TE  *size_Mat];
-				D.f[TE  ] = &DDStart[BW  *size_Mat];
-				D.f[TW  ] = &DDStart[BE  *size_Mat];
-				D.f[BE  ] = &DDStart[TW  *size_Mat];
-				D.f[BS  ] = &DDStart[TN  *size_Mat];
-				D.f[TN  ] = &DDStart[BS  *size_Mat];
-				D.f[TS  ] = &DDStart[BN  *size_Mat];
-				D.f[BN  ] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[BSW ] = &DDStart[TNE *size_Mat];
-				D.f[BNE ] = &DDStart[TSW *size_Mat];
-				D.f[BNW ] = &DDStart[TSE *size_Mat];
-				D.f[BSE ] = &DDStart[TNW *size_Mat];
-				D.f[TSW ] = &DDStart[BNE *size_Mat];
-				D.f[TNE ] = &DDStart[BSW *size_Mat];
-				D.f[TNW ] = &DDStart[BSE *size_Mat];
-				D.f[TSE ] = &DDStart[BNW *size_Mat];
+				D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat];
+				D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat];
+				D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -115,33 +115,33 @@ extern "C" __global__ void LB_Kernel_WaleBySoniMalavCumulantK15Comp(
 			unsigned int kbsw = neighborZ[ksw];
 
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[E   ])[k  ];
-			real mfabb = (D.f[W   ])[kw ];
-			real mfbcb = (D.f[N   ])[k  ];
-			real mfbab = (D.f[S   ])[ks ];
-			real mfbbc = (D.f[T   ])[k  ];
-			real mfbba = (D.f[B   ])[kb ];
-			real mfccb = (D.f[NE  ])[k  ];
-			real mfaab = (D.f[SW  ])[ksw];
-			real mfcab = (D.f[SE  ])[ks ];
-			real mfacb = (D.f[NW  ])[kw ];
-			real mfcbc = (D.f[TE  ])[k  ];
-			real mfaba = (D.f[BW  ])[kbw];
-			real mfcba = (D.f[BE  ])[kb ];
-			real mfabc = (D.f[TW  ])[kw ];
-			real mfbcc = (D.f[TN  ])[k  ];
-			real mfbaa = (D.f[BS  ])[kbs];
-			real mfbca = (D.f[BN  ])[kb ];
-			real mfbac = (D.f[TS  ])[ks ];
-			real mfbbb = (D.f[REST])[k  ];
-			real mfccc = (D.f[TNE ])[k  ];
-			real mfaac = (D.f[TSW ])[ksw];
-			real mfcac = (D.f[TSE ])[ks ];
-			real mfacc = (D.f[TNW ])[kw ];
-			real mfcca = (D.f[BNE ])[kb ];
-			real mfaaa = (D.f[BSW])[kbsw];
-			real mfcaa = (D.f[BSE ])[kbs];
-			real mfaca = (D.f[BNW ])[kbw];
+			real mfcbb = (D.f[DIR_P00   ])[k  ];
+			real mfabb = (D.f[DIR_M00   ])[kw ];
+			real mfbcb = (D.f[DIR_0P0   ])[k  ];
+			real mfbab = (D.f[DIR_0M0   ])[ks ];
+			real mfbbc = (D.f[DIR_00P   ])[k  ];
+			real mfbba = (D.f[DIR_00M   ])[kb ];
+			real mfccb = (D.f[DIR_PP0  ])[k  ];
+			real mfaab = (D.f[DIR_MM0  ])[ksw];
+			real mfcab = (D.f[DIR_PM0  ])[ks ];
+			real mfacb = (D.f[DIR_MP0  ])[kw ];
+			real mfcbc = (D.f[DIR_P0P  ])[k  ];
+			real mfaba = (D.f[DIR_M0M  ])[kbw];
+			real mfcba = (D.f[DIR_P0M  ])[kb ];
+			real mfabc = (D.f[DIR_M0P  ])[kw ];
+			real mfbcc = (D.f[DIR_0PP  ])[k  ];
+			real mfbaa = (D.f[DIR_0MM  ])[kbs];
+			real mfbca = (D.f[DIR_0PM  ])[kb ];
+			real mfbac = (D.f[DIR_0MP  ])[ks ];
+			real mfbbb = (D.f[DIR_000])[k  ];
+			real mfccc = (D.f[DIR_PPP ])[k  ];
+			real mfaac = (D.f[DIR_MMP ])[ksw];
+			real mfcac = (D.f[DIR_PMP ])[ks ];
+			real mfacc = (D.f[DIR_MPP ])[kw ];
+			real mfcca = (D.f[DIR_PPM ])[kb ];
+			real mfaaa = (D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM ])[kbs];
+			real mfaca = (D.f[DIR_MPM ])[kbw];
 			////////////////////////////////////////////////////////////////////////////////////
 			real drho = ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
 							(((mfbac+mfbca) + (mfbaa+mfbcc)) + ((mfabc+mfcba) + (mfaba+mfcbc)) + ((mfacb+mfcab) + (mfaab+mfccb))) +
@@ -1004,33 +1004,33 @@ extern "C" __global__ void LB_Kernel_WaleBySoniMalavCumulantK15Comp(
 			////////////////////////////////////////////////////////////////////////////////////
 
 			////////////////////////////////////////////////////////////////////////////////////
-			(D.f[ E   ])[k   ] = mfabb;                                                                    
-			(D.f[ W   ])[kw  ] = mfcbb;                                                                  
-			(D.f[ N   ])[k   ] = mfbab;
-			(D.f[ S   ])[ks  ] = mfbcb;
-			(D.f[ T   ])[k   ] = mfbba;
-			(D.f[ B   ])[kb  ] = mfbbc;
-			(D.f[ NE  ])[k   ] = mfaab;
-			(D.f[ SW  ])[ksw ] = mfccb;
-			(D.f[ SE  ])[ks  ] = mfacb;
-			(D.f[ NW  ])[kw  ] = mfcab;
-			(D.f[ TE  ])[k   ] = mfaba;
-			(D.f[ BW  ])[kbw ] = mfcbc;
-			(D.f[ BE  ])[kb  ] = mfabc;
-			(D.f[ TW  ])[kw  ] = mfcba;
-			(D.f[ TN  ])[k   ] = mfbaa;
-			(D.f[ BS  ])[kbs ] = mfbcc;
-			(D.f[ BN  ])[kb  ] = mfbac;
-			(D.f[ TS  ])[ks  ] = mfbca;
-			(D.f[ REST])[k   ] = mfbbb;
-			(D.f[ TNE ])[k   ] = mfaaa;
-			(D.f[ TSE ])[ks  ] = mfaca;
-			(D.f[ BNE ])[kb  ] = mfaac;
-			(D.f[ BSE ])[kbs ] = mfacc;
-			(D.f[ TNW ])[kw  ] = mfcaa;
-			(D.f[ TSW ])[ksw ] = mfcca;
-			(D.f[ BNW ])[kbw ] = mfcac;
-			(D.f[ BSW ])[kbsw] = mfccc;
+			(D.f[ DIR_P00   ])[k   ] = mfabb;                                                                    
+			(D.f[ DIR_M00   ])[kw  ] = mfcbb;                                                                  
+			(D.f[ DIR_0P0   ])[k   ] = mfbab;
+			(D.f[ DIR_0M0   ])[ks  ] = mfbcb;
+			(D.f[ DIR_00P   ])[k   ] = mfbba;
+			(D.f[ DIR_00M   ])[kb  ] = mfbbc;
+			(D.f[ DIR_PP0  ])[k   ] = mfaab;
+			(D.f[ DIR_MM0  ])[ksw ] = mfccb;
+			(D.f[ DIR_PM0  ])[ks  ] = mfacb;
+			(D.f[ DIR_MP0  ])[kw  ] = mfcab;
+			(D.f[ DIR_P0P  ])[k   ] = mfaba;
+			(D.f[ DIR_M0M  ])[kbw ] = mfcbc;
+			(D.f[ DIR_P0M  ])[kb  ] = mfabc;
+			(D.f[ DIR_M0P  ])[kw  ] = mfcba;
+			(D.f[ DIR_0PP  ])[k   ] = mfbaa;
+			(D.f[ DIR_0MM  ])[kbs ] = mfbcc;
+			(D.f[ DIR_0PM  ])[kb  ] = mfbac;
+			(D.f[ DIR_0MP  ])[ks  ] = mfbca;
+			(D.f[ DIR_000])[k   ] = mfbbb;
+			(D.f[ DIR_PPP ])[k   ] = mfaaa;
+			(D.f[ DIR_PMP ])[ks  ] = mfaca;
+			(D.f[ DIR_PPM ])[kb  ] = mfaac;
+			(D.f[ DIR_PMM ])[kbs ] = mfacc;
+			(D.f[ DIR_MPP ])[kw  ] = mfcaa;
+			(D.f[ DIR_MMP ])[ksw ] = mfcca;
+			(D.f[ DIR_MPM ])[kbw ] = mfcac;
+			(D.f[ DIR_MMM ])[kbsw] = mfccc;
 			////////////////////////////////////////////////////////////////////////////////////
 		}                                                                                                                    
 	}
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15BySoniMalav/WaleBySoniMalavCumulantK15Comp_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15BySoniMalav/WaleBySoniMalavCumulantK15Comp_Device.cuh
index 593a7cef915e605f28bfbcbd362a6600b6c6224d..4297404073aacc0acd01b84c35cbae3d1081ed5b 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15BySoniMalav/WaleBySoniMalavCumulantK15Comp_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK15BySoniMalav/WaleBySoniMalavCumulantK15Comp_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_WaleBySoniMalavCumulantK15Comp(real omega_in,
+__global__ void LB_Kernel_WaleBySoniMalavCumulantK15Comp(real omega_in,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
 	unsigned int* neighborY,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17/WaleCumulantK17Comp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17/WaleCumulantK17Comp_Device.cu
index 380fc5258f32fb908a977f28a15688aa5d9b650d..8aaa13ab1d868e15ea5707d1566ba653b44c645d 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17/WaleCumulantK17Comp_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17/WaleCumulantK17Comp_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_WaleCumulantK17Comp(
+__global__ void LB_Kernel_WaleCumulantK17Comp(
 	real omega_in,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
@@ -47,63 +47,63 @@ extern "C" __global__ void LB_Kernel_WaleCumulantK17Comp(
 			Distributions27 D;
 			if (EvenOrOdd==true)
 			{
-				D.f[E   ] = &DDStart[E   *size_Mat];
-				D.f[W   ] = &DDStart[W   *size_Mat];
-				D.f[N   ] = &DDStart[N   *size_Mat];
-				D.f[S   ] = &DDStart[S   *size_Mat];
-				D.f[T   ] = &DDStart[T   *size_Mat];
-				D.f[B   ] = &DDStart[B   *size_Mat];
-				D.f[NE  ] = &DDStart[NE  *size_Mat];
-				D.f[SW  ] = &DDStart[SW  *size_Mat];
-				D.f[SE  ] = &DDStart[SE  *size_Mat];
-				D.f[NW  ] = &DDStart[NW  *size_Mat];
-				D.f[TE  ] = &DDStart[TE  *size_Mat];
-				D.f[BW  ] = &DDStart[BW  *size_Mat];
-				D.f[BE  ] = &DDStart[BE  *size_Mat];
-				D.f[TW  ] = &DDStart[TW  *size_Mat];
-				D.f[TN  ] = &DDStart[TN  *size_Mat];
-				D.f[BS  ] = &DDStart[BS  *size_Mat];
-				D.f[BN  ] = &DDStart[BN  *size_Mat];
-				D.f[TS  ] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[TNE ] = &DDStart[TNE *size_Mat];
-				D.f[TSW ] = &DDStart[TSW *size_Mat];
-				D.f[TSE ] = &DDStart[TSE *size_Mat];
-				D.f[TNW ] = &DDStart[TNW *size_Mat];
-				D.f[BNE ] = &DDStart[BNE *size_Mat];
-				D.f[BSW ] = &DDStart[BSW *size_Mat];
-				D.f[BSE ] = &DDStart[BSE *size_Mat];
-				D.f[BNW ] = &DDStart[BNW *size_Mat];
+				D.f[DIR_P00   ] = &DDStart[DIR_P00   *size_Mat]; // EvenOrOdd == true: D.f[dir] points into DDStart at offset dir * size_Mat, same direction on both sides
+				D.f[DIR_M00   ] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0P0   ] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0M0   ] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00P   ] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00M   ] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_PP0  ] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_MM0  ] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_PM0  ] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_MP0  ] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_P0P  ] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_M0M  ] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_P0M  ] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_M0P  ] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0PP  ] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0MM  ] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0PM  ] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0MP  ] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; // DIR_000 replaces the former REST index
+				D.f[DIR_PPP ] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_MMP ] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_PMP ] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_MPP ] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_PPM ] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_MMM ] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_PMM ] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_MPM ] = &DDStart[DIR_MPM *size_Mat];
 			}
 			else
 			{
-				D.f[W   ] = &DDStart[E   *size_Mat];
-				D.f[E   ] = &DDStart[W   *size_Mat];
-				D.f[S   ] = &DDStart[N   *size_Mat];
-				D.f[N   ] = &DDStart[S   *size_Mat];
-				D.f[B   ] = &DDStart[T   *size_Mat];
-				D.f[T   ] = &DDStart[B   *size_Mat];
-				D.f[SW  ] = &DDStart[NE  *size_Mat];
-				D.f[NE  ] = &DDStart[SW  *size_Mat];
-				D.f[NW  ] = &DDStart[SE  *size_Mat];
-				D.f[SE  ] = &DDStart[NW  *size_Mat];
-				D.f[BW  ] = &DDStart[TE  *size_Mat];
-				D.f[TE  ] = &DDStart[BW  *size_Mat];
-				D.f[TW  ] = &DDStart[BE  *size_Mat];
-				D.f[BE  ] = &DDStart[TW  *size_Mat];
-				D.f[BS  ] = &DDStart[TN  *size_Mat];
-				D.f[TN  ] = &DDStart[BS  *size_Mat];
-				D.f[TS  ] = &DDStart[BN  *size_Mat];
-				D.f[BN  ] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[BSW ] = &DDStart[TNE *size_Mat];
-				D.f[BNE ] = &DDStart[TSW *size_Mat];
-				D.f[BNW ] = &DDStart[TSE *size_Mat];
-				D.f[BSE ] = &DDStart[TNW *size_Mat];
-				D.f[TSW ] = &DDStart[BNE *size_Mat];
-				D.f[TNE ] = &DDStart[BSW *size_Mat];
-				D.f[TNW ] = &DDStart[BSE *size_Mat];
-				D.f[TSE ] = &DDStart[BNW *size_Mat];
+				D.f[DIR_M00   ] = &DDStart[DIR_P00   *size_Mat]; // else branch: D.f[dir] points at the offset of the opposite direction (every P <-> M swapped)
+				D.f[DIR_P00   ] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0M0   ] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0P0   ] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00M   ] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00P   ] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_MM0  ] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_PP0  ] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_MP0  ] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_PM0  ] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_M0M  ] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_P0P  ] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_M0P  ] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_P0M  ] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0MM  ] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0PP  ] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0MP  ] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0PM  ] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; // DIR_000 has no P/M component and maps to itself
+				D.f[DIR_MMM ] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_PPM ] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_MPM ] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_PMM ] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_MMP ] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_PPP ] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_MPP ] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_PMP ] = &DDStart[DIR_MPM *size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -137,33 +137,33 @@ extern "C" __global__ void LB_Kernel_WaleCumulantK17Comp(
 			unsigned int kbsw = neighborZ[ksw];
 
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[E   ])[k  ];
-			real mfabb = (D.f[W   ])[kw ];
-			real mfbcb = (D.f[N   ])[k  ];
-			real mfbab = (D.f[S   ])[ks ];
-			real mfbbc = (D.f[T   ])[k  ];
-			real mfbba = (D.f[B   ])[kb ];
-			real mfccb = (D.f[NE  ])[k  ];
-			real mfaab = (D.f[SW  ])[ksw];
-			real mfcab = (D.f[SE  ])[ks ];
-			real mfacb = (D.f[NW  ])[kw ];
-			real mfcbc = (D.f[TE  ])[k  ];
-			real mfaba = (D.f[BW  ])[kbw];
-			real mfcba = (D.f[BE  ])[kb ];
-			real mfabc = (D.f[TW  ])[kw ];
-			real mfbcc = (D.f[TN  ])[k  ];
-			real mfbaa = (D.f[BS  ])[kbs];
-			real mfbca = (D.f[BN  ])[kb ];
-			real mfbac = (D.f[TS  ])[ks ];
-			real mfbbb = (D.f[REST])[k  ];
-			real mfccc = (D.f[TNE ])[k  ];
-			real mfaac = (D.f[TSW ])[ksw];
-			real mfcac = (D.f[TSE ])[ks ];
-			real mfacc = (D.f[TNW ])[kw ];
-			real mfcca = (D.f[BNE ])[kb ];
-			real mfaaa = (D.f[BSW])[kbsw];
-			real mfcaa = (D.f[BSE ])[kbs];
-			real mfaca = (D.f[BNW ])[kbw];
+			real mfcbb = (D.f[DIR_P00   ])[k  ]; // load the 27 values into locals mfXYZ; per x/y/z component: c <-> P, b <-> 0, a <-> M
+			real mfabb = (D.f[DIR_M00   ])[kw ]; // a direction with M in x/y/z is read through the w/s/b index (kw, ks, kb and their combinations)
+			real mfbcb = (D.f[DIR_0P0   ])[k  ];
+			real mfbab = (D.f[DIR_0M0   ])[ks ];
+			real mfbbc = (D.f[DIR_00P   ])[k  ];
+			real mfbba = (D.f[DIR_00M   ])[kb ];
+			real mfccb = (D.f[DIR_PP0  ])[k  ];
+			real mfaab = (D.f[DIR_MM0  ])[ksw];
+			real mfcab = (D.f[DIR_PM0  ])[ks ];
+			real mfacb = (D.f[DIR_MP0  ])[kw ];
+			real mfcbc = (D.f[DIR_P0P  ])[k  ];
+			real mfaba = (D.f[DIR_M0M  ])[kbw];
+			real mfcba = (D.f[DIR_P0M  ])[kb ];
+			real mfabc = (D.f[DIR_M0P  ])[kw ];
+			real mfbcc = (D.f[DIR_0PP  ])[k  ];
+			real mfbaa = (D.f[DIR_0MM  ])[kbs];
+			real mfbca = (D.f[DIR_0PM  ])[kb ];
+			real mfbac = (D.f[DIR_0MP  ])[ks ];
+			real mfbbb = (D.f[DIR_000])[k  ];
+			real mfccc = (D.f[DIR_PPP ])[k  ];
+			real mfaac = (D.f[DIR_MMP ])[ksw];
+			real mfcac = (D.f[DIR_PMP ])[ks ];
+			real mfacc = (D.f[DIR_MPP ])[kw ];
+			real mfcca = (D.f[DIR_PPM ])[kb ];
+			real mfaaa = (D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM ])[kbs];
+			real mfaca = (D.f[DIR_MPM ])[kbw];
 			////////////////////////////////////////////////////////////////////////////////////
 			real drho = ((((mfccc+mfaaa) + (mfaca+mfcac)) + ((mfacc+mfcaa) + (mfaac+mfcca))) + 
 							(((mfbac+mfbca) + (mfbaa+mfbcc)) + ((mfabc+mfcba) + (mfaba+mfcbc)) + ((mfacb+mfcab) + (mfaab+mfccb))) +
@@ -1142,33 +1142,33 @@ extern "C" __global__ void LB_Kernel_WaleCumulantK17Comp(
 			////////////////////////////////////////////////////////////////////////////////////
 
 			////////////////////////////////////////////////////////////////////////////////////
-			(D.f[ E   ])[k   ] = mfabb;                                                                    
-			(D.f[ W   ])[kw  ] = mfcbb;                                                                  
-			(D.f[ N   ])[k   ] = mfbab;
-			(D.f[ S   ])[ks  ] = mfbcb;
-			(D.f[ T   ])[k   ] = mfbba;
-			(D.f[ B   ])[kb  ] = mfbbc;
-			(D.f[ NE  ])[k   ] = mfaab;
-			(D.f[ SW  ])[ksw ] = mfccb;
-			(D.f[ SE  ])[ks  ] = mfacb;
-			(D.f[ NW  ])[kw  ] = mfcab;
-			(D.f[ TE  ])[k   ] = mfaba;
-			(D.f[ BW  ])[kbw ] = mfcbc;
-			(D.f[ BE  ])[kb  ] = mfabc;
-			(D.f[ TW  ])[kw  ] = mfcba;
-			(D.f[ TN  ])[k   ] = mfbaa;
-			(D.f[ BS  ])[kbs ] = mfbcc;
-			(D.f[ BN  ])[kb  ] = mfbac;
-			(D.f[ TS  ])[ks  ] = mfbca;
-			(D.f[ REST])[k   ] = mfbbb;
-			(D.f[ TNE ])[k   ] = mfaaa;
-			(D.f[ TSE ])[ks  ] = mfaca;
-			(D.f[ BNE ])[kb  ] = mfaac;
-			(D.f[ BSE ])[kbs ] = mfacc;
-			(D.f[ TNW ])[kw  ] = mfcaa;
-			(D.f[ TSW ])[ksw ] = mfcca;
-			(D.f[ BNW ])[kbw ] = mfcac;
-			(D.f[ BSW ])[kbsw] = mfccc;
+			(D.f[ DIR_P00 ])[k   ] = mfabb; // write-back: each slot receives the local of the opposite direction (DIR_P00 <- mfabb, DIR_M00 <- mfcbb, ...)
+			(D.f[ DIR_M00 ])[kw  ] = mfcbb; // the index per slot matches the one used when the slot was read (k, kw, ks, kb, ...)
+			(D.f[ DIR_0P0 ])[k   ] = mfbab;
+			(D.f[ DIR_0M0 ])[ks  ] = mfbcb;
+			(D.f[ DIR_00P ])[k   ] = mfbba;
+			(D.f[ DIR_00M ])[kb  ] = mfbbc;
+			(D.f[ DIR_PP0 ])[k   ] = mfaab;
+			(D.f[ DIR_MM0 ])[ksw ] = mfccb;
+			(D.f[ DIR_PM0 ])[ks  ] = mfacb;
+			(D.f[ DIR_MP0 ])[kw  ] = mfcab;
+			(D.f[ DIR_P0P ])[k   ] = mfaba;
+			(D.f[ DIR_M0M ])[kbw ] = mfcbc;
+			(D.f[ DIR_P0M ])[kb  ] = mfabc;
+			(D.f[ DIR_M0P ])[kw  ] = mfcba;
+			(D.f[ DIR_0PP ])[k   ] = mfbaa;
+			(D.f[ DIR_0MM ])[kbs ] = mfbcc;
+			(D.f[ DIR_0PM ])[kb  ] = mfbac;
+			(D.f[ DIR_0MP ])[ks  ] = mfbca;
+			(D.f[ DIR_000 ])[k   ] = mfbbb; // DIR_000 keeps its own local mfbbb
+			(D.f[ DIR_PPP ])[k   ] = mfaaa;
+			(D.f[ DIR_PMP ])[ks  ] = mfaca;
+			(D.f[ DIR_PPM ])[kb  ] = mfaac;
+			(D.f[ DIR_PMM ])[kbs ] = mfacc;
+			(D.f[ DIR_MPP ])[kw  ] = mfcaa;	
+			(D.f[ DIR_MMP ])[ksw ] = mfcca;
+			(D.f[ DIR_MPM ])[kbw ] = mfcac;
+			(D.f[ DIR_MMM ])[kbsw] = mfccc;
 			////////////////////////////////////////////////////////////////////////////////////
 		}                                                                                                                    
 	}
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17/WaleCumulantK17Comp_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17/WaleCumulantK17Comp_Device.cuh
index 34e2c094cd9a2d7ecb1dc064db954d5b6e211f8f..459c833e2bd3f0bfd2a0c214a9d366bcfb3a4b49 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17/WaleCumulantK17Comp_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17/WaleCumulantK17Comp_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_WaleCumulantK17Comp(
+__global__ void LB_Kernel_WaleCumulantK17Comp(
 	real omega,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17Debug/WaleCumulantK17DebugComp_Device.cu b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17Debug/WaleCumulantK17DebugComp_Device.cu
index c702c252be0ca9a51561d8aba68bc21e0f3b9902..a1feba477a6555ea728311a6e99d5302652813ff 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17Debug/WaleCumulantK17DebugComp_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17Debug/WaleCumulantK17DebugComp_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Kernel_WaleCumulantK17DebugComp(
+__global__ void LB_Kernel_WaleCumulantK17DebugComp(
 	real omega_in,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
@@ -57,63 +57,63 @@ extern "C" __global__ void LB_Kernel_WaleCumulantK17DebugComp(
 			Distributions27 D;
 			if (EvenOrOdd == true)
 			{
-				D.f[E] = &DDStart[E   *size_Mat];
-				D.f[W] = &DDStart[W   *size_Mat];
-				D.f[N] = &DDStart[N   *size_Mat];
-				D.f[S] = &DDStart[S   *size_Mat];
-				D.f[T] = &DDStart[T   *size_Mat];
-				D.f[B] = &DDStart[B   *size_Mat];
-				D.f[NE] = &DDStart[NE  *size_Mat];
-				D.f[SW] = &DDStart[SW  *size_Mat];
-				D.f[SE] = &DDStart[SE  *size_Mat];
-				D.f[NW] = &DDStart[NW  *size_Mat];
-				D.f[TE] = &DDStart[TE  *size_Mat];
-				D.f[BW] = &DDStart[BW  *size_Mat];
-				D.f[BE] = &DDStart[BE  *size_Mat];
-				D.f[TW] = &DDStart[TW  *size_Mat];
-				D.f[TN] = &DDStart[TN  *size_Mat];
-				D.f[BS] = &DDStart[BS  *size_Mat];
-				D.f[BN] = &DDStart[BN  *size_Mat];
-				D.f[TS] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[TNE] = &DDStart[TNE *size_Mat];
-				D.f[TSW] = &DDStart[TSW *size_Mat];
-				D.f[TSE] = &DDStart[TSE *size_Mat];
-				D.f[TNW] = &DDStart[TNW *size_Mat];
-				D.f[BNE] = &DDStart[BNE *size_Mat];
-				D.f[BSW] = &DDStart[BSW *size_Mat];
-				D.f[BSE] = &DDStart[BSE *size_Mat];
-				D.f[BNW] = &DDStart[BNW *size_Mat];
+				D.f[DIR_P00] = &DDStart[DIR_P00   *size_Mat]; // EvenOrOdd == true: D.f[dir] points into DDStart at offset dir * size_Mat, same direction on both sides
+				D.f[DIR_M00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; // DIR_000 replaces the former REST index
+				D.f[DIR_PPP] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_MMM] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_PMM]= &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_MPM]= &DDStart[DIR_MPM *size_Mat];
 			}
 			else
 			{
-				D.f[W] = &DDStart[E   *size_Mat];
-				D.f[E] = &DDStart[W   *size_Mat];
-				D.f[S] = &DDStart[N   *size_Mat];
-				D.f[N] = &DDStart[S   *size_Mat];
-				D.f[B] = &DDStart[T   *size_Mat];
-				D.f[T] = &DDStart[B   *size_Mat];
-				D.f[SW] = &DDStart[NE  *size_Mat];
-				D.f[NE] = &DDStart[SW  *size_Mat];
-				D.f[NW] = &DDStart[SE  *size_Mat];
-				D.f[SE] = &DDStart[NW  *size_Mat];
-				D.f[BW] = &DDStart[TE  *size_Mat];
-				D.f[TE] = &DDStart[BW  *size_Mat];
-				D.f[TW] = &DDStart[BE  *size_Mat];
-				D.f[BE] = &DDStart[TW  *size_Mat];
-				D.f[BS] = &DDStart[TN  *size_Mat];
-				D.f[TN] = &DDStart[BS  *size_Mat];
-				D.f[TS] = &DDStart[BN  *size_Mat];
-				D.f[BN] = &DDStart[TS  *size_Mat];
-				D.f[REST] = &DDStart[REST*size_Mat];
-				D.f[BSW] = &DDStart[TNE *size_Mat];
-				D.f[BNE] = &DDStart[TSW *size_Mat];
-				D.f[BNW] = &DDStart[TSE *size_Mat];
-				D.f[BSE] = &DDStart[TNW *size_Mat];
-				D.f[TSW] = &DDStart[BNE *size_Mat];
-				D.f[TNE] = &DDStart[BSW *size_Mat];
-				D.f[TNW] = &DDStart[BSE *size_Mat];
-				D.f[TSE] = &DDStart[BNW *size_Mat];
+				D.f[DIR_M00] = &DDStart[DIR_P00   *size_Mat]; // else branch: D.f[dir] points at the offset of the opposite direction (every P <-> M swapped)
+				D.f[DIR_P00] = &DDStart[DIR_M00   *size_Mat];
+				D.f[DIR_0M0] = &DDStart[DIR_0P0   *size_Mat];
+				D.f[DIR_0P0] = &DDStart[DIR_0M0   *size_Mat];
+				D.f[DIR_00M] = &DDStart[DIR_00P   *size_Mat];
+				D.f[DIR_00P] = &DDStart[DIR_00M   *size_Mat];
+				D.f[DIR_MM0] = &DDStart[DIR_PP0  *size_Mat];
+				D.f[DIR_PP0] = &DDStart[DIR_MM0  *size_Mat];
+				D.f[DIR_MP0] = &DDStart[DIR_PM0  *size_Mat];
+				D.f[DIR_PM0] = &DDStart[DIR_MP0  *size_Mat];
+				D.f[DIR_M0M] = &DDStart[DIR_P0P  *size_Mat];
+				D.f[DIR_P0P] = &DDStart[DIR_M0M  *size_Mat];
+				D.f[DIR_M0P] = &DDStart[DIR_P0M  *size_Mat];
+				D.f[DIR_P0M] = &DDStart[DIR_M0P  *size_Mat];
+				D.f[DIR_0MM] = &DDStart[DIR_0PP  *size_Mat];
+				D.f[DIR_0PP] = &DDStart[DIR_0MM  *size_Mat];
+				D.f[DIR_0MP] = &DDStart[DIR_0PM  *size_Mat];
+				D.f[DIR_0PM] = &DDStart[DIR_0MP  *size_Mat];
+				D.f[DIR_000] = &DDStart[DIR_000*size_Mat]; // DIR_000 has no P/M component and maps to itself
+				D.f[DIR_MMM] = &DDStart[DIR_PPP *size_Mat];
+				D.f[DIR_PPM] = &DDStart[DIR_MMP *size_Mat];
+				D.f[DIR_MPM]= &DDStart[DIR_PMP *size_Mat];
+				D.f[DIR_PMM]= &DDStart[DIR_MPP *size_Mat];
+				D.f[DIR_MMP] = &DDStart[DIR_PPM *size_Mat];
+				D.f[DIR_PPP] = &DDStart[DIR_MMM *size_Mat];
+				D.f[DIR_MPP] = &DDStart[DIR_PMM *size_Mat];
+				D.f[DIR_PMP] = &DDStart[DIR_MPM *size_Mat];
 			}
 
 			////////////////////////////////////////////////////////////////////////////////
@@ -147,33 +147,33 @@ extern "C" __global__ void LB_Kernel_WaleCumulantK17DebugComp(
 			unsigned int kbsw = neighborZ[ksw];
 
 			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			real mfcbb = (D.f[E])[k];
-			real mfabb = (D.f[W])[kw];
-			real mfbcb = (D.f[N])[k];
-			real mfbab = (D.f[S])[ks];
-			real mfbbc = (D.f[T])[k];
-			real mfbba = (D.f[B])[kb];
-			real mfccb = (D.f[NE])[k];
-			real mfaab = (D.f[SW])[ksw];
-			real mfcab = (D.f[SE])[ks];
-			real mfacb = (D.f[NW])[kw];
-			real mfcbc = (D.f[TE])[k];
-			real mfaba = (D.f[BW])[kbw];
-			real mfcba = (D.f[BE])[kb];
-			real mfabc = (D.f[TW])[kw];
-			real mfbcc = (D.f[TN])[k];
-			real mfbaa = (D.f[BS])[kbs];
-			real mfbca = (D.f[BN])[kb];
-			real mfbac = (D.f[TS])[ks];
-			real mfbbb = (D.f[REST])[k];
-			real mfccc = (D.f[TNE])[k];
-			real mfaac = (D.f[TSW])[ksw];
-			real mfcac = (D.f[TSE])[ks];
-			real mfacc = (D.f[TNW])[kw];
-			real mfcca = (D.f[BNE])[kb];
-			real mfaaa = (D.f[BSW])[kbsw];
-			real mfcaa = (D.f[BSE])[kbs];
-			real mfaca = (D.f[BNW])[kbw];
+			real mfcbb = (D.f[DIR_P00])[k]; // load the 27 values into locals mfXYZ; per x/y/z component: c <-> P, b <-> 0, a <-> M
+			real mfabb = (D.f[DIR_M00])[kw]; // a direction with M in x/y/z is read through the w/s/b index (kw, ks, kb and their combinations)
+			real mfbcb = (D.f[DIR_0P0])[k];
+			real mfbab = (D.f[DIR_0M0])[ks];
+			real mfbbc = (D.f[DIR_00P])[k];
+			real mfbba = (D.f[DIR_00M])[kb];
+			real mfccb = (D.f[DIR_PP0])[k];
+			real mfaab = (D.f[DIR_MM0])[ksw];
+			real mfcab = (D.f[DIR_PM0])[ks];
+			real mfacb = (D.f[DIR_MP0])[kw];
+			real mfcbc = (D.f[DIR_P0P])[k];
+			real mfaba = (D.f[DIR_M0M])[kbw];
+			real mfcba = (D.f[DIR_P0M])[kb];
+			real mfabc = (D.f[DIR_M0P])[kw];
+			real mfbcc = (D.f[DIR_0PP])[k];
+			real mfbaa = (D.f[DIR_0MM])[kbs];
+			real mfbca = (D.f[DIR_0PM])[kb];
+			real mfbac = (D.f[DIR_0MP])[ks];
+			real mfbbb = (D.f[DIR_000])[k];
+			real mfccc = (D.f[DIR_PPP])[k];
+			real mfaac = (D.f[DIR_MMP])[ksw];
+			real mfcac = (D.f[DIR_PMP])[ks];
+			real mfacc = (D.f[DIR_MPP])[kw];
+			real mfcca = (D.f[DIR_PPM])[kb];
+			real mfaaa = (D.f[DIR_MMM])[kbsw];
+			real mfcaa = (D.f[DIR_PMM])[kbs];
+			real mfaca = (D.f[DIR_MPM])[kbw];
 			////////////////////////////////////////////////////////////////////////////////////
 			real drho = ((((mfccc + mfaaa) + (mfaca + mfcac)) + ((mfacc + mfcaa) + (mfaac + mfcca))) +
 				(((mfbac + mfbca) + (mfbaa + mfbcc)) + ((mfabc + mfcba) + (mfaba + mfcbc)) + ((mfacb + mfcab) + (mfaab + mfccb))) +
@@ -1158,33 +1158,33 @@ extern "C" __global__ void LB_Kernel_WaleCumulantK17DebugComp(
 			////////////////////////////////////////////////////////////////////////////////////
 
 			////////////////////////////////////////////////////////////////////////////////////
-			(D.f[E])[k] = mfabb;
-			(D.f[W])[kw] = mfcbb;
-			(D.f[N])[k] = mfbab;
-			(D.f[S])[ks] = mfbcb;
-			(D.f[T])[k] = mfbba;
-			(D.f[B])[kb] = mfbbc;
-			(D.f[NE])[k] = mfaab;
-			(D.f[SW])[ksw] = mfccb;
-			(D.f[SE])[ks] = mfacb;
-			(D.f[NW])[kw] = mfcab;
-			(D.f[TE])[k] = mfaba;
-			(D.f[BW])[kbw] = mfcbc;
-			(D.f[BE])[kb] = mfabc;
-			(D.f[TW])[kw] = mfcba;
-			(D.f[TN])[k] = mfbaa;
-			(D.f[BS])[kbs] = mfbcc;
-			(D.f[BN])[kb] = mfbac;
-			(D.f[TS])[ks] = mfbca;
-			(D.f[REST])[k] = mfbbb;
-			(D.f[TNE])[k] = mfaaa;
-			(D.f[TSE])[ks] = mfaca;
-			(D.f[BNE])[kb] = mfaac;
-			(D.f[BSE])[kbs] = mfacc;
-			(D.f[TNW])[kw] = mfcaa;
-			(D.f[TSW])[ksw] = mfcca;
-			(D.f[BNW])[kbw] = mfcac;
-			(D.f[BSW])[kbsw] = mfccc;
+			(D.f[DIR_P00])[k] = mfabb; // write-back: each slot receives the local of the opposite direction (DIR_P00 <- mfabb, DIR_M00 <- mfcbb, ...)
+			(D.f[DIR_M00])[kw] = mfcbb; // the index per slot matches the one used when the slot was read (k, kw, ks, kb, ...)
+			(D.f[DIR_0P0])[k] = mfbab;
+			(D.f[DIR_0M0])[ks] = mfbcb;
+			(D.f[DIR_00P])[k] = mfbba;
+			(D.f[DIR_00M])[kb] = mfbbc;
+			(D.f[DIR_PP0])[k] = mfaab;
+			(D.f[DIR_MM0])[ksw] = mfccb;
+			(D.f[DIR_PM0])[ks] = mfacb;
+			(D.f[DIR_MP0])[kw] = mfcab;
+			(D.f[DIR_P0P])[k] = mfaba;
+			(D.f[DIR_M0M])[kbw] = mfcbc;
+			(D.f[DIR_P0M])[kb] = mfabc;
+			(D.f[DIR_M0P])[kw] = mfcba;
+			(D.f[DIR_0PP])[k] = mfbaa;
+			(D.f[DIR_0MM])[kbs] = mfbcc;
+			(D.f[DIR_0PM])[kb] = mfbac;
+			(D.f[DIR_0MP])[ks] = mfbca;
+			(D.f[DIR_000])[k] = mfbbb; // DIR_000 keeps its own local mfbbb
+			(D.f[DIR_PPP])[k] = mfaaa;
+			(D.f[DIR_PMP])[ks] = mfaca;
+			(D.f[DIR_PPM])[kb] = mfaac;
+			(D.f[DIR_PMM])[kbs] = mfacc;
+			(D.f[DIR_MPP])[kw] = mfcaa;
+			(D.f[DIR_MMP])[ksw] = mfcca;
+			(D.f[DIR_MPM])[kbw] = mfcac;
+			(D.f[DIR_MMM])[kbsw] = mfccc;
 			////////////////////////////////////////////////////////////////////////////////////
 		}
 	}
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17Debug/WaleCumulantK17DebugComp_Device.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17Debug/WaleCumulantK17DebugComp_Device.cuh
index 6f4253639f5d7bfce82f5909914c793955ef794a..50da48fa702862b71dc13b8b21b34cd1e2b39250 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17Debug/WaleCumulantK17DebugComp_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Kernels/WaleKernels/FluidFlow/Compressible/CumulantK17Debug/WaleCumulantK17DebugComp_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Kernel_WaleCumulantK17DebugComp(
+__global__ void LB_Kernel_WaleCumulantK17DebugComp(
 	real omega_in,
 	unsigned int* bcMatD,
 	unsigned int* neighborX,
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cu b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cu
index 9528f0ef256b1e868d389563e2aafa32bbf096ab..7c477c539dc3526389dc22563b50501e778a63f3 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cu
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cu
@@ -4,77 +4,12 @@
 
 
 #include <lbm/constants/NumericConstants.h>
-#include <lbm/constants/D3Q27.h>
 #include "lbm/constants/D3Q27.h"
 using namespace vf::lbm::dir;
 
 namespace vf::gpu
 {
 
-__device__ __host__ DistributionReferences27 getDistributionReferences27(real *distributions, unsigned int size_Mat, bool isEvenTimestep)
-{
-    DistributionReferences27 distribution_references;
-
-    if (isEvenTimestep) {
-        distribution_references.f[E]    = &distributions[E * size_Mat];
-        distribution_references.f[W]    = &distributions[W * size_Mat];
-        distribution_references.f[N]    = &distributions[N * size_Mat];
-        distribution_references.f[S]    = &distributions[S * size_Mat];
-        distribution_references.f[T]    = &distributions[T * size_Mat];
-        distribution_references.f[B]    = &distributions[B * size_Mat];
-        distribution_references.f[NE]   = &distributions[NE * size_Mat];
-        distribution_references.f[SW]   = &distributions[SW * size_Mat];
-        distribution_references.f[SE]   = &distributions[SE * size_Mat];
-        distribution_references.f[NW]   = &distributions[NW * size_Mat];
-        distribution_references.f[TE]   = &distributions[TE * size_Mat];
-        distribution_references.f[BW]   = &distributions[BW * size_Mat];
-        distribution_references.f[BE]   = &distributions[BE * size_Mat];
-        distribution_references.f[TW]   = &distributions[TW * size_Mat];
-        distribution_references.f[TN]   = &distributions[TN * size_Mat];
-        distribution_references.f[BS]   = &distributions[BS * size_Mat];
-        distribution_references.f[BN]   = &distributions[BN * size_Mat];
-        distribution_references.f[TS]   = &distributions[TS * size_Mat];
-        distribution_references.f[REST] = &distributions[REST * size_Mat];
-        distribution_references.f[TNE]  = &distributions[TNE * size_Mat];
-        distribution_references.f[TSW]  = &distributions[TSW * size_Mat];
-        distribution_references.f[TSE]  = &distributions[TSE * size_Mat];
-        distribution_references.f[TNW]  = &distributions[TNW * size_Mat];
-        distribution_references.f[BNE]  = &distributions[BNE * size_Mat];
-        distribution_references.f[BSW]  = &distributions[BSW * size_Mat];
-        distribution_references.f[BSE]  = &distributions[BSE * size_Mat];
-        distribution_references.f[BNW]  = &distributions[BNW * size_Mat];
-    } else {
-        distribution_references.f[W]    = &distributions[E * size_Mat];
-        distribution_references.f[E]    = &distributions[W * size_Mat];
-        distribution_references.f[S]    = &distributions[N * size_Mat];
-        distribution_references.f[N]    = &distributions[S * size_Mat];
-        distribution_references.f[B]    = &distributions[T * size_Mat];
-        distribution_references.f[T]    = &distributions[B * size_Mat];
-        distribution_references.f[SW]   = &distributions[NE * size_Mat];
-        distribution_references.f[NE]   = &distributions[SW * size_Mat];
-        distribution_references.f[NW]   = &distributions[SE * size_Mat];
-        distribution_references.f[SE]   = &distributions[NW * size_Mat];
-        distribution_references.f[BW]   = &distributions[TE * size_Mat];
-        distribution_references.f[TE]   = &distributions[BW * size_Mat];
-        distribution_references.f[TW]   = &distributions[BE * size_Mat];
-        distribution_references.f[BE]   = &distributions[TW * size_Mat];
-        distribution_references.f[BS]   = &distributions[TN * size_Mat];
-        distribution_references.f[TN]   = &distributions[BS * size_Mat];
-        distribution_references.f[TS]   = &distributions[BN * size_Mat];
-        distribution_references.f[BN]   = &distributions[TS * size_Mat];
-        distribution_references.f[REST] = &distributions[REST * size_Mat];
-        distribution_references.f[BSW]  = &distributions[TNE * size_Mat];
-        distribution_references.f[BNE]  = &distributions[TSW * size_Mat];
-        distribution_references.f[BNW]  = &distributions[TSE * size_Mat];
-        distribution_references.f[BSE]  = &distributions[TNW * size_Mat];
-        distribution_references.f[TSW]  = &distributions[BNE * size_Mat];
-        distribution_references.f[TNE]  = &distributions[BSW * size_Mat];
-        distribution_references.f[TNW]  = &distributions[BSE * size_Mat];
-        distribution_references.f[TSE]  = &distributions[BNW * size_Mat];
-    }
-    return distribution_references;
-}
-
 __device__ DistributionWrapper::DistributionWrapper(real *distributions, unsigned int size_Mat, bool isEvenTimestep,
                                                     uint k, uint *neighborX, uint *neighborY, uint *neighborZ)
     : distribution_references(getDistributionReferences27(distributions, size_Mat, isEvenTimestep)), k(k), kw(neighborX[k]), ks(neighborY[k]),
@@ -85,76 +20,64 @@ __device__ DistributionWrapper::DistributionWrapper(real *distributions, unsigne
 
 __device__ void DistributionWrapper::read()
 {
-    distribution.f[vf::lbm::dir::PZZ] = (distribution_references.f[E])[k];
-    distribution.f[vf::lbm::dir::MZZ] = (distribution_references.f[W])[kw];
-    distribution.f[vf::lbm::dir::ZPZ] = (distribution_references.f[N])[k];
-    distribution.f[vf::lbm::dir::ZMZ] = (distribution_references.f[S])[ks];
-    distribution.f[vf::lbm::dir::ZZP] = (distribution_references.f[T])[k];
-    distribution.f[vf::lbm::dir::ZZM] = (distribution_references.f[B])[kb];
-    distribution.f[vf::lbm::dir::PPZ] = (distribution_references.f[NE])[k];
-    distribution.f[vf::lbm::dir::MMZ] = (distribution_references.f[SW])[ksw];
-    distribution.f[vf::lbm::dir::PMZ] = (distribution_references.f[SE])[ks];
-    distribution.f[vf::lbm::dir::MPZ] = (distribution_references.f[NW])[kw];
-    distribution.f[vf::lbm::dir::PZP] = (distribution_references.f[TE])[k];
-    distribution.f[vf::lbm::dir::MZM] = (distribution_references.f[BW])[kbw];
-    distribution.f[vf::lbm::dir::PZM] = (distribution_references.f[BE])[kb];
-    distribution.f[vf::lbm::dir::MZP] = (distribution_references.f[TW])[kw];
-    distribution.f[vf::lbm::dir::ZPP] = (distribution_references.f[TN])[k];
-    distribution.f[vf::lbm::dir::ZMM] = (distribution_references.f[BS])[kbs];
-    distribution.f[vf::lbm::dir::ZPM] = (distribution_references.f[BN])[kb];
-    distribution.f[vf::lbm::dir::ZMP] = (distribution_references.f[TS])[ks];
-    distribution.f[vf::lbm::dir::PPP] = (distribution_references.f[TNE])[k];
-    distribution.f[vf::lbm::dir::MPP] = (distribution_references.f[TNW])[kw];
-    distribution.f[vf::lbm::dir::PMP] = (distribution_references.f[TSE])[ks];
-    distribution.f[vf::lbm::dir::MMP] = (distribution_references.f[TSW])[ksw];
-    distribution.f[vf::lbm::dir::PPM] = (distribution_references.f[BNE])[kb];
-    distribution.f[vf::lbm::dir::MPM] = (distribution_references.f[BNW])[kbw];
-    distribution.f[vf::lbm::dir::PMM] = (distribution_references.f[BSE])[kbs];
-    distribution.f[vf::lbm::dir::MMM] = (distribution_references.f[BSW])[kbsw];
-    distribution.f[vf::lbm::dir::ZZZ] = (distribution_references.f[REST])[k];
+    distribution.f[vf::lbm::dir::PZZ] = (distribution_references.f[DIR_P00])[k]; // copy each direction's value from its per-direction array into the local distribution.f
+    distribution.f[vf::lbm::dir::MZZ] = (distribution_references.f[DIR_M00])[kw]; // kw is neighborX[k] (constructor initializer)
+    distribution.f[vf::lbm::dir::ZPZ] = (distribution_references.f[DIR_0P0])[k];
+    distribution.f[vf::lbm::dir::ZMZ] = (distribution_references.f[DIR_0M0])[ks]; // ks is neighborY[k] (constructor initializer)
+    distribution.f[vf::lbm::dir::ZZP] = (distribution_references.f[DIR_00P])[k];
+    distribution.f[vf::lbm::dir::ZZM] = (distribution_references.f[DIR_00M])[kb]; // NOTE(review): kb is presumably neighborZ[k] - its initializer is not visible in this hunk, confirm
+    distribution.f[vf::lbm::dir::PPZ] = (distribution_references.f[DIR_PP0])[k];
+    distribution.f[vf::lbm::dir::MMZ] = (distribution_references.f[DIR_MM0])[ksw]; // NOTE(review): ksw/kbw/kbs/kbsw look like chained neighbor lookups - confirm in the constructor
+    distribution.f[vf::lbm::dir::PMZ] = (distribution_references.f[DIR_PM0])[ks];
+    distribution.f[vf::lbm::dir::MPZ] = (distribution_references.f[DIR_MP0])[kw];
+    distribution.f[vf::lbm::dir::PZP] = (distribution_references.f[DIR_P0P])[k];
+    distribution.f[vf::lbm::dir::MZM] = (distribution_references.f[DIR_M0M])[kbw];
+    distribution.f[vf::lbm::dir::PZM] = (distribution_references.f[DIR_P0M])[kb];
+    distribution.f[vf::lbm::dir::MZP] = (distribution_references.f[DIR_M0P])[kw];
+    distribution.f[vf::lbm::dir::ZPP] = (distribution_references.f[DIR_0PP])[k];
+    distribution.f[vf::lbm::dir::ZMM] = (distribution_references.f[DIR_0MM])[kbs];
+    distribution.f[vf::lbm::dir::ZPM] = (distribution_references.f[DIR_0PM])[kb];
+    distribution.f[vf::lbm::dir::ZMP] = (distribution_references.f[DIR_0MP])[ks];
+    distribution.f[vf::lbm::dir::PPP] = (distribution_references.f[DIR_PPP])[k];
+    distribution.f[vf::lbm::dir::MPP] = (distribution_references.f[DIR_MPP])[kw];
+    distribution.f[vf::lbm::dir::PMP] = (distribution_references.f[DIR_PMP])[ks];
+    distribution.f[vf::lbm::dir::MMP] = (distribution_references.f[DIR_MMP])[ksw];
+    distribution.f[vf::lbm::dir::PPM] = (distribution_references.f[DIR_PPM])[kb];
+    distribution.f[vf::lbm::dir::MPM] = (distribution_references.f[DIR_MPM])[kbw];
+    distribution.f[vf::lbm::dir::PMM] = (distribution_references.f[DIR_PMM])[kbs];
+    distribution.f[vf::lbm::dir::MMM] = (distribution_references.f[DIR_MMM])[kbsw];
+    distribution.f[vf::lbm::dir::ZZZ] = (distribution_references.f[DIR_000])[k]; // the 000 entry is always read at the node's own index k
 }
 
 __device__ void DistributionWrapper::write()
 {
-    (distribution_references.f[E])[k]      = distribution.f[vf::lbm::dir::PZZ];
-    (distribution_references.f[W])[kw]     = distribution.f[vf::lbm::dir::MZZ];
-    (distribution_references.f[N])[k]      = distribution.f[vf::lbm::dir::ZPZ];
-    (distribution_references.f[S])[ks]     = distribution.f[vf::lbm::dir::ZMZ];
-    (distribution_references.f[T])[k]      = distribution.f[vf::lbm::dir::ZZP];
-    (distribution_references.f[B])[kb]     = distribution.f[vf::lbm::dir::ZZM];
-    (distribution_references.f[NE])[k]     = distribution.f[vf::lbm::dir::PPZ];
-    (distribution_references.f[SW])[ksw]   = distribution.f[vf::lbm::dir::MMZ];
-    (distribution_references.f[SE])[ks]    = distribution.f[vf::lbm::dir::PMZ];
-    (distribution_references.f[NW])[kw]    = distribution.f[vf::lbm::dir::MPZ];
-    (distribution_references.f[TE])[k]     = distribution.f[vf::lbm::dir::PZP];
-    (distribution_references.f[BW])[kbw]   = distribution.f[vf::lbm::dir::MZM];
-    (distribution_references.f[BE])[kb]    = distribution.f[vf::lbm::dir::PZM];
-    (distribution_references.f[TW])[kw]    = distribution.f[vf::lbm::dir::MZP];
-    (distribution_references.f[TN])[k]     = distribution.f[vf::lbm::dir::ZPP];
-    (distribution_references.f[BS])[kbs]   = distribution.f[vf::lbm::dir::ZMM];
-    (distribution_references.f[BN])[kb]    = distribution.f[vf::lbm::dir::ZPM];
-    (distribution_references.f[TS])[ks]    = distribution.f[vf::lbm::dir::ZMP];
-    (distribution_references.f[TNE])[k]    = distribution.f[vf::lbm::dir::PPP];
-    (distribution_references.f[TNW])[kw]   = distribution.f[vf::lbm::dir::MPP];
-    (distribution_references.f[TSE])[ks]   = distribution.f[vf::lbm::dir::PMP];
-    (distribution_references.f[TSW])[ksw]  = distribution.f[vf::lbm::dir::MMP];
-    (distribution_references.f[BNE])[kb]   = distribution.f[vf::lbm::dir::PPM];
-    (distribution_references.f[BNW])[kbw]  = distribution.f[vf::lbm::dir::MPM];
-    (distribution_references.f[BSE])[kbs]  = distribution.f[vf::lbm::dir::PMM];
-    (distribution_references.f[BSW])[kbsw] = distribution.f[vf::lbm::dir::MMM];
-    (distribution_references.f[REST])[k]   = distribution.f[vf::lbm::dir::ZZZ];
-}
-
-__device__ unsigned int getNodeIndex()
-{
-    const unsigned x = threadIdx.x;
-    const unsigned y = blockIdx.x;
-    const unsigned z = blockIdx.y;
-
-    const unsigned nx = blockDim.x;
-    const unsigned ny = gridDim.x;
-
-    return nx * (ny * z + y) + x;
+    (distribution_references.f[DIR_P00])[k]    = distribution.f[vf::lbm::dir::PZZ]; // store the local distribution.f back, using the same array/index pairs as read()
+    (distribution_references.f[DIR_M00])[kw]   = distribution.f[vf::lbm::dir::MZZ]; // kw is neighborX[k] (constructor initializer)
+    (distribution_references.f[DIR_0P0])[k]    = distribution.f[vf::lbm::dir::ZPZ];
+    (distribution_references.f[DIR_0M0])[ks]   = distribution.f[vf::lbm::dir::ZMZ]; // ks is neighborY[k] (constructor initializer)
+    (distribution_references.f[DIR_00P])[k]    = distribution.f[vf::lbm::dir::ZZP];
+    (distribution_references.f[DIR_00M])[kb]   = distribution.f[vf::lbm::dir::ZZM];
+    (distribution_references.f[DIR_PP0])[k]    = distribution.f[vf::lbm::dir::PPZ];
+    (distribution_references.f[DIR_MM0])[ksw]  = distribution.f[vf::lbm::dir::MMZ];
+    (distribution_references.f[DIR_PM0])[ks]   = distribution.f[vf::lbm::dir::PMZ];
+    (distribution_references.f[DIR_MP0])[kw]   = distribution.f[vf::lbm::dir::MPZ];
+    (distribution_references.f[DIR_P0P])[k]    = distribution.f[vf::lbm::dir::PZP];
+    (distribution_references.f[DIR_M0M])[kbw]  = distribution.f[vf::lbm::dir::MZM];
+    (distribution_references.f[DIR_P0M])[kb]   = distribution.f[vf::lbm::dir::PZM];
+    (distribution_references.f[DIR_M0P])[kw]   = distribution.f[vf::lbm::dir::MZP];
+    (distribution_references.f[DIR_0PP])[k]    = distribution.f[vf::lbm::dir::ZPP];
+    (distribution_references.f[DIR_0MM])[kbs]  = distribution.f[vf::lbm::dir::ZMM];
+    (distribution_references.f[DIR_0PM])[kb]   = distribution.f[vf::lbm::dir::ZPM];
+    (distribution_references.f[DIR_0MP])[ks]   = distribution.f[vf::lbm::dir::ZMP];
+    (distribution_references.f[DIR_PPP])[k]    = distribution.f[vf::lbm::dir::PPP];
+    (distribution_references.f[DIR_MPP])[kw]   = distribution.f[vf::lbm::dir::MPP];
+    (distribution_references.f[DIR_PMP])[ks]   = distribution.f[vf::lbm::dir::PMP];
+    (distribution_references.f[DIR_MMP])[ksw]  = distribution.f[vf::lbm::dir::MMP];
+    (distribution_references.f[DIR_PPM])[kb]   = distribution.f[vf::lbm::dir::PPM];
+    (distribution_references.f[DIR_MPM])[kbw]  = distribution.f[vf::lbm::dir::MPM];
+    (distribution_references.f[DIR_PMM])[kbs]  = distribution.f[vf::lbm::dir::PMM];
+    (distribution_references.f[DIR_MMM])[kbsw] = distribution.f[vf::lbm::dir::MMM];
+    (distribution_references.f[DIR_000])[k]    = distribution.f[vf::lbm::dir::ZZZ]; // the 000 entry is always written at the node's own index k
 }
 
 __device__ bool isValidFluidNode(uint nodeType)
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cuh b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cuh
index 6b38cac75c99680c71420533455dd060195b6c87..93cec8b9e18ba91f2613ba43971b4312dec2ccb6 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cuh
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelper.cuh
@@ -35,12 +35,77 @@
 
 #include "LBM/LB.h" 
 
-#include <lbm/KernelParameter.h>
+#include "lbm/KernelParameter.h"
+#include "lbm/constants/D3Q27.h"
 
-namespace vf
+using namespace vf::lbm::dir; // NOTE(review): global-scope using-directive in a header - every file including this one gets the vf::lbm::dir names unqualified; consider scoping it once all users qualify the names
+
+namespace vf::gpu
 {
-namespace gpu
+
+__inline__ __device__ __host__ void getPointersToDistributions(Distributions27 &dist, real *distributionArray, const uint numberOfLBnodes, const bool isEvenTimestep) // fills dist.f[dir] with the address of one numberOfLBnodes-sized slice of distributionArray per direction
 {
+    if (isEvenTimestep) // even step: every slot points at the slice of its own direction
+    {
+        dist.f[DIR_P00] = &distributionArray[DIR_P00 * numberOfLBnodes]; // NOTE(review): offset is DIR_* times a uint - confirm the product cannot overflow for very large node counts
+        dist.f[DIR_M00] = &distributionArray[DIR_M00 * numberOfLBnodes];
+        dist.f[DIR_0P0] = &distributionArray[DIR_0P0 * numberOfLBnodes];
+        dist.f[DIR_0M0] = &distributionArray[DIR_0M0 * numberOfLBnodes];
+        dist.f[DIR_00P] = &distributionArray[DIR_00P * numberOfLBnodes];
+        dist.f[DIR_00M] = &distributionArray[DIR_00M * numberOfLBnodes];
+        dist.f[DIR_PP0] = &distributionArray[DIR_PP0 * numberOfLBnodes];
+        dist.f[DIR_MM0] = &distributionArray[DIR_MM0 * numberOfLBnodes];
+        dist.f[DIR_PM0] = &distributionArray[DIR_PM0 * numberOfLBnodes];
+        dist.f[DIR_MP0] = &distributionArray[DIR_MP0 * numberOfLBnodes];
+        dist.f[DIR_P0P] = &distributionArray[DIR_P0P * numberOfLBnodes];
+        dist.f[DIR_M0M] = &distributionArray[DIR_M0M * numberOfLBnodes];
+        dist.f[DIR_P0M] = &distributionArray[DIR_P0M * numberOfLBnodes];
+        dist.f[DIR_M0P] = &distributionArray[DIR_M0P * numberOfLBnodes];
+        dist.f[DIR_0PP] = &distributionArray[DIR_0PP * numberOfLBnodes];
+        dist.f[DIR_0MM] = &distributionArray[DIR_0MM * numberOfLBnodes];
+        dist.f[DIR_0PM] = &distributionArray[DIR_0PM * numberOfLBnodes];
+        dist.f[DIR_0MP] = &distributionArray[DIR_0MP * numberOfLBnodes];
+        dist.f[DIR_000] = &distributionArray[DIR_000 * numberOfLBnodes];
+        dist.f[DIR_PPP] = &distributionArray[DIR_PPP * numberOfLBnodes];
+        dist.f[DIR_MMP] = &distributionArray[DIR_MMP * numberOfLBnodes];
+        dist.f[DIR_PMP] = &distributionArray[DIR_PMP * numberOfLBnodes];
+        dist.f[DIR_MPP] = &distributionArray[DIR_MPP * numberOfLBnodes];
+        dist.f[DIR_PPM] = &distributionArray[DIR_PPM * numberOfLBnodes];
+        dist.f[DIR_MMM] = &distributionArray[DIR_MMM * numberOfLBnodes];
+        dist.f[DIR_PMM] = &distributionArray[DIR_PMM * numberOfLBnodes];
+        dist.f[DIR_MPM] = &distributionArray[DIR_MPM * numberOfLBnodes];
+    }
+    else // odd step: every slot points at the slice whose name has P and M exchanged; DIR_000 keeps its own slice
+    {
+        dist.f[DIR_M00] = &distributionArray[DIR_P00 * numberOfLBnodes];
+        dist.f[DIR_P00] = &distributionArray[DIR_M00 * numberOfLBnodes];
+        dist.f[DIR_0M0] = &distributionArray[DIR_0P0 * numberOfLBnodes];
+        dist.f[DIR_0P0] = &distributionArray[DIR_0M0 * numberOfLBnodes];
+        dist.f[DIR_00M] = &distributionArray[DIR_00P * numberOfLBnodes];
+        dist.f[DIR_00P] = &distributionArray[DIR_00M * numberOfLBnodes];
+        dist.f[DIR_MM0] = &distributionArray[DIR_PP0 * numberOfLBnodes];
+        dist.f[DIR_PP0] = &distributionArray[DIR_MM0 * numberOfLBnodes];
+        dist.f[DIR_MP0] = &distributionArray[DIR_PM0 * numberOfLBnodes];
+        dist.f[DIR_PM0] = &distributionArray[DIR_MP0 * numberOfLBnodes];
+        dist.f[DIR_M0M] = &distributionArray[DIR_P0P * numberOfLBnodes];
+        dist.f[DIR_P0P] = &distributionArray[DIR_M0M * numberOfLBnodes];
+        dist.f[DIR_M0P] = &distributionArray[DIR_P0M * numberOfLBnodes];
+        dist.f[DIR_P0M] = &distributionArray[DIR_M0P * numberOfLBnodes];
+        dist.f[DIR_0MM] = &distributionArray[DIR_0PP * numberOfLBnodes];
+        dist.f[DIR_0PP] = &distributionArray[DIR_0MM * numberOfLBnodes];
+        dist.f[DIR_0MP] = &distributionArray[DIR_0PM * numberOfLBnodes];
+        dist.f[DIR_0PM] = &distributionArray[DIR_0MP * numberOfLBnodes];
+        dist.f[DIR_000] = &distributionArray[DIR_000 * numberOfLBnodes];
+        dist.f[DIR_PPP] = &distributionArray[DIR_MMM * numberOfLBnodes];
+        dist.f[DIR_MMP] = &distributionArray[DIR_PPM * numberOfLBnodes];
+        dist.f[DIR_PMP] = &distributionArray[DIR_MPM * numberOfLBnodes];
+        dist.f[DIR_MPP] = &distributionArray[DIR_PMM * numberOfLBnodes];
+        dist.f[DIR_PPM] = &distributionArray[DIR_MMP * numberOfLBnodes];
+        dist.f[DIR_MMM] = &distributionArray[DIR_PPP * numberOfLBnodes];
+        dist.f[DIR_PMM] = &distributionArray[DIR_MPP * numberOfLBnodes];
+        dist.f[DIR_MPM] = &distributionArray[DIR_PMP * numberOfLBnodes];
+    }
+}
 
 /**
 *  Getting references to the 27 directions.
@@ -49,7 +114,11 @@ namespace gpu
 *  @params isEvenTimestep: stored data dependent on timestep is based on the esoteric twist algorithm
 *  @return a data struct containing the addresses to the 27 directions within the 1D distribution array
 */
-__device__ __host__ DistributionReferences27 getDistributionReferences27(real* distributions, unsigned int matrix_size, bool isEvenTimestep);
+__inline__ __device__ __host__ DistributionReferences27 getDistributionReferences27(real* distributions, unsigned int numberOfLBnodes, bool isEvenTimestep){
+    DistributionReferences27 references; // by-value result; its pointers are set by the call below
+    getPointersToDistributions(references, distributions, numberOfLBnodes, isEvenTimestep); // thin wrapper: all pointer selection happens in getPointersToDistributions
+    return references;
+}
 
 
 /**
@@ -88,11 +157,20 @@ struct DistributionWrapper
     const uint kbsw;
 };
 
-__device__ unsigned int getNodeIndex();
+__inline__ __device__ unsigned int getNodeIndex() // linear index: blockDim.x * (gridDim.x * blockIdx.y + blockIdx.x) + threadIdx.x
+{
+    const unsigned threadInBlock = threadIdx.x;
+    const unsigned blockColumn = blockIdx.x;
+    const unsigned blockRow = blockIdx.y;
 
-__device__ bool isValidFluidNode(uint nodeType);
+    const unsigned threadsPerBlock = blockDim.x;
+    const unsigned blocksPerRow = gridDim.x;
 
+    return threadsPerBlock * (blocksPerRow * blockRow + blockColumn) + threadInBlock; // only the x extent of the block and the x/y block coordinates are used
 }
+
+__device__ bool isValidFluidNode(uint nodeType);
+
 }
 
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelperTests.cpp b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelperTests.cpp
index 88a5ae8d113e3a998932d53c1f29358ed2598d31..580641fa8016330e9545ed07c85366a12fd2f213 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelperTests.cpp
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/DistributionHelperTests.cpp
@@ -24,33 +24,33 @@ TEST(DistributionHelperTests, getPointerToDistribution_WhenEvenTimeStep_ShouldBe
 
     Distributions27 distribution_out = vf::gpu::getDistributionReferences27(distributions_in, size_Mat, isEvenTimeStep);
 
-    EXPECT_THAT(*distribution_out.f[E], RealEq(distributions_in[E]));
-    EXPECT_THAT(*distribution_out.f[W], RealEq(distributions_in[W]));
-    EXPECT_THAT(*distribution_out.f[N], RealEq(distributions_in[N]));
-    EXPECT_THAT(*distribution_out.f[S], RealEq(distributions_in[S]));
-    EXPECT_THAT(*distribution_out.f[T], RealEq(distributions_in[T]));
-    EXPECT_THAT(*distribution_out.f[B], RealEq(distributions_in[B]));
-    EXPECT_THAT(*distribution_out.f[NE], RealEq(distributions_in[NE]));
-    EXPECT_THAT(*distribution_out.f[SW], RealEq(distributions_in[SW]));
-    EXPECT_THAT(*distribution_out.f[SE], RealEq(distributions_in[SE]));
-    EXPECT_THAT(*distribution_out.f[NW], RealEq(distributions_in[NW]));
-    EXPECT_THAT(*distribution_out.f[TE], RealEq(distributions_in[TE]));
-    EXPECT_THAT(*distribution_out.f[BW], RealEq(distributions_in[BW]));
-    EXPECT_THAT(*distribution_out.f[BE], RealEq(distributions_in[BE]));
-    EXPECT_THAT(*distribution_out.f[TW], RealEq(distributions_in[TW]));
-    EXPECT_THAT(*distribution_out.f[TN], RealEq(distributions_in[TN]));
-    EXPECT_THAT(*distribution_out.f[BS], RealEq(distributions_in[BS]));
-    EXPECT_THAT(*distribution_out.f[BN], RealEq(distributions_in[BN]));
-    EXPECT_THAT(*distribution_out.f[TS], RealEq(distributions_in[TS]));
-    EXPECT_THAT(*distribution_out.f[REST], RealEq(distributions_in[REST]));
-    EXPECT_THAT(*distribution_out.f[TNE], RealEq(distributions_in[TNE]));
-    EXPECT_THAT(*distribution_out.f[TSW], RealEq(distributions_in[TSW]));
-    EXPECT_THAT(*distribution_out.f[TSE], RealEq(distributions_in[TSE]));
-    EXPECT_THAT(*distribution_out.f[TNW], RealEq(distributions_in[TNW]));
-    EXPECT_THAT(*distribution_out.f[BNE], RealEq(distributions_in[BNE]));
-    EXPECT_THAT(*distribution_out.f[BSW], RealEq(distributions_in[BSW]));
-    EXPECT_THAT(*distribution_out.f[BSE], RealEq(distributions_in[BSE]));
-    EXPECT_THAT(*distribution_out.f[BNW], RealEq(distributions_in[BNW]));
+    EXPECT_THAT(*distribution_out.f[DIR_P00], RealEq(distributions_in[DIR_P00]));
+    EXPECT_THAT(*distribution_out.f[DIR_M00], RealEq(distributions_in[DIR_M00]));
+    EXPECT_THAT(*distribution_out.f[DIR_0P0], RealEq(distributions_in[DIR_0P0]));
+    EXPECT_THAT(*distribution_out.f[DIR_0M0], RealEq(distributions_in[DIR_0M0]));
+    EXPECT_THAT(*distribution_out.f[DIR_00P], RealEq(distributions_in[DIR_00P]));
+    EXPECT_THAT(*distribution_out.f[DIR_00M], RealEq(distributions_in[DIR_00M]));
+    EXPECT_THAT(*distribution_out.f[DIR_PP0], RealEq(distributions_in[DIR_PP0]));
+    EXPECT_THAT(*distribution_out.f[DIR_MM0], RealEq(distributions_in[DIR_MM0]));
+    EXPECT_THAT(*distribution_out.f[DIR_PM0], RealEq(distributions_in[DIR_PM0]));
+    EXPECT_THAT(*distribution_out.f[DIR_MP0], RealEq(distributions_in[DIR_MP0]));
+    EXPECT_THAT(*distribution_out.f[DIR_P0P], RealEq(distributions_in[DIR_P0P]));
+    EXPECT_THAT(*distribution_out.f[DIR_M0M], RealEq(distributions_in[DIR_M0M]));
+    EXPECT_THAT(*distribution_out.f[DIR_P0M], RealEq(distributions_in[DIR_P0M]));
+    EXPECT_THAT(*distribution_out.f[DIR_M0P], RealEq(distributions_in[DIR_M0P]));
+    EXPECT_THAT(*distribution_out.f[DIR_0PP], RealEq(distributions_in[DIR_0PP]));
+    EXPECT_THAT(*distribution_out.f[DIR_0MM], RealEq(distributions_in[DIR_0MM]));
+    EXPECT_THAT(*distribution_out.f[DIR_0PM], RealEq(distributions_in[DIR_0PM]));
+    EXPECT_THAT(*distribution_out.f[DIR_0MP], RealEq(distributions_in[DIR_0MP]));
+    EXPECT_THAT(*distribution_out.f[DIR_000], RealEq(distributions_in[DIR_000]));
+    EXPECT_THAT(*distribution_out.f[DIR_PPP], RealEq(distributions_in[DIR_PPP]));
+    EXPECT_THAT(*distribution_out.f[DIR_MMP], RealEq(distributions_in[DIR_MMP]));
+    EXPECT_THAT(*distribution_out.f[DIR_PMP], RealEq(distributions_in[DIR_PMP]));
+    EXPECT_THAT(*distribution_out.f[DIR_MPP], RealEq(distributions_in[DIR_MPP]));
+    EXPECT_THAT(*distribution_out.f[DIR_PPM], RealEq(distributions_in[DIR_PPM]));
+    EXPECT_THAT(*distribution_out.f[DIR_MMM], RealEq(distributions_in[DIR_MMM]));
+    EXPECT_THAT(*distribution_out.f[DIR_PMM], RealEq(distributions_in[DIR_PMM]));
+    EXPECT_THAT(*distribution_out.f[DIR_MPM], RealEq(distributions_in[DIR_MPM]));
 }
 
 TEST(DistributionHelperTests, getPointerToDistribution_WhenOddTimeStep_ShouldBeSwapped)
@@ -63,31 +63,31 @@ TEST(DistributionHelperTests, getPointerToDistribution_WhenOddTimeStep_ShouldBeS
 
     Distributions27 distribution_out = vf::gpu::getDistributionReferences27(distributions_in, size_Mat, isEvenTimeStep);
 
-    EXPECT_THAT(*distribution_out.f[W], RealEq(distributions_in[E]));
-    EXPECT_THAT(*distribution_out.f[E], RealEq(distributions_in[W]));
-    EXPECT_THAT(*distribution_out.f[S], RealEq(distributions_in[N]));
-    EXPECT_THAT(*distribution_out.f[N], RealEq(distributions_in[S]));
-    EXPECT_THAT(*distribution_out.f[B], RealEq(distributions_in[T]));
-    EXPECT_THAT(*distribution_out.f[T], RealEq(distributions_in[B]));
-    EXPECT_THAT(*distribution_out.f[SW], RealEq(distributions_in[NE]));
-    EXPECT_THAT(*distribution_out.f[NE], RealEq(distributions_in[SW]));
-    EXPECT_THAT(*distribution_out.f[NW], RealEq(distributions_in[SE]));
-    EXPECT_THAT(*distribution_out.f[SE], RealEq(distributions_in[NW]));
-    EXPECT_THAT(*distribution_out.f[BW], RealEq(distributions_in[TE]));
-    EXPECT_THAT(*distribution_out.f[TE], RealEq(distributions_in[BW]));
-    EXPECT_THAT(*distribution_out.f[TW], RealEq(distributions_in[BE]));
-    EXPECT_THAT(*distribution_out.f[BE], RealEq(distributions_in[TW]));
-    EXPECT_THAT(*distribution_out.f[BS], RealEq(distributions_in[TN]));
-    EXPECT_THAT(*distribution_out.f[TN], RealEq(distributions_in[BS]));
-    EXPECT_THAT(*distribution_out.f[TS], RealEq(distributions_in[BN]));
-    EXPECT_THAT(*distribution_out.f[BN], RealEq(distributions_in[TS]));
-    EXPECT_THAT(*distribution_out.f[REST], RealEq(distributions_in[REST]));
-    EXPECT_THAT(*distribution_out.f[BSW], RealEq(distributions_in[TNE]));
-    EXPECT_THAT(*distribution_out.f[BNE], RealEq(distributions_in[TSW]));
-    EXPECT_THAT(*distribution_out.f[BNW], RealEq(distributions_in[TSE]));
-    EXPECT_THAT(*distribution_out.f[BSE], RealEq(distributions_in[TNW]));
-    EXPECT_THAT(*distribution_out.f[TSW], RealEq(distributions_in[BNE]));
-    EXPECT_THAT(*distribution_out.f[TNE], RealEq(distributions_in[BSW]));
-    EXPECT_THAT(*distribution_out.f[TNW], RealEq(distributions_in[BSE]));
-    EXPECT_THAT(*distribution_out.f[TSE], RealEq(distributions_in[BNW]));
+    EXPECT_THAT(*distribution_out.f[DIR_M00], RealEq(distributions_in[DIR_P00]));
+    EXPECT_THAT(*distribution_out.f[DIR_P00], RealEq(distributions_in[DIR_M00]));
+    EXPECT_THAT(*distribution_out.f[DIR_0M0], RealEq(distributions_in[DIR_0P0]));
+    EXPECT_THAT(*distribution_out.f[DIR_0P0], RealEq(distributions_in[DIR_0M0]));
+    EXPECT_THAT(*distribution_out.f[DIR_00M], RealEq(distributions_in[DIR_00P]));
+    EXPECT_THAT(*distribution_out.f[DIR_00P], RealEq(distributions_in[DIR_00M]));
+    EXPECT_THAT(*distribution_out.f[DIR_MM0], RealEq(distributions_in[DIR_PP0]));
+    EXPECT_THAT(*distribution_out.f[DIR_PP0], RealEq(distributions_in[DIR_MM0]));
+    EXPECT_THAT(*distribution_out.f[DIR_MP0], RealEq(distributions_in[DIR_PM0]));
+    EXPECT_THAT(*distribution_out.f[DIR_PM0], RealEq(distributions_in[DIR_MP0]));
+    EXPECT_THAT(*distribution_out.f[DIR_M0M], RealEq(distributions_in[DIR_P0P]));
+    EXPECT_THAT(*distribution_out.f[DIR_P0P], RealEq(distributions_in[DIR_M0M]));
+    EXPECT_THAT(*distribution_out.f[DIR_M0P], RealEq(distributions_in[DIR_P0M]));
+    EXPECT_THAT(*distribution_out.f[DIR_P0M], RealEq(distributions_in[DIR_M0P]));
+    EXPECT_THAT(*distribution_out.f[DIR_0MM], RealEq(distributions_in[DIR_0PP]));
+    EXPECT_THAT(*distribution_out.f[DIR_0PP], RealEq(distributions_in[DIR_0MM]));
+    EXPECT_THAT(*distribution_out.f[DIR_0MP], RealEq(distributions_in[DIR_0PM]));
+    EXPECT_THAT(*distribution_out.f[DIR_0PM], RealEq(distributions_in[DIR_0MP]));
+    EXPECT_THAT(*distribution_out.f[DIR_000], RealEq(distributions_in[DIR_000]));
+    EXPECT_THAT(*distribution_out.f[DIR_MMM], RealEq(distributions_in[DIR_PPP]));
+    EXPECT_THAT(*distribution_out.f[DIR_PPM], RealEq(distributions_in[DIR_MMP]));
+    EXPECT_THAT(*distribution_out.f[DIR_MPM], RealEq(distributions_in[DIR_PMP]));
+    EXPECT_THAT(*distribution_out.f[DIR_PMM], RealEq(distributions_in[DIR_MPP]));
+    EXPECT_THAT(*distribution_out.f[DIR_MMP], RealEq(distributions_in[DIR_PPM]));
+    EXPECT_THAT(*distribution_out.f[DIR_PPP], RealEq(distributions_in[DIR_MMM]));
+    EXPECT_THAT(*distribution_out.f[DIR_MPP], RealEq(distributions_in[DIR_PMM]));
+    EXPECT_THAT(*distribution_out.f[DIR_PMP], RealEq(distributions_in[DIR_MPM]));
 }
diff --git a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.cpp b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.cpp
index 41df8c6e79bd0a450108e1558b388608fb4e2b83..53ec240f096080097416e640fdd095c3812fb34c 100644
--- a/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.cpp
+++ b/src/gpu/VirtualFluids_GPU/Kernel/Utilities/KernelFactory/KernelFactoryImp.cpp
@@ -12,6 +12,7 @@
 #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Unified/CumulantK17Unified.h"
 #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chim/CumulantK17CompChim.h"
 #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimStream/CumulantK17CompChimStream.h"
+#include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17chimRedesigned/CumulantK17CompChimRedesigned.h"
 #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK17Bulk/CumulantK17BulkComp.h"
 #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantAll4/CumulantAll4CompSP27.h"
 #include "Kernel/Kernels/BasicKernels/FluidFlow/Compressible/CumulantK18/CumulantK18Comp.h"
@@ -135,6 +136,9 @@ std::shared_ptr<Kernel> KernelFactoryImp::makeKernel(std::shared_ptr<Parameter>
     } else if (kernel == "CumulantK17CompChimStream") {
         newKernel     = CumulantK17CompChimStream::getNewInstance(para, level);
         checkStrategy = FluidFlowCompStrategy::getInstance();
+    } else if (kernel == "CumulantK17CompChimRedesigned") {
+        newKernel     = CumulantK17CompChimRedesigned::getNewInstance(para, level);
+        checkStrategy = FluidFlowCompStrategy::getInstance();
     } else if (kernel == "CumulantAll4CompSP27") {
         newKernel     = CumulantAll4CompSP27::getNewInstance(para, level);
         checkStrategy = FluidFlowCompStrategy::getInstance();
@@ -191,19 +195,40 @@ std::shared_ptr<Kernel> KernelFactoryImp::makeKernel(std::shared_ptr<Parameter>
         checkStrategy = WaleFluidFlowCompStrategy::getInstance();
     } else if (kernel == "WaleBySoniMalavCumulantK15Comp") {                    //     /\      //
         newKernel     = WaleBySoniMalavCumulantK15Comp::getNewInstance(para, level);// ||
-        checkStrategy = WaleFluidFlowCompStrategy::getInstance();                    // wale model
-    }                                                                           //===============
-    else if (kernel == "TurbulentViscosityCumulantK17CompChim"){                               // AMD model
-        newKernel     = TurbulentViscosityCumulantK17CompChim::getNewInstance(para, level);    //      ||
-        checkStrategy = TurbulentViscosityFluidFlowCompStrategy::getInstance();                //      \/
+        checkStrategy = WaleFluidFlowCompStrategy::getInstance();               // wale model
+    }                                                                          //===============
+    else if (kernel == "TurbulentViscosityCumulantK17CompChim"){               // compressible with turbulent viscosity
+        switch(para->getTurbulenceModel())                                     //       ||          
+        {                                                                      //       \/      //
+            case TurbulenceModel::AMD:
+                newKernel = TurbulentViscosityCumulantK17CompChim<TurbulenceModel::AMD>::getNewInstance(para, level);   
+                break;
+            case TurbulenceModel::Smagorinsky:
+                newKernel = TurbulentViscosityCumulantK17CompChim<TurbulenceModel::Smagorinsky>::getNewInstance(para, level);  
+                break;
+            case TurbulenceModel::QR:
+                newKernel = TurbulentViscosityCumulantK17CompChim<TurbulenceModel::QR>::getNewInstance(para, level);  
+                break;
+            case TurbulenceModel::None:
+                throw std::runtime_error("TurbulentViscosityCumulantK17CompChim currently not implemented for TurbulenceModel::None!");
+                break;
+            default:
+                throw std::runtime_error("Unknown turbulence model!");
+            break;                                                              
+        }                                                                       
+        checkStrategy = TurbulentViscosityFluidFlowCompStrategy::getInstance(); 
+                                                                                //     /\      //
+                                                                                //     ||    
+                                                                                // compressible with turbulent viscosity  
+                                                                                //===============         
     }
     else {
         throw std::runtime_error("KernelFactory does not know the KernelType.");
     }
 
-	newKernel->setCheckParameterStrategy(checkStrategy);
-	return newKernel;
-
+    newKernel->setCheckParameterStrategy(checkStrategy);
+    para->setKernelNeedsFluidNodeIndicesToRun(newKernel->getKernelUsesFluidNodeIndices());
+    return newKernel;
 }
 
 std::shared_ptr<ADKernel> KernelFactoryImp::makeAdvDifKernel(std::shared_ptr<Parameter> para, std::string kernel, int level)
diff --git a/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.cpp b/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.cpp
index fee3538169fcc081c99bf858ae81bdf075dc4f13..574c235c86823a3cea551555235c03152d74565c 100644
--- a/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.cpp
+++ b/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.cpp
@@ -32,23 +32,39 @@
 //=======================================================================================
 #include <cuda_runtime.h>
 #include <helper_cuda.h>
+#include <iostream>
+#include <stdexcept>
+#include <string>
 
 #include "BCKernelManager.h"
-#include "Parameter/Parameter.h"
-#include "GPU/GPU_Interface.h"
-#include "Calculation/DragLift.h"
-#include "Calculation/Cp.h"
 #include "BoundaryConditions/BoundaryConditionFactory.h"
 #include "gpu/VirtualFluids_GPU/PreCollisionInteractor/VelocitySetter.h"
+#include "Calculation/Cp.h"
+#include "Calculation/DragLift.h"
+#include "GPU/GPU_Interface.h"
+#include "Parameter/Parameter.h"
 
-BCKernelManager::BCKernelManager(SPtr<Parameter> parameter, BoundaryConditionFactory* bcFactory): para(parameter)
+BCKernelManager::BCKernelManager(SPtr<Parameter> parameter, BoundaryConditionFactory *bcFactory) : para(parameter) // stores the parameter object, fetches the boundary-condition callables from bcFactory (dereferenced without a null check) and passes each to checkBoundaryCondition
 {
     this->velocityBoundaryConditionPost = bcFactory->getVelocityBoundaryConditionPost();
     this->noSlipBoundaryConditionPost   = bcFactory->getNoSlipBoundaryConditionPost();
     this->slipBoundaryConditionPost     = bcFactory->getSlipBoundaryConditionPost();
     this->pressureBoundaryConditionPre  = bcFactory->getPressureBoundaryConditionPre();
     this->geometryBoundaryConditionPost = bcFactory->getGeometryBoundaryConditionPost();
-    this->stressBoundaryConditionPost       = bcFactory->getStressBoundaryConditionPost();
+    this->stressBoundaryConditionPost   = bcFactory->getStressBoundaryConditionPost();
+
+    checkBoundaryCondition(this->velocityBoundaryConditionPost, this->para->getParD(0)->velocityBC, // NOTE(review): every check below looks only at level 0 (getParD(0)) - confirm other levels need no check
+                           "velocityBoundaryConditionPost"); // third argument is the member's name as a string, presumably for diagnostics - checkBoundaryCondition is defined elsewhere
+    checkBoundaryCondition(this->noSlipBoundaryConditionPost, this->para->getParD(0)->noSlipBC,
+                           "noSlipBoundaryConditionPost");
+    checkBoundaryCondition(this->slipBoundaryConditionPost, this->para->getParD(0)->slipBC,
+                           "slipBoundaryConditionPost");
+    checkBoundaryCondition(this->pressureBoundaryConditionPre, this->para->getParD(0)->pressureBC,
+                           "pressureBoundaryConditionPre");
+    checkBoundaryCondition(this->geometryBoundaryConditionPost, this->para->getParD(0)->geometryBC,
+                           "geometryBoundaryConditionPost");
+    checkBoundaryCondition(this->stressBoundaryConditionPost, this->para->getParD(0)->stressBC,
+                           "stressBoundaryConditionPost");
 }
 
 void BCKernelManager::runVelocityBCKernelPre(const int level) const
@@ -159,7 +175,7 @@ void BCKernelManager::runGeoBCKernelPre(const int level, unsigned int t, CudaMem
             //Calculation of cp
             ////////////////////////////////////////////////////////////////////////////////
 
-            if(t > para->getTStartOut())
+            if(t > para->getTimestepStartOut())
             {
                 ////////////////////////////////////////////////////////////////////////////////
                 CalcCPtop27(
diff --git a/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.h b/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.h
index 1aa85fac3cc30ec68cca9b0627d2190b716eedcd..2c81e9f8baaf6251e16b4bcc6b793bce3c9b38ef 100644
--- a/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.h
+++ b/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManager.h
@@ -26,19 +26,21 @@
 //  You should have received a copy of the GNU General Public License along
 //  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
 //
-//! \file LBKernelManager.h
+//! \file BCKernelManager.h
 //! \ingroup KernelManager
-//! \author Martin Schoenherr
+//! \author Martin Schoenherr, Anna Wellmann
 //=======================================================================================
-#ifndef LBKernelManager_H
-#define LBKernelManager_H
+#ifndef BCKernelManager_H
+#define BCKernelManager_H
 
 #include <functional>
 #include <memory>
+#include <stdexcept>
+#include <string>
 
+#include "LBM/LB.h"
 #include "PointerDefinitions.h"
 #include "VirtualFluids_GPU_export.h"
-#include "LBM/LB.h"
 
 class CudaMemoryManager;
 class BoundaryConditionFactory;
@@ -46,19 +47,19 @@ class Parameter;
 struct LBMSimulationParameter;
 
 using boundaryCondition = std::function<void(LBMSimulationParameter *, QforBoundaryConditions *)>;
-using boundaryConditionPara = std::function<void(Parameter *, QforBoundaryConditions *, const int level)>;
+using boundaryConditionWithParameter = std::function<void(Parameter *, QforBoundaryConditions *, const int level)>;
 
-//! \class LBKernelManager
-//! \brief manage the cuda kernel calls
+//! \class BCKernelManager
+//! \brief manage the cuda kernel calls to boundary conditions
+//! \details This class stores the boundary conditions and manages the calls to the boundary condition kernels.
 class VIRTUALFLUIDS_GPU_EXPORT BCKernelManager
 {
 public:
     //! Class constructor
     //! \param parameter shared pointer to instance of class Parameter
+    //! \throws std::runtime_error when the user forgets to specify a boundary condition
     BCKernelManager(SPtr<Parameter> parameter, BoundaryConditionFactory *bcFactory);
 
-    void setBoundaryConditionKernels();
-
     //! \brief calls the device function of the velocity boundary condition (post-collision)
     void runVelocityBCKernelPost(const int level) const;
 
@@ -71,10 +72,10 @@ public:
     //! \brief calls the device function of the geometry boundary condition (pre-collision)
     void runGeoBCKernelPre(const int level, unsigned int t, CudaMemoryManager *cudaMemoryManager) const;
 
-    //! \brief calls the device function of the slip boundary condition
+    //! \brief calls the device function of the slip boundary condition (post-collision)
     void runSlipBCKernelPost(const int level) const;
 
-    //! \brief calls the device function of the no-slip boundary condition
+    //! \brief calls the device function of the no-slip boundary condition (post-collision)
     void runNoSlipBCKernelPost(const int level) const;
 
     //! \brief calls the device function of the pressure boundary condition (pre-collision)
@@ -89,20 +90,29 @@ public:
     //! \brief calls the device function of the outflow boundary condition
     void runOutflowBCKernelPre(const int level) const;
 
-    //! \brief calls the device function of the stress wall model
+    //! \brief calls the device function of the stress wall model (post-collision)
     void runStressWallModelKernelPost(const int level) const;
 
-    //! \brief calls the device function that calculates the macroscopic values
-    void calculateMacroscopicValues(const int level) const;
-
 private:
+    //! \brief verify that a boundary condition kernel exists wherever boundary nodes exist
+    //! \param boundaryCondition the kernel function obtained from the boundary condition factory (may be empty)
+    //! \param bcStruct the struct containing the grid nodes which are part of the boundary condition
+    //! \param bcName the name of the checked boundary condition, used in the error message
+    //! \throws std::runtime_error if bcStruct holds boundary nodes, but boundaryCondition is empty
+    template <typename bcFunction>
+    static void checkBoundaryCondition(const bcFunction &boundaryCondition, const QforBoundaryConditions &bcStruct, const std::string &bcName)
+    {
+        if (!boundaryCondition && bcStruct.numberOfBCnodes > 0)
+            throw std::runtime_error("The boundary condition " + bcName + " was not set!");
+    }
+
     SPtr<Parameter> para;
 
-    boundaryCondition velocityBoundaryConditionPost;
-    boundaryCondition noSlipBoundaryConditionPost;
-    boundaryCondition slipBoundaryConditionPost;
-    boundaryCondition pressureBoundaryConditionPre;
-    boundaryCondition geometryBoundaryConditionPost;
-    boundaryConditionPara stressBoundaryConditionPost;
+    boundaryCondition velocityBoundaryConditionPost = nullptr;
+    boundaryCondition noSlipBoundaryConditionPost = nullptr;
+    boundaryCondition slipBoundaryConditionPost = nullptr;
+    boundaryCondition pressureBoundaryConditionPre = nullptr;
+    boundaryCondition geometryBoundaryConditionPost = nullptr;
+    boundaryConditionWithParameter stressBoundaryConditionPost = nullptr;
 };
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManagerTest.cpp b/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManagerTest.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..13548950b96c3ac4e4479c44555a99fdf3cb09b7
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/KernelManager/BCKernelManagerTest.cpp
@@ -0,0 +1,55 @@
+#include <gmock/gmock.h>
+#include <stdexcept>
+
+#include "BCKernelManager.h"
+#include "BoundaryConditions/BoundaryConditionFactory.h"
+#include "Parameter/Parameter.h"
+#include "PointerDefinitions.h"
+
+class BCKernelManagerTest_BCsNotSpecified : public testing::Test // fixture: construct BCKernelManager from a factory on which nothing was set
+{
+protected:
+    BoundaryConditionFactory bcFactory; // default-constructed; no test sets a boundary condition on it
+    SPtr<Parameter> para = std::make_shared<Parameter>();
+
+    void SetUp() override
+    {
+        para->initLBMSimulationParameter(); // presumably creates the per-level structs read via getParD(0) - TODO confirm
+    }
+};
+
+TEST_F(BCKernelManagerTest_BCsNotSpecified, velocityBoundaryConditionPost_NotSpecified)
+{
+    para->getParD(0)->velocityBC.numberOfBCnodes = 1; // pretend level 0 owns one velocity boundary node
+    EXPECT_THROW(BCKernelManager(para, &bcFactory), std::runtime_error); // nodes present, kernel expected to be missing
+}
+
+TEST_F(BCKernelManagerTest_BCsNotSpecified, noSlipBoundaryConditionPost_NotSpecified)
+{
+    para->getParD(0)->noSlipBC.numberOfBCnodes = 1; // pretend level 0 owns one no-slip boundary node
+    EXPECT_NO_THROW(BCKernelManager(para, &bcFactory)); // no throw, as a default is specified
+}
+
+TEST_F(BCKernelManagerTest_BCsNotSpecified, slipBoundaryConditionPost_NotSpecified)
+{
+    para->getParD(0)->slipBC.numberOfBCnodes = 1; // pretend level 0 owns one slip boundary node
+    EXPECT_THROW(BCKernelManager(para, &bcFactory), std::runtime_error); // nodes present, kernel expected to be missing
+}
+
+TEST_F(BCKernelManagerTest_BCsNotSpecified, pressureBoundaryConditionPre_NotSpecified)
+{
+    para->getParD(0)->pressureBC.numberOfBCnodes = 1; // pretend level 0 owns one pressure boundary node
+    EXPECT_THROW(BCKernelManager(para, &bcFactory), std::runtime_error); // nodes present, kernel expected to be missing
+}
+
+TEST_F(BCKernelManagerTest_BCsNotSpecified, geometryBoundaryConditionPost_NotSpecified)
+{
+    para->getParD(0)->geometryBC.numberOfBCnodes = 1; // pretend level 0 owns one geometry boundary node
+    EXPECT_NO_THROW(BCKernelManager(para, &bcFactory)); // no throw, as a default is specified
+}
+
+TEST_F(BCKernelManagerTest_BCsNotSpecified, stressBoundaryConditionPost_NotSpecified)
+{
+    para->getParD(0)->stressBC.numberOfBCnodes = 1; // pretend level 0 owns one stress boundary node
+    EXPECT_THROW(BCKernelManager(para, &bcFactory), std::runtime_error); // nodes present, kernel expected to be missing
+}
diff --git a/src/gpu/VirtualFluids_GPU/LBM/LB.h b/src/gpu/VirtualFluids_GPU/LBM/LB.h
index 510f057270e76af7d2e9547840e21242dd061e6c..0539f3789c17b42761e0097e2757ac1213bbe8f0 100644
--- a/src/gpu/VirtualFluids_GPU/LBM/LB.h
+++ b/src/gpu/VirtualFluids_GPU/LBM/LB.h
@@ -1,7 +1,7 @@
 //  _    ___      __              __________      _     __        ______________   __
 // | |  / (_)____/ /___  ______ _/ / ____/ /_  __(_)___/ /____   /  ___/ __  / /  / /
 // | | / / / ___/ __/ / / / __ `/ / /_  / / / / / / __  / ___/  / /___/ /_/ / /  / /
-// | |/ / / /  / /_/ /_/ / /_/ / / __/ / / /_/ / / /_/ (__  )  / /_) / ____/ /__/ / 
+// | |/ / / /  / /_/ /_/ / /_/ / / __/ / / /_/ / / /_/ (__  )  / /_) / ____/ /__/ /
 // |___/_/_/   \__/\__,_/\__,_/_/_/   /_/\__,_/_/\__,_/____/   \____/_/    \_____/
 //
 //////////////////////////////////////////////////////////////////////////
@@ -50,6 +50,19 @@
 #include <string>
 #include <vector>
 
+//! \brief An enumeration for selecting a turbulence model
+enum class TurbulenceModel {
+    //! - Smagorinsky
+    Smagorinsky,
+    //! - AMD (Anisotropic Minimum Dissipation) model, see e.g. Rozema et al., Phys. Fluids 27, 085107 (2015), https://doi.org/10.1063/1.4928700
+    AMD,
+    //! - QR model by Verstappen
+    QR,
+    // TODO: move the WALE model here from the old kernels
+    // WALE,
+    //! - No turbulence model
+    None
+};
 
 struct InitCondition
 {
@@ -61,7 +74,7 @@ struct InitCondition
    real RealX {1.0};
    real RealY {1.0};
    int numprocs {1};
-   int myid {0};
+   int myProcessId {0};
    int maxdev {1};
    uint tDoCheckPoint {0};
    uint tDoRestart {0};
@@ -78,10 +91,10 @@ struct InitCondition
    std::vector<int> GridX, GridY, GridZ, DistX, DistY, DistZ;
    std::vector<real> scaleLBMtoSI, translateLBMtoSI;
    std::vector<real> minCoordX, minCoordY, minCoordZ, maxCoordX, maxCoordY, maxCoordZ;
-   std::string fname;
+   std::string fname {"output/simulation"};
    std::string oPath {"output/"};
    std::string gridPath {"grid/"};
-   std::string oPrefix {"MyFile"};
+   std::string oPrefix {"simulation"};
    std::string geometryFileC, geometryFileM, geometryFileF;
    std::string kFull, geoFull, geoVec, coordX, coordY, coordZ, neighborX, neighborY, neighborZ, neighborWSB, scaleCFC, scaleCFF, scaleFCC, scaleFCF, scaleOffsetCF, scaleOffsetFC;
    std::string noSlipBcPos, noSlipBcQs, noSlipBcValue;
@@ -122,8 +135,8 @@ struct InitCondition
    bool calcMedian {false};
    bool isConc {false};
    bool isWale {false};
+   TurbulenceModel turbulenceModel {TurbulenceModel::None};
    bool isTurbulentViscosity {false};
-   bool isAMD {false};
    real SGSConstant {0.0};
    bool isMeasurePoints {false};
    bool isInitNeq {false};
@@ -131,6 +144,7 @@ struct InitCondition
    bool hasWallModelMonitor {false};
    bool simulatePorousMedia {false};
    bool streetVelocityFile {false};
+   real outflowPressureCorrectionFactor {0.0};
 };
 
 //Interface Cells
@@ -301,6 +315,8 @@ typedef struct PLP{
 	uint memSizeID, memSizeTimestep, memSizerealAll, memSizereal, memSizeBool, memSizeBoolBC;
 }PathLineParticles;
 
+
+
 //////////////////////////////////////////////////////////////////////////
 inline int vectorPosition(int i, int j, int k, int Lx, int Ly )
 {
diff --git a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
index 4cdb9f1db6514d61ff90327a928822df924e7270..83ca85243ae4e8cc8ff1316e73ed8c5ae3816516 100644
--- a/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
+++ b/src/gpu/VirtualFluids_GPU/LBM/Simulation.cpp
@@ -1,10 +1,10 @@
 #include "Simulation.h"
 
-#include <stdio.h>
 #include <vector>
 
 #include <helper_timer.h>
 
+
 #include "LBM/LB.h"
 #include "Communication/Communicator.h"
 #include "Communication/ExchangeData27.h"
@@ -20,6 +20,7 @@
 #include "Output/AnalysisData.hpp"
 #include "Output/InterfaceDebugWriter.hpp"
 #include "Output/EdgeNodeDebugWriter.hpp"
+#include "Output/NeighborDebugWriter.hpp"
 #include "Output/VeloASCIIWriter.hpp"
 //////////////////////////////////////////////////////////////////////////
 #include "Utilities/Buffer2D.hpp"
@@ -44,6 +45,7 @@
 #include "Calculation/PorousMedia.h"
 //////////////////////////////////////////////////////////////////////////
 #include "Output/Timer.h"
+#include "Output/FileWriter.h"
 //////////////////////////////////////////////////////////////////////////
 #include "Restart/RestartObject.h"
 //////////////////////////////////////////////////////////////////////////
@@ -51,15 +53,14 @@
 #include "Output/DataWriter.h"
 #include "Kernel/Utilities/KernelFactory/KernelFactory.h"
 #include "PreProcessor/PreProcessorFactory/PreProcessorFactory.h"
-#include "Kernel/Utilities/KernelFactory/KernelFactoryImp.h"
 #include "PreProcessor/PreProcessorFactory/PreProcessorFactoryImp.h"
+#include "Kernel/Utilities/KernelFactory/KernelFactoryImp.h"
 #include "Kernel/Kernel.h"
-
+#include "TurbulenceModels/TurbulenceModelFactory.h"
 #include <cuda/DeviceInfo.h>
 
 #include <logger/Logger.h>
 
-#include "Output/FileWriter.h"
 
 
 std::string getFileName(const std::string& fname, int step, int myID)
@@ -70,12 +71,29 @@ std::string getFileName(const std::string& fname, int step, int myID)
 Simulation::Simulation(std::shared_ptr<Parameter> para, std::shared_ptr<CudaMemoryManager> memoryManager,
                        vf::gpu::Communicator &communicator, GridProvider &gridProvider, BoundaryConditionFactory* bcFactory)
     : para(para), cudaMemoryManager(memoryManager), communicator(communicator), kernelFactory(std::make_unique<KernelFactoryImp>()),
-      preProcessorFactory(std::make_unique<PreProcessorFactoryImp>()), dataWriter(std::make_unique<FileWriter>())
+      preProcessorFactory(std::make_shared<PreProcessorFactoryImp>()), dataWriter(std::make_unique<FileWriter>())
+{
+    // No turbulence model factory supplied: build one from the simulation parameters and keep it as a member.
+    this->tmFactory = std::make_shared<TurbulenceModelFactory>(para);
+    init(gridProvider, bcFactory, this->tmFactory);
+}
+
+Simulation::Simulation(std::shared_ptr<Parameter> para, std::shared_ptr<CudaMemoryManager> memoryManager,
+                       vf::gpu::Communicator &communicator, GridProvider &gridProvider, BoundaryConditionFactory* bcFactory, SPtr<TurbulenceModelFactory> tmFactory)
+    : para(para), cudaMemoryManager(memoryManager), communicator(communicator), kernelFactory(std::make_unique<KernelFactoryImp>()),
+      preProcessorFactory(std::make_shared<PreProcessorFactoryImp>()), dataWriter(std::make_unique<FileWriter>())
+{
+    // Store the caller's factory too, so that both constructors leave the tmFactory member set.
+    this->tmFactory = tmFactory;
+    init(gridProvider, bcFactory, this->tmFactory);
+}
+
+void Simulation::init(GridProvider &gridProvider, BoundaryConditionFactory *bcFactory, SPtr<TurbulenceModelFactory> tmFactory)
 {
     gridProvider.initalGridInformations();
 
     vf::cuda::verifyAndSetDevice(
-        communicator.mapCudaDevice(para->getMyID(), para->getNumprocs(), para->getDevices(), para->getMaxDev()));
+        communicator.mapCudaDevice(para->getMyProcessID(), para->getNumprocs(), para->getDevices(), para->getMaxDev()));
 
     para->initLBMSimulationParameter();
 
@@ -98,43 +113,19 @@ Simulation::Simulation(std::shared_ptr<Parameter> para, std::shared_ptr<CudaMemo
 
     restart_object = std::make_shared<ASCIIRestartObject>();
     //////////////////////////////////////////////////////////////////////////
-    output.setName(para->getFName() + StringUtil::toString<int>(para->getMyID()) + ".log");
-    if (para->getMyID() == 0)
-        output.setConsoleOut(true);
-    output.clearLogFile();
-    //////////////////////////////////////////////////////////////////////////
     // CUDA streams
     if (para->getUseStreams()) {
         para->getStreamManager()->launchStreams(2u);
         para->getStreamManager()->createCudaEvents();
     }
     //////////////////////////////////////////////////////////////////////////
-    //
-    // output << para->getNeedInterface().at(0) << "\n";
-    // output << para->getNeedInterface().at(1) << "\n";
-    // output << para->getNeedInterface().at(2) << "\n";
-    // output << para->getNeedInterface().at(3) << "\n";
-    // output << para->getNeedInterface().at(4) << "\n";
-    // output << para->getNeedInterface().at(5) << "\n";
-    //////////////////////////////////////////////////////////////////////////
-    // output << "      \t GridX \t GridY \t GridZ \t DistX \t DistY \t DistZ\n";
-    // for (int testout=0; testout<=para->getMaxLevel();testout++)
-    //{
-    //   output << "Level " << testout << ":  " << para->getGridX().at(testout) << " \t " <<
-    //   para->getGridY().at(testout) << " \t " << para->getGridZ().at(testout) << " \t " <<
-    //   para->getDistX().at(testout) << " \t " << para->getDistY().at(testout) << " \t " <<
-    //   para->getDistZ().at(testout) << " \n";
-    //}
-    //////////////////////////////////////////////////////////////////////////
-    output << "LB_Modell:  D3Q" << para->getD3Qxx() << "\n";
-    output << "Re:         " << para->getRe() << "\n";
-    output << "vis_ratio:  " << para->getViscosityRatio() << "\n";
-    output << "u0_ratio:   " << para->getVelocityRatio() << "\n";
-    output << "delta_rho:  " << para->getDensityRatio() << "\n";
-    output << "QuadricLimiters:  " << para->getQuadricLimitersHost()[0] << "\t" << para->getQuadricLimitersHost()[1]
-           << "\t" << para->getQuadricLimitersHost()[2] << "\n";
-    if (para->getUseAMD())
-        output << "AMD SGS model:  " << para->getSGSConstant() << "\n";
+    VF_LOG_INFO("LB_Modell:       D3Q{}", para->getD3Qxx());
+    VF_LOG_INFO("Re:              {}", para->getRe());
+    VF_LOG_INFO("vis_ratio:       {}", para->getViscosityRatio());
+    VF_LOG_INFO("u0_ratio:        {}", para->getVelocityRatio());
+    VF_LOG_INFO("delta_rho:       {}", para->getDensityRatio());
+    VF_LOG_INFO("QuadricLimiters: {}, \t{}, \t{}", para->getQuadricLimitersHost()[0],
+                para->getQuadricLimitersHost()[1], para->getQuadricLimitersHost()[2]);
     //////////////////////////////////////////////////////////////////////////
 
     /////////////////////////////////////////////////////////////////////////
@@ -153,27 +144,25 @@ Simulation::Simulation(std::shared_ptr<Parameter> para, std::shared_ptr<CudaMemo
     //////////////////////////////////////////////////////////////////////////
     // Kernel init
     //////////////////////////////////////////////////////////////////////////
-    output << "make Kernels  "
-           << "\n";
+    VF_LOG_INFO("make Kernels");
     kernels = kernelFactory->makeKernels(para);
 
-    output << "make AD Kernels  "
-           << "\n";
-    if (para->getDiffOn())
+    if (para->getDiffOn()) {
+        VF_LOG_INFO("make AD Kernels");
         adKernels = kernelFactory->makeAdvDifKernels(para);
+    }
 
     //////////////////////////////////////////////////////////////////////////
     // PreProcessor init
     //////////////////////////////////////////////////////////////////////////
-    output << "make Preprocessors  "
-           << "\n";
+    VF_LOG_INFO("make Preprocessors");
     std::vector<PreProcessorType> preProTypes = kernels.at(0)->getPreProcessorTypes();
     preProcessor = preProcessorFactory->makePreProcessor(preProTypes, para);
 
     //////////////////////////////////////////////////////////////////////////
     // Particles preprocessing
     //////////////////////////////////////////////////////////////////////////
-    if (para->getCalcParticle()) {
+    if (para->getCalcParticles()) {
         rearrangeGeometry(para.get(), cudaMemoryManager.get());
         //////////////////////////////////////////////////////////////////////////
         allocParticles(para.get(), cudaMemoryManager.get());
@@ -212,8 +201,7 @@ Simulation::Simulation(std::shared_ptr<Parameter> para, std::shared_ptr<CudaMemo
     // Median
     //////////////////////////////////////////////////////////////////////////
     if (para->getCalcMedian()) {
-        output << "alloc Calculation for Mean Values  "
-               << "\n";
+        VF_LOG_INFO("alloc Calculation for Mean Values");
         if (para->getDiffOn())
             allocMedianAD(para.get(), cudaMemoryManager.get());
         else
@@ -224,8 +212,7 @@ Simulation::Simulation(std::shared_ptr<Parameter> para, std::shared_ptr<CudaMemo
     // Turbulence Intensity
     //////////////////////////////////////////////////////////////////////////
     if (para->getCalcTurbulenceIntensity()) {
-        output << "alloc arrays for calculating Turbulence Intensity  "
-               << "\n";
+        VF_LOG_INFO("alloc arrays for calculating Turbulence Intensity");
         allocTurbulenceIntensity(para.get(), cudaMemoryManager.get());
     }
 
@@ -249,112 +236,103 @@ Simulation::Simulation(std::shared_ptr<Parameter> para, std::shared_ptr<CudaMemo
     // MeasurePoints
     //////////////////////////////////////////////////////////////////////////
     if (para->getUseMeasurePoints()) {
-        output << "read measure points...";
+        VF_LOG_INFO("read measure points");
         readMeasurePoints(para.get(), cudaMemoryManager.get());
-        output << "done.\n";
     }
 
     //////////////////////////////////////////////////////////////////////////
     // Porous Media
     //////////////////////////////////////////////////////////////////////////
     if (para->getSimulatePorousMedia()) {
-        output << "define area(s) of porous media...";
+        VF_LOG_INFO("define area(s) of porous media");
         porousMedia();
         kernelFactory->setPorousMedia(pm);
-        output << "done.\n";
     }
 
     //////////////////////////////////////////////////////////////////////////
     // enSightGold
     //////////////////////////////////////////////////////////////////////////
     // excludeGridInterfaceNodesForMirror(para, 7);
-    ////output << "print case file...";
+    ////VF_LOG_INFO("print case file...");
     // printCaseFile(para);
-    ////output << "done.\n";
-    ////output << "print geo file...";
+    ////VF_LOG_INFO("print geo file...");
     // printGeoFile(para, true);  //true for binary
-    ////output << "done.\n";
+    ////VF_LOG_INFO("done.");
 
     //////////////////////////////////////////////////////////////////////////
     // Forcing
     //////////////////////////////////////////////////////////////////////////
     ////allocVeloForForcing(para);
-    // output << "new object forceCalulator  " << "\n";
+    // VF_LOG_INFO("new object forceCalulator");
     // forceCalculator = std::make_shared<ForceCalculations>(para.get());
 
     //////////////////////////////////////////////////////////////////////////
-    // output << "define the Grid..." ;
+    // VF_LOG_INFO("define the Grid...");
     // defineGrid(para, communicator);
     ////allocateMemory();
-    // output << "done.\n";
+    // VF_LOG_INFO("done.");
 
-    output << "init lattice...";
+    VF_LOG_INFO("init lattice...");
     initLattice(para, preProcessor, cudaMemoryManager);
-    output << "done.\n";
+    VF_LOG_INFO("done");
 
-    // output << "set geo for Q...\n" ;
+    // VF_LOG_INFO("set geo for Q...\n");
     // setGeoForQ();
-    // output << "done.\n";
 
     // if (maxlevel>1)
     //{
-    // output << "find Qs...\n" ;
+    // VF_LOG_INFO("find Qs...");
     // findQ27(para);
-    // output << "done.\n";
+    // VF_LOG_INFO("done.");
     //}
 
     // if (para->getDiffOn()==true)
     //{
-    //   output << "define TempBC...\n" ;
+    //   VF_LOG_INFO("define TempBC...");
     //   findTempSim(para);
-    //   output << "done.\n";
 
-    //   output << "define TempVelBC...\n" ;
+    //   VF_LOG_INFO("define TempVelBC...");
     //   findTempVelSim(para);
-    //   output << "done.\n";
 
-    //   output << "define TempPressBC...\n" ;
+    //   VF_LOG_INFO("define TempPressBC...");
     //   findTempPressSim(para);
-    //   output << "done.\n";
+    //   VF_LOG_INFO("done.");
     //}
 
-    // output << "find Qs-BC...\n" ;
+    // VF_LOG_INFO("find Qs-BC...");
     // findBC27(para);
-    // output << "done.\n";
 
-    // output << "find Press-BC...\n" ;
+    // VF_LOG_INFO("find Press-BC...");
     // findPressQShip(para);
-    // output << "done.\n";
+    // VF_LOG_INFO("done.");
 
     //////////////////////////////////////////////////////////////////////////
     // find indices of corner nodes for multiGPU communication
     //////////////////////////////////////////////////////////////////////////
     if (para->getDevices().size() > 2) {
-        output << "Find indices of edge nodes for multiGPU communication ...";
+        VF_LOG_INFO("Find indices of edge nodes for multiGPU communication");
         vf::gpu::findEdgeNodesCommMultiGPU(*para);
-        output << "done.\n";
     }
     //////////////////////////////////////////////////////////////////////////
     // Memory alloc for CheckPoint / Restart
     //////////////////////////////////////////////////////////////////////////
     if (para->getDoCheckPoint() || para->getDoRestart()) {
-        output << "Alloc Memory for CheckPoint / Restart...";
+        VF_LOG_INFO("Alloc Memory for CheckPoint / Restart");
         for (int lev = para->getCoarse(); lev <= para->getFine(); lev++) {
             cudaMemoryManager->cudaAllocFsForCheckPointAndRestart(lev);
         }
-        output << "done.\n";
     }
 
     //////////////////////////////////////////////////////////////////////////
     // Restart
     //////////////////////////////////////////////////////////////////////////
     if (para->getDoRestart()) {
-        output << "Restart...\n...get the Object...\n";
+        VF_LOG_INFO("Restart...\n...get the Object...");
 
-        const auto name = getFileName(para->getFName(), para->getTimeDoRestart(), para->getMyID());
+        const auto name = getFileName(para->getFName(), para->getTimeDoRestart(), para->getMyProcessID());
         restart_object->deserialize(name, para);
 
-        output << "...copy Memory for Restart...\n";
+        VF_LOG_INFO("...copy Memory for Restart...");
         for (int lev = para->getCoarse(); lev <= para->getFine(); lev++) {
             //////////////////////////////////////////////////////////////////////////
             cudaMemoryManager->cudaCopyFsForRestart(lev);
@@ -371,29 +349,31 @@ Simulation::Simulation(std::shared_ptr<Parameter> para, std::shared_ptr<CudaMemo
             // test...should not work...and does not
             // para->getEvenOrOdd(lev)==false;
         }
-        output << "done.\n";
+        VF_LOG_INFO("done.");
     }
 
     //////////////////////////////////////////////////////////////////////////
     // Init UpdateGrid
     //////////////////////////////////////////////////////////////////////////
-    this->updateGrid27 = std::make_unique<UpdateGrid27>(para, communicator, cudaMemoryManager, pm, kernels, bcFactory);
+    this->updateGrid27 = std::make_unique<UpdateGrid27>(para, communicator, cudaMemoryManager, pm, kernels, bcFactory, tmFactory);
 
     //////////////////////////////////////////////////////////////////////////
-    // Print Init
+    // Write Initialized Files
     //////////////////////////////////////////////////////////////////////////
-    output << "Print files Init...";
+    VF_LOG_INFO("Write initialized Files ...");
     dataWriter->writeInit(para, cudaMemoryManager);
-    if (para->getCalcParticle())
+    if (para->getCalcParticles())
         copyAndPrintParticles(para.get(), cudaMemoryManager.get(), 0, true);
-    output << "done.\n";
+    VF_LOG_INFO("... done.");
 
     //////////////////////////////////////////////////////////////////////////
-    output << "used Device Memory: " << cudaMemoryManager->getMemsizeGPU() / 1000000.0 << " MB\n";
+    VF_LOG_INFO("used Device Memory: {} MB", cudaMemoryManager->getMemsizeGPU() / 1000000.0);
     // std::cout << "Process " << communicator.getPID() <<": used device memory" << cudaMemoryManager->getMemsizeGPU() /
     // 1000000.0 << " MB\n" << std::endl;
     //////////////////////////////////////////////////////////////////////////
 
+    // NeighborDebugWriter::writeNeighborLinkLinesDebug(para.get());
+
     // InterfaceDebugWriter::writeInterfaceLinesDebugCF(para.get());
     // InterfaceDebugWriter::writeInterfaceLinesDebugFC(para.get());
 
@@ -418,9 +398,9 @@ void Simulation::addEnstrophyAnalyzer(uint tAnalyse)
     this->enstrophyAnalyzer = std::make_unique<EnstrophyAnalyzer>(this->para, tAnalyse);
 }
 
-void Simulation::setDataWriter(std::unique_ptr<DataWriter>&& dataWriter_)
+void Simulation::setDataWriter(std::shared_ptr<DataWriter> dataWriter_)
 {
-    this->dataWriter = std::move(dataWriter_);
+    this->dataWriter = std::move(dataWriter_); // by-value sink parameter: move it into the member instead of copying the shared_ptr
 }
 
 void Simulation::setFactories(std::unique_ptr<KernelFactory> &&kernelFactory_,
@@ -442,7 +422,7 @@ void Simulation::allocNeighborsOffsetsScalesAndBoundaries(GridProvider &gridProv
 
 void Simulation::run()
 {
-   unsigned int t, t_prev;
+   unsigned int timestep, t_prev;
    uint t_turbulenceIntensity = 0;
    unsigned int t_MP = 0;
 
@@ -459,29 +439,27 @@ void Simulation::run()
    para->setAngularVelocity(AngularVelocity);
    for (int i = 0; i<= para->getMaxLevel(); i++)
    {
-	   para->getParD(i)->deltaPhi = (real)(para->getAngularVelocity()/(pow(2.,i)));
+       para->getParD(i)->deltaPhi = (real)(para->getAngularVelocity()/(pow(2.,i)));
    }
    //////////////////////////////////////////////////////////////////////////
 
    t_prev = para->getTimeCalcMedStart();
 
-	output << "getMaxLevel = " << para->getMaxLevel() << "\n";
-
-	Timer* averageTimer = new Timer("Average performance");
-	averageTimer->startTimer();
+    Timer* averageTimer = new Timer("Average performance");
+    averageTimer->startTimer();
 
-	////////////////////////////////////////////////////////////////////////////////
-	// Time loop
-	////////////////////////////////////////////////////////////////////////////////
-	for(t=para->getTStart();t<=para->getTEnd();t++)
-	{
-        this->updateGrid27->updateGrid(0, t);
+    ////////////////////////////////////////////////////////////////////////////////
+    // Time loop
+    ////////////////////////////////////////////////////////////////////////////////
+    for(timestep=para->getTimestepStart();timestep<=para->getTimestepEnd();timestep++)
+    {
+        this->updateGrid27->updateGrid(0, timestep);
 
-	    ////////////////////////////////////////////////////////////////////////////////
-	    //Particles
-	    ////////////////////////////////////////////////////////////////////////////////
-	    if (para->getCalcParticle()) propagateParticles(para.get(), t);
-	    ////////////////////////////////////////////////////////////////////////////////
+        ////////////////////////////////////////////////////////////////////////////////
+        //Particles
+        ////////////////////////////////////////////////////////////////////////////////
+        if (para->getCalcParticles()) propagateParticles(para.get(), timestep);
+        ////////////////////////////////////////////////////////////////////////////////
 
 
 
@@ -489,15 +467,14 @@ void Simulation::run()
         ////////////////////////////////////////////////////////////////////////////////
         // run Analyzers for kinetic energy and enstrophy for TGV in 3D
         // these analyzers only work on level 0
-	    ////////////////////////////////////////////////////////////////////////////////
+        ////////////////////////////////////////////////////////////////////////////////
         if (this->kineticEnergyAnalyzer || this->enstrophyAnalyzer) {
-            prepareExchangeMultiGPU(para.get(), 0, -1);
-            exchangeMultiGPU(para.get(), communicator, cudaMemoryManager.get(), 0, -1);
+            updateGrid27->exchangeData(0);
         }
 
-	    if( this->kineticEnergyAnalyzer ) this->kineticEnergyAnalyzer->run(t);
-	    if( this->enstrophyAnalyzer     ) this->enstrophyAnalyzer->run(t);
-	    ////////////////////////////////////////////////////////////////////////////////
+        if( this->kineticEnergyAnalyzer ) this->kineticEnergyAnalyzer->run(timestep);
+        if( this->enstrophyAnalyzer     ) this->enstrophyAnalyzer->run(timestep);
+        ////////////////////////////////////////////////////////////////////////////////
 
 
 
@@ -505,66 +482,66 @@ void Simulation::run()
         ////////////////////////////////////////////////////////////////////////////////
         //Calc Median
         ////////////////////////////////////////////////////////////////////////////////
-        if (para->getCalcMedian() && ((int)t >= para->getTimeCalcMedStart()) && ((int)t <= para->getTimeCalcMedEnd()))
+        if (para->getCalcMedian() && ((int)timestep >= para->getTimeCalcMedStart()) && ((int)timestep <= para->getTimeCalcMedEnd()))
         {
           for (int lev=para->getCoarse(); lev <= para->getFine(); lev++)
           {
-        	  //CalcMedSP27(para->getParD(lev)->vx_SP_Med,
-        			//	  para->getParD(lev)->vy_SP_Med,
-        			//	  para->getParD(lev)->vz_SP_Med,
-        			//	  para->getParD(lev)->rho_SP_Med,
-        			//	  para->getParD(lev)->press_SP_Med,
-        			//	  para->getParD(lev)->geoSP,
-        			//	  para->getParD(lev)->neighborX_SP,
-        			//	  para->getParD(lev)->neighborY_SP,
-        			//	  para->getParD(lev)->neighborZ_SP,
-        			//	  para->getParD(lev)->size_Mat_SP,
-        			//	  para->getParD(lev)->numberofthreads,
-        			//	  para->getParD(lev)->d0SP.f[0],
-        			//	  para->getParD(lev)->evenOrOdd);
-        	  //getLastCudaError("CalcMacSP27 execution failed");
-
-        	  CalcMedCompSP27(para->getParD(lev)->vx_SP_Med,
-        					  para->getParD(lev)->vy_SP_Med,
-        					  para->getParD(lev)->vz_SP_Med,
-        					  para->getParD(lev)->rho_SP_Med,
-        					  para->getParD(lev)->press_SP_Med,
-        					  para->getParD(lev)->typeOfGridNode,
-        					  para->getParD(lev)->neighborX,
-        					  para->getParD(lev)->neighborY,
-        					  para->getParD(lev)->neighborZ,
-        					  para->getParD(lev)->numberOfNodes,
-        					  para->getParD(lev)->numberofthreads,
-        					  para->getParD(lev)->distributions.f[0],
-        					  para->getParD(lev)->isEvenTimestep);
-        	  getLastCudaError("CalcMacMedCompSP27 execution failed");
+              //CalcMedSP27(para->getParD(lev)->vx_SP_Med,
+                    //      para->getParD(lev)->vy_SP_Med,
+                    //      para->getParD(lev)->vz_SP_Med,
+                    //      para->getParD(lev)->rho_SP_Med,
+                    //      para->getParD(lev)->press_SP_Med,
+                    //      para->getParD(lev)->geoSP,
+                    //      para->getParD(lev)->neighborX_SP,
+                    //      para->getParD(lev)->neighborY_SP,
+                    //      para->getParD(lev)->neighborZ_SP,
+                    //      para->getParD(lev)->size_Mat_SP,
+                    //      para->getParD(lev)->numberofthreads,
+                    //      para->getParD(lev)->d0SP.f[0],
+                    //      para->getParD(lev)->evenOrOdd);
+              //getLastCudaError("CalcMacSP27 execution failed");
+
+              CalcMedCompSP27(para->getParD(lev)->vx_SP_Med,
+                              para->getParD(lev)->vy_SP_Med,
+                              para->getParD(lev)->vz_SP_Med,
+                              para->getParD(lev)->rho_SP_Med,
+                              para->getParD(lev)->press_SP_Med,
+                              para->getParD(lev)->typeOfGridNode,
+                              para->getParD(lev)->neighborX,
+                              para->getParD(lev)->neighborY,
+                              para->getParD(lev)->neighborZ,
+                              para->getParD(lev)->numberOfNodes,
+                              para->getParD(lev)->numberofthreads,
+                              para->getParD(lev)->distributions.f[0],
+                              para->getParD(lev)->isEvenTimestep);
+              getLastCudaError("CalcMacMedCompSP27 execution failed");
 
           }
         }
 
-		if (para->getCalcTurbulenceIntensity()) {
+        if (para->getCalcTurbulenceIntensity()) {
             for (int lev = para->getCoarse(); lev <= para->getFine(); lev++) {
-				CalcTurbulenceIntensityDevice(
-				    para->getParD(lev)->vxx,
-				    para->getParD(lev)->vyy,
-				    para->getParD(lev)->vzz,
-				    para->getParD(lev)->vxy,
-				    para->getParD(lev)->vxz,
-				    para->getParD(lev)->vyz,
-				    para->getParD(lev)->vx_mean,
-				    para->getParD(lev)->vy_mean,
-				    para->getParD(lev)->vz_mean,
-				    para->getParD(lev)->distributions.f[0],
-				    para->getParD(lev)->typeOfGridNode,
-				    para->getParD(lev)->neighborX,
-				    para->getParD(lev)->neighborY,
-				    para->getParD(lev)->neighborZ,
-				    para->getParD(lev)->numberOfNodes,
-				    para->getParD(lev)->isEvenTimestep,
-				    para->getParD(lev)->numberofthreads
-				);
-			}
-		}
+                CalcTurbulenceIntensityDevice(
+                    para->getParD(lev)->vxx,
+                    para->getParD(lev)->vyy,
+                    para->getParD(lev)->vzz,
+                    para->getParD(lev)->vxy,
+                    para->getParD(lev)->vxz,
+                    para->getParD(lev)->vyz,
+                    para->getParD(lev)->vx_mean,
+                    para->getParD(lev)->vy_mean,
+                    para->getParD(lev)->vz_mean,
+                    para->getParD(lev)->distributions.f[0],
+                    para->getParD(lev)->typeOfGridNode,
+                    para->getParD(lev)->neighborX,
+                    para->getParD(lev)->neighborY,
+                    para->getParD(lev)->neighborZ,
+                    para->getParD(lev)->numberOfNodes,
+                    para->getParD(lev)->isEvenTimestep,
+                    para->getParD(lev)->numberofthreads
+                );
+            }
+        }
         ////////////////////////////////////////////////////////////////////////////////
 
 
@@ -573,29 +550,29 @@ void Simulation::run()
         ////////////////////////////////////////////////////////////////////////////////
         // CheckPoint
         ////////////////////////////////////////////////////////////////////////////////
-        if(para->getDoCheckPoint() && para->getTimeDoCheckPoint()>0 && t%para->getTimeDoCheckPoint()==0 && t>0 && !para->overWritingRestart(t))
+        if(para->getDoCheckPoint() && para->getTimeDoCheckPoint()>0 && timestep%para->getTimeDoCheckPoint()==0 && timestep>0 && !para->overWritingRestart(timestep))
         {
-			averageTimer->stopTimer();
+            averageTimer->stopTimer();
             //////////////////////////////////////////////////////////////////////////
 
             if( para->getDoCheckPoint() )
             {
-                output << "Copy data for CheckPoint t=" << t << "...\n";
+                VF_LOG_INFO("Copy data for CheckPoint t = {}....", timestep);
 
                 for (int lev=para->getCoarse(); lev <= para->getFine(); lev++)
                 {
                     cudaMemoryManager->cudaCopyFsForCheckPoint(lev);
                 }
 
-                output << "Write data for CheckPoint t=" << t << "...";
+                VF_LOG_INFO("Write data for CheckPoint t = {}...", timestep);
 
-				const auto name = getFileName(para->getFName(), t, para->getMyID());
-				restart_object->serialize(name, para);
+                const auto name = getFileName(para->getFName(), timestep, para->getMyProcessID());
+                restart_object->serialize(name, para);
 
-                output << "\n done\n";
+                VF_LOG_INFO("done");
             }
             //////////////////////////////////////////////////////////////////////////
-			averageTimer->startTimer();
+            averageTimer->startTimer();
         }
         //////////////////////////////////////////////////////////////////////////////
 
@@ -609,33 +586,33 @@ void Simulation::run()
         //set MP-Time
         if (para->getUseMeasurePoints())
         {
-            if ((t%para->getTimestepForMP()) == 0)
+            if ((timestep%para->getTimestepForMP()) == 0)
             {
                 unsigned int valuesPerClockCycle = (unsigned int)(para->getclockCycleForMP() / para->getTimestepForMP());
                 for (int lev = para->getCoarse(); lev <= para->getFine(); lev++)
                 {
-                    //output << "start level = " << lev << "\n";
-                    LBCalcMeasurePoints27(  para->getParD(lev)->VxMP,			para->getParD(lev)->VyMP,			para->getParD(lev)->VzMP,
-                    				        para->getParD(lev)->RhoMP,		    para->getParD(lev)->kMP,			para->getParD(lev)->numberOfPointskMP,
-                    				        valuesPerClockCycle,				t_MP,								para->getParD(lev)->typeOfGridNode,
-                    				        para->getParD(lev)->neighborX,   para->getParD(lev)->neighborY,	para->getParD(lev)->neighborZ,
-                    				        para->getParD(lev)->numberOfNodes,	para->getParD(lev)->distributions.f[0],		para->getParD(lev)->numberofthreads,
-                    				        para->getParD(lev)->isEvenTimestep);
+                    // VF_LOG_INFO("start level = {}", lev);
+                    LBCalcMeasurePoints27(  para->getParD(lev)->VxMP,            para->getParD(lev)->VyMP,                 para->getParD(lev)->VzMP,
+                                            para->getParD(lev)->RhoMP,           para->getParD(lev)->kMP,                  para->getParD(lev)->numberOfPointskMP,
+                                            valuesPerClockCycle,                 t_MP,                                     para->getParD(lev)->typeOfGridNode,
+                                            para->getParD(lev)->neighborX,       para->getParD(lev)->neighborY,            para->getParD(lev)->neighborZ,
+                                            para->getParD(lev)->numberOfNodes,   para->getParD(lev)->distributions.f[0],   para->getParD(lev)->numberofthreads,
+                                            para->getParD(lev)->isEvenTimestep);
                 }
                 t_MP++;
             }
 
             //Copy Measure Values
-            if ((t % (unsigned int)para->getclockCycleForMP()) == 0)
+            if ((timestep % (unsigned int)para->getclockCycleForMP()) == 0)
             {
                 for (int lev = para->getCoarse(); lev <= para->getFine(); lev++)
                 {
                     cudaMemoryManager->cudaCopyMeasurePointsToHost(lev);
                     para->copyMeasurePointsArrayToVector(lev);
-                    output << "\n Write MeasurePoints at level = " << lev << " and timestep = " << t << "\n";
+                    VF_LOG_INFO("Write MeasurePoints at level = {} and timestep = {}", lev, timestep);
                     for (int j = 0; j < (int)para->getParH(lev)->MP.size(); j++)
                     {
-                        MeasurePointWriter::writeMeasurePoints(para.get(), lev, j, t);
+                        MeasurePointWriter::writeMeasurePoints(para.get(), lev, j, timestep);
                     }
                     //MeasurePointWriter::calcAndWriteMeanAndFluctuations(para.get(), lev, t, para->getTStartOut());
                 }
@@ -653,40 +630,40 @@ void Simulation::run()
         if (para->getDiffOn() && para->getCalcPlaneConc())
         {
             PlaneConcThS27( para->getParD(0)->ConcPlaneIn,
-            		       para->getParD(0)->cpTopIndex,
-            		       para->getParD(0)->numberOfPointsCpTop,
-            		       para->getParD(0)->typeOfGridNode,
-            		       para->getParD(0)->neighborX,
-            		       para->getParD(0)->neighborY,
-            		       para->getParD(0)->neighborZ,
-            		       para->getParD(0)->numberOfNodes,
-            		       para->getParD(0)->numberofthreads,
-            		       para->getParD(0)->distributionsAD27.f[0],
-            		       para->getParD(0)->isEvenTimestep);
+                           para->getParD(0)->cpTopIndex,
+                           para->getParD(0)->numberOfPointsCpTop,
+                           para->getParD(0)->typeOfGridNode,
+                           para->getParD(0)->neighborX,
+                           para->getParD(0)->neighborY,
+                           para->getParD(0)->neighborZ,
+                           para->getParD(0)->numberOfNodes,
+                           para->getParD(0)->numberofthreads,
+                           para->getParD(0)->distributionsAD27.f[0],
+                           para->getParD(0)->isEvenTimestep);
             getLastCudaError("PlaneConcThS27 execution failed");
             PlaneConcThS27( para->getParD(0)->ConcPlaneOut1,
-            		        para->getParD(0)->cpBottomIndex,
-            		        para->getParD(0)->numberOfPointsCpBottom,
-            		        para->getParD(0)->typeOfGridNode,
-            		        para->getParD(0)->neighborX,
-            		        para->getParD(0)->neighborY,
-            		        para->getParD(0)->neighborZ,
-            		        para->getParD(0)->numberOfNodes,
-            		        para->getParD(0)->numberofthreads,
-            		        para->getParD(0)->distributionsAD27.f[0],
-            		        para->getParD(0)->isEvenTimestep);
+                            para->getParD(0)->cpBottomIndex,
+                            para->getParD(0)->numberOfPointsCpBottom,
+                            para->getParD(0)->typeOfGridNode,
+                            para->getParD(0)->neighborX,
+                            para->getParD(0)->neighborY,
+                            para->getParD(0)->neighborZ,
+                            para->getParD(0)->numberOfNodes,
+                            para->getParD(0)->numberofthreads,
+                            para->getParD(0)->distributionsAD27.f[0],
+                            para->getParD(0)->isEvenTimestep);
             getLastCudaError("PlaneConcThS27 execution failed");
             PlaneConcThS27( para->getParD(0)->ConcPlaneOut2,
-            		        para->getParD(0)->pressureBC.kN,
-            		        para->getParD(0)->pressureBC.numberOfBCnodes,
-            		        para->getParD(0)->typeOfGridNode,
-            		        para->getParD(0)->neighborX,
-            		        para->getParD(0)->neighborY,
-            		        para->getParD(0)->neighborZ,
-            		        para->getParD(0)->numberOfNodes,
-            		        para->getParD(0)->numberofthreads,
-            		        para->getParD(0)->distributionsAD27.f[0],
-            		        para->getParD(0)->isEvenTimestep);
+                            para->getParD(0)->pressureBC.kN,
+                            para->getParD(0)->pressureBC.numberOfBCnodes,
+                            para->getParD(0)->typeOfGridNode,
+                            para->getParD(0)->neighborX,
+                            para->getParD(0)->neighborY,
+                            para->getParD(0)->neighborZ,
+                            para->getParD(0)->numberOfNodes,
+                            para->getParD(0)->numberofthreads,
+                            para->getParD(0)->distributionsAD27.f[0],
+                            para->getParD(0)->isEvenTimestep);
             getLastCudaError("PlaneConcThS27 execution failed");
             //////////////////////////////////////////////////////////////////////////////////
             ////Calculation of concentration at the plane
@@ -698,31 +675,30 @@ void Simulation::run()
 
 
 
-	  ////////////////////////////////////////////////////////////////////////////////
+      ////////////////////////////////////////////////////////////////////////////////
       // File IO
       ////////////////////////////////////////////////////////////////////////////////
       //communicator->startTimer();
-      if(para->getTOut()>0 && t%para->getTOut()==0 && t>para->getTStartOut())
+      if(para->getTimestepOut()>0 && timestep%para->getTimestepOut()==0 && timestep>para->getTimestepStartOut())
       {
-		  //////////////////////////////////////////////////////////////////////////////////
-		  //if (para->getParD(0)->evenOrOdd==true)  para->getParD(0)->evenOrOdd=false;
-		  //else                                    para->getParD(0)->evenOrOdd=true;
-		  //////////////////////////////////////////////////////////////////////////////////
+          //////////////////////////////////////////////////////////////////////////////////
+          //if (para->getParD(0)->evenOrOdd==true)  para->getParD(0)->evenOrOdd=false;
+          //else                                    para->getParD(0)->evenOrOdd=true;
+          //////////////////////////////////////////////////////////////////////////////////
 
-		//////////////////////////////////////////////////////////////////////////
-		averageTimer->stopTimer();
-		averageTimer->outputPerformance(t, para.get(), communicator);
-		//////////////////////////////////////////////////////////////////////////
+        //////////////////////////////////////////////////////////////////////////
+        averageTimer->stopTimer();
+        averageTimer->outputPerformance(timestep, para.get(), communicator);
+        //////////////////////////////////////////////////////////////////////////
 
          if( para->getPrintFiles() )
          {
-            output << "Write files t=" << t << "... ";
+            VF_LOG_INFO("Write files t = {} ...", timestep);
             for (int lev=para->getCoarse(); lev <= para->getFine(); lev++)
             {
-		        //////////////////////////////////////////////////////////////////////////
-		        //exchange data for valid post process
-                prepareExchangeMultiGPU(para.get(), lev, -1);
-		        exchangeMultiGPU(para.get(), communicator, cudaMemoryManager.get(), lev, -1);
+                //////////////////////////////////////////////////////////////////////////
+                //exchange data for valid post process
+                updateGrid27->exchangeData(lev);
                 //////////////////////////////////////////////////////////////////////////
                //if (para->getD3Qxx()==19)
                //{
@@ -732,26 +708,26 @@ void Simulation::run()
                //}
                //else if (para->getD3Qxx()==27)
                //{
-				   //if (para->getCalcMedian() && ((int)t > para->getTimeCalcMedStart()) && ((int)t <= para->getTimeCalcMedEnd()))
-				   //{
-					  // unsigned int tdiff = t - t_prev;
-					  // CalcMacMedSP27(para->getParD(lev)->vx_SP_Med,
-				   //					  para->getParD(lev)->vy_SP_Med,
-				   //					  para->getParD(lev)->vz_SP_Med,
-				   //					  para->getParD(lev)->rho_SP_Med,
-				   //					  para->getParD(lev)->press_SP_Med,
-				   //					  para->getParD(lev)->geoSP,
-				   //					  para->getParD(lev)->neighborX_SP,
-				   //					  para->getParD(lev)->neighborY_SP,
-				   //					  para->getParD(lev)->neighborZ_SP,
-				   //					  tdiff,
-				   //					  para->getParD(lev)->size_Mat_SP,
-				   //					  para->getParD(lev)->numberofthreads,
-				   //					  para->getParD(lev)->evenOrOdd);
-					  // getLastCudaError("CalcMacMedSP27 execution failed");
-				   //}
-
-				   //CalcMacSP27(para->getParD(lev)->vx_SP,
+                   //if (para->getCalcMedian() && ((int)t > para->getTimeCalcMedStart()) && ((int)t <= para->getTimeCalcMedEnd()))
+                   //{
+                      // unsigned int tdiff = t - t_prev;
+                      // CalcMacMedSP27(para->getParD(lev)->vx_SP_Med,
+                   //                      para->getParD(lev)->vy_SP_Med,
+                   //                      para->getParD(lev)->vz_SP_Med,
+                   //                      para->getParD(lev)->rho_SP_Med,
+                   //                      para->getParD(lev)->press_SP_Med,
+                   //                      para->getParD(lev)->geoSP,
+                   //                      para->getParD(lev)->neighborX_SP,
+                   //                      para->getParD(lev)->neighborY_SP,
+                   //                      para->getParD(lev)->neighborZ_SP,
+                   //                      tdiff,
+                   //                      para->getParD(lev)->size_Mat_SP,
+                   //                      para->getParD(lev)->numberofthreads,
+                   //                      para->getParD(lev)->evenOrOdd);
+                      // getLastCudaError("CalcMacMedSP27 execution failed");
+                   //}
+
+                   //CalcMacSP27(para->getParD(lev)->vx_SP,
        //                        para->getParD(lev)->vy_SP,
        //                        para->getParD(lev)->vz_SP,
        //                        para->getParD(lev)->rho,
@@ -767,86 +743,86 @@ void Simulation::run()
        //            getLastCudaError("CalcMacSP27 execution failed");
 
 
-				   CalcMacCompSP27(para->getParD(lev)->velocityX,
-								   para->getParD(lev)->velocityY,
-								   para->getParD(lev)->velocityZ,
-								   para->getParD(lev)->rho,
-								   para->getParD(lev)->pressure,
-								   para->getParD(lev)->typeOfGridNode,
-								   para->getParD(lev)->neighborX,
-								   para->getParD(lev)->neighborY,
-								   para->getParD(lev)->neighborZ,
-								   para->getParD(lev)->numberOfNodes,
-								   para->getParD(lev)->numberofthreads,
-								   para->getParD(lev)->distributions.f[0],
-								   para->getParD(lev)->isEvenTimestep);
+                   CalcMacCompSP27(para->getParD(lev)->velocityX,
+                                   para->getParD(lev)->velocityY,
+                                   para->getParD(lev)->velocityZ,
+                                   para->getParD(lev)->rho,
+                                   para->getParD(lev)->pressure,
+                                   para->getParD(lev)->typeOfGridNode,
+                                   para->getParD(lev)->neighborX,
+                                   para->getParD(lev)->neighborY,
+                                   para->getParD(lev)->neighborZ,
+                                   para->getParD(lev)->numberOfNodes,
+                                   para->getParD(lev)->numberofthreads,
+                                   para->getParD(lev)->distributions.f[0],
+                                   para->getParD(lev)->isEvenTimestep);
                    getLastCudaError("CalcMacSP27 execution failed");
 
-				// // overwrite with wall nodes
-				//    SetOutputWallVelocitySP27(  para->getParD(lev)->numberofthreads,
-				// 							   para->getParD(lev)->velocityX,
-				// 							   para->getParD(lev)->velocityY,
-				// 							   para->getParD(lev)->velocityZ,
-				// 							   para->getParD(lev)->geometryBC.Vx,
-				// 							   para->getParD(lev)->geometryBC.Vy,
-				// 							   para->getParD(lev)->geometryBC.Vz,
-				// 							   para->getParD(lev)->geometryBC.numberOfBCnodes,
-				// 							   para->getParD(lev)->geometryBC.k,
-				// 							   para->getParD(lev)->rho,
-				// 							   para->getParD(lev)->pressure,
-				// 							   para->getParD(lev)->typeOfGridNode,
-				// 							   para->getParD(lev)->neighborX,
-				// 							   para->getParD(lev)->neighborY,
-				// 							   para->getParD(lev)->neighborZ,
-				// 							   para->getParD(lev)->size_Mat,
-				// 							   para->getParD(lev)->distributions.f[0],
-				// 							   para->getParD(lev)->isEvenTimestep);
+                // // overwrite with wall nodes
+                //    SetOutputWallVelocitySP27(  para->getParD(lev)->numberofthreads,
+                //                                para->getParD(lev)->velocityX,
+                //                                para->getParD(lev)->velocityY,
+                //                                para->getParD(lev)->velocityZ,
+                //                                para->getParD(lev)->geometryBC.Vx,
+                //                                para->getParD(lev)->geometryBC.Vy,
+                //                                para->getParD(lev)->geometryBC.Vz,
+                //                                para->getParD(lev)->geometryBC.numberOfBCnodes,
+                //                                para->getParD(lev)->geometryBC.k,
+                //                                para->getParD(lev)->rho,
+                //                                para->getParD(lev)->pressure,
+                //                                para->getParD(lev)->typeOfGridNode,
+                //                                para->getParD(lev)->neighborX,
+                //                                para->getParD(lev)->neighborY,
+                //                                para->getParD(lev)->neighborZ,
+                //                                para->getParD(lev)->size_Mat,
+                //                                para->getParD(lev)->distributions.f[0],
+                //                                para->getParD(lev)->isEvenTimestep);
                 //   getLastCudaError("SetOutputWallVelocitySP27 execution failed");
 
-   				//    SetOutputWallVelocitySP27(  para->getParD(lev)->numberofthreads,
-				// 							   para->getParD(lev)->velocityX,
-				// 							   para->getParD(lev)->velocityY,
-				// 							   para->getParD(lev)->velocityZ,
-				// 							   para->getParD(lev)->velocityBC.Vx,
-				// 							   para->getParD(lev)->velocityBC.Vy,
-				// 							   para->getParD(lev)->velocityBC.Vz,
-				// 							   para->getParD(lev)->velocityBC.numberOfBCnodes,
-				// 							   para->getParD(lev)->velocityBC.k,
-				// 							   para->getParD(lev)->rho,
-				// 							   para->getParD(lev)->pressure,
-				// 							   para->getParD(lev)->typeOfGridNode,
-				// 							   para->getParD(lev)->neighborX,
-				// 							   para->getParD(lev)->neighborY,
-				// 							   para->getParD(lev)->neighborZ,
-				// 							   para->getParD(lev)->size_Mat,
-				// 							   para->getParD(lev)->distributions.f[0],
-				// 							   para->getParD(lev)->isEvenTimestep);
+                //    SetOutputWallVelocitySP27(  para->getParD(lev)->numberofthreads,
+                //                                para->getParD(lev)->velocityX,
+                //                                para->getParD(lev)->velocityY,
+                //                                para->getParD(lev)->velocityZ,
+                //                                para->getParD(lev)->velocityBC.Vx,
+                //                                para->getParD(lev)->velocityBC.Vy,
+                //                                para->getParD(lev)->velocityBC.Vz,
+                //                                para->getParD(lev)->velocityBC.numberOfBCnodes,
+                //                                para->getParD(lev)->velocityBC.k,
+                //                                para->getParD(lev)->rho,
+                //                                para->getParD(lev)->pressure,
+                //                                para->getParD(lev)->typeOfGridNode,
+                //                                para->getParD(lev)->neighborX,
+                //                                para->getParD(lev)->neighborY,
+                //                                para->getParD(lev)->neighborZ,
+                //                                para->getParD(lev)->size_Mat,
+                //                                para->getParD(lev)->distributions.f[0],
+                //                                para->getParD(lev)->isEvenTimestep);
                 //   getLastCudaError("SetOutputWallVelocitySP27 execution failed");
 
-				 //}
+                 //}
 
-				   cudaMemoryManager->cudaCopyPrint(lev);
-			   if (para->getCalcMedian())
-			   {
-				   cudaMemoryManager->cudaCopyMedianPrint(lev);
-			   }
+                   cudaMemoryManager->cudaCopyPrint(lev);
+               if (para->getCalcMedian())
+               {
+                   cudaMemoryManager->cudaCopyMedianPrint(lev);
+               }
 
-			   //////////////////////////////////////////////////////////////////////////
+               //////////////////////////////////////////////////////////////////////////
                //TODO: implement flag to write ASCII data
-			   if (para->getWriteVeloASCIIfiles())
-				   VeloASCIIWriter::writeVelocitiesAsTXT(para.get(), lev, t);
-			   //////////////////////////////////////////////////////////////////////////
+               if (para->getWriteVeloASCIIfiles())
+                   VeloASCIIWriter::writeVelocitiesAsTXT(para.get(), lev, timestep);
+               //////////////////////////////////////////////////////////////////////////
                if( this->kineticEnergyAnalyzer || this->enstrophyAnalyzer )
                {
-                   std::string fname = para->getFName() + "_ID_" + StringUtil::toString<int>(para->getMyID()) + "_t_" + StringUtil::toString<int>(t);
+                   std::string fname = para->getFName() + "_ID_" + StringUtil::toString<int>(para->getMyProcessID()) + "_t_" + StringUtil::toString<int>(timestep);
 
                    if (this->kineticEnergyAnalyzer) this->kineticEnergyAnalyzer->writeToFile(fname);
                    if (this->enstrophyAnalyzer)     this->enstrophyAnalyzer->writeToFile(fname);
                }
-			   //////////////////////////////////////////////////////////////////////////
+               //////////////////////////////////////////////////////////////////////////
 
 
-			   ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+               ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
                if (para->getDiffOn()==true)
                {
                   if (para->getDiffMod() == 7)
@@ -876,459 +852,459 @@ void Simulation::run()
                                     para->getParD(lev)->isEvenTimestep);
                   }
 
-				  cudaMemoryManager->cudaCopyConcentrationDeviceToHost(lev);
+                  cudaMemoryManager->cudaCopyConcentrationDeviceToHost(lev);
                   //cudaMemoryCopy(para->getParH(lev)->Conc, para->getParD(lev)->Conc,  para->getParH(lev)->mem_size_real_SP , cudaMemcpyDeviceToHost);
                }
                ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			   ////print cp
-			   //if ((para->getParH(lev)->cpTop.size() > 0) && (t > para->getTStartOut()))
-			   //{
-				  // printCpTopIntermediateStep(para, t, lev);
-			   //}
-			   ////////////////////////////////////////////////////////////////////////////////
-			   //MeasurePointWriter::writeSpacialAverageForXZSlices(para, lev, t);
-			   ////////////////////////////////////////////////////////////////////////////////
-			   //MeasurePointWriter::writeTestAcousticXY(para, lev, t);
-			   //MeasurePointWriter::writeTestAcousticYZ(para, lev, t);
-			   //MeasurePointWriter::writeTestAcousticXZ(para, lev, t);
-			   ////////////////////////////////////////////////////////////////////////
-			}
-
-			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-			////test print press mirror
-			//if (t > para->getTStartOut())
-			//{
-			//	////////////////////////////////////////////////////////////////////////////////
-			//	//Level 7
-			//	CalcCPtop27(para->getParD(7)->d0SP.f[0],
-			//		para->getParD(7)->cpTopIndex,
-			//		para->getParD(7)->numberOfPointsCpTop,
-			//		para->getParD(7)->cpPressTop,
-			//		para->getParD(7)->neighborX_SP,
-			//		para->getParD(7)->neighborY_SP,
-			//		para->getParD(7)->neighborZ_SP,
-			//		para->getParD(7)->size_Mat_SP,
-			//		para->getParD(7)->evenOrOdd,
-			//		para->getParD(7)->numberofthreads);
-			//	//////////////////////////////////////////////////////////////////////////////////
-			//	calcPressForMirror(para, 7);
-			//	////////////////////////////////////////////////////////////////////////////////
-			//	//Level 8
-			//	CalcCPtop27(para->getParD(8)->d0SP.f[0],
-			//		para->getParD(8)->cpTopIndex,
-			//		para->getParD(8)->numberOfPointsCpTop,
-			//		para->getParD(8)->cpPressTop,
-			//		para->getParD(8)->neighborX_SP,
-			//		para->getParD(8)->neighborY_SP,
-			//		para->getParD(8)->neighborZ_SP,
-			//		para->getParD(8)->size_Mat_SP,
-			//		para->getParD(8)->evenOrOdd,
-			//		para->getParD(8)->numberofthreads);
-			//	//////////////////////////////////////////////////////////////////////////////////
-			//	calcPressForMirror(para, 8);
-			//	////////////////////////////////////////////////////////////////////////////////
-			//	//print press mirror
-			//	printScalars(para, false);
-			//	////////////////////////////////////////////////////////////////////////////////
-			//}
-			//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-			//t_prev = t;
-
-			//////////////////////////////////////////////////////////////////////////
-			////Data Analysis
-			////AnalysisData::writeAnalysisData(para, t);
-			//AnalysisData::writeAnalysisDataX(para, t);
-			//AnalysisData::writeAnalysisDataZ(para, t);
-			//////////////////////////////////////////////////////////////////////////
+               ////print cp
+               //if ((para->getParH(lev)->cpTop.size() > 0) && (t > para->getTStartOut()))
+               //{
+                  // printCpTopIntermediateStep(para, t, lev);
+               //}
+               ////////////////////////////////////////////////////////////////////////////////
+               //MeasurePointWriter::writeSpacialAverageForXZSlices(para, lev, t);
+               ////////////////////////////////////////////////////////////////////////////////
+               //MeasurePointWriter::writeTestAcousticXY(para, lev, t);
+               //MeasurePointWriter::writeTestAcousticYZ(para, lev, t);
+               //MeasurePointWriter::writeTestAcousticXZ(para, lev, t);
+               ////////////////////////////////////////////////////////////////////////
+            }
+
+            //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+            ////test print press mirror
+            //if (t > para->getTStartOut())
+            //{
+            //    ////////////////////////////////////////////////////////////////////////////////
+            //    //Level 7
+            //    CalcCPtop27(para->getParD(7)->d0SP.f[0],
+            //        para->getParD(7)->cpTopIndex,
+            //        para->getParD(7)->numberOfPointsCpTop,
+            //        para->getParD(7)->cpPressTop,
+            //        para->getParD(7)->neighborX_SP,
+            //        para->getParD(7)->neighborY_SP,
+            //        para->getParD(7)->neighborZ_SP,
+            //        para->getParD(7)->size_Mat_SP,
+            //        para->getParD(7)->evenOrOdd,
+            //        para->getParD(7)->numberofthreads);
+            //    //////////////////////////////////////////////////////////////////////////////////
+            //    calcPressForMirror(para, 7);
+            //    ////////////////////////////////////////////////////////////////////////////////
+            //    //Level 8
+            //    CalcCPtop27(para->getParD(8)->d0SP.f[0],
+            //        para->getParD(8)->cpTopIndex,
+            //        para->getParD(8)->numberOfPointsCpTop,
+            //        para->getParD(8)->cpPressTop,
+            //        para->getParD(8)->neighborX_SP,
+            //        para->getParD(8)->neighborY_SP,
+            //        para->getParD(8)->neighborZ_SP,
+            //        para->getParD(8)->size_Mat_SP,
+            //        para->getParD(8)->evenOrOdd,
+            //        para->getParD(8)->numberofthreads);
+            //    //////////////////////////////////////////////////////////////////////////////////
+            //    calcPressForMirror(para, 8);
+            //    ////////////////////////////////////////////////////////////////////////////////
+            //    //print press mirror
+            //    printScalars(para, false);
+            //    ////////////////////////////////////////////////////////////////////////////////
+            //}
+            //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+            //t_prev = t;
+
+            //////////////////////////////////////////////////////////////////////////
+            ////Data Analysis
+            ////AnalysisData::writeAnalysisData(para, t);
+            //AnalysisData::writeAnalysisDataX(para, t);
+            //AnalysisData::writeAnalysisDataZ(para, t);
+            //////////////////////////////////////////////////////////////////////////
 
             ////////////////////////////////////////////////////////////////////////
             //pressure difference
             ////////////////////////////////////////////////////////////////////////
-			   //if (para->getMyID() == para->getPressInID())       calcPressure(para,  "in", 0);
-			   //else if (para->getMyID() == para->getPressOutID()) calcPressure(para, "out", 0);
+               //if (para->getMyID() == para->getPressInID())       calcPressure(para,  "in", 0);
+               //else if (para->getMyID() == para->getPressOutID()) calcPressure(para, "out", 0);
             ////////////////////////////////////////////////////////////////////////
             //flow rate
             ////////////////////////////////////////////////////////////////////////
-		      //calcFlowRate(para, 0);
+              //calcFlowRate(para, 0);
+            ////////////////////////////////////////////////////////////////////////
+
+            ////////////////////////////////////////////////////////////////////////
+            //calculate 2nd, 3rd and higher order moments
+            ////////////////////////////////////////////////////////////////////////
+            if (para->getCalc2ndOrderMoments())  calc2ndMoments(para.get(), cudaMemoryManager.get());
+            if (para->getCalc3rdOrderMoments())  calc3rdMoments(para.get(), cudaMemoryManager.get());
+            if (para->getCalcHighOrderMoments()) calcHigherOrderMoments(para.get(), cudaMemoryManager.get());
             ////////////////////////////////////////////////////////////////////////
 
-			////////////////////////////////////////////////////////////////////////
-			//calculate 2nd, 3rd and higher order moments
-			////////////////////////////////////////////////////////////////////////
-			if (para->getCalc2ndOrderMoments())  calc2ndMoments(para.get(), cudaMemoryManager.get());
-			if (para->getCalc3rdOrderMoments())  calc3rdMoments(para.get(), cudaMemoryManager.get());
-			if (para->getCalcHighOrderMoments()) calcHigherOrderMoments(para.get(), cudaMemoryManager.get());
-			////////////////////////////////////////////////////////////////////////
-
-			////////////////////////////////////////////////////////////////////////
-			//calculate median on host
-			////////////////////////////////////////////////////////////////////////
-			if (para->getCalcMedian() && ((int)t > para->getTimeCalcMedStart()) && ((int)t <= para->getTimeCalcMedEnd()) && ((t%(unsigned int)para->getclockCycleForMP())==0))
-			{
-				unsigned int tdiff = t - t_prev;
-				calcMedian(para.get(), tdiff);
-
-				/////////////////////////////////
-				//added for incremental averaging
-				t_prev = t;
-				resetMedian(para.get());
-				/////////////////////////////////
-			}
+            ////////////////////////////////////////////////////////////////////////
+            //calculate median on host
+            ////////////////////////////////////////////////////////////////////////
+            if (para->getCalcMedian() && ((int)timestep > para->getTimeCalcMedStart()) && ((int)timestep <= para->getTimeCalcMedEnd()) && ((timestep%(unsigned int)para->getclockCycleForMP())==0))
+            {
+                unsigned int tdiff = timestep - t_prev;
+                calcMedian(para.get(), tdiff);
+
+                /////////////////////////////////
+                //added for incremental averaging
+                t_prev = timestep;
+                resetMedian(para.get());
+                /////////////////////////////////
+            }
             if (para->getCalcTurbulenceIntensity())
-			{
-                uint t_diff = t - t_turbulenceIntensity;
+            {
+                uint t_diff = timestep - t_turbulenceIntensity;
                 calcTurbulenceIntensity(para.get(), cudaMemoryManager.get(), t_diff);
                 //writeAllTiDatafToFile(para.get(), t);
             }
-			////////////////////////////////////////////////////////////////////////
-			dataWriter->writeTimestep(para, t);
-			////////////////////////////////////////////////////////////////////////
+            ////////////////////////////////////////////////////////////////////////
+            dataWriter->writeTimestep(para, timestep);
+            ////////////////////////////////////////////////////////////////////////
             if (para->getCalcTurbulenceIntensity()) {
-                t_turbulenceIntensity = t;
+                t_turbulenceIntensity = timestep;
                 resetVelocityFluctuationsAndMeans(para.get(), cudaMemoryManager.get());
             }
-			////////////////////////////////////////////////////////////////////////
-            if (para->getCalcDragLift()) printDragLift(para.get(), cudaMemoryManager.get(), t);
-			////////////////////////////////////////////////////////////////////////
-			if (para->getCalcParticle()) copyAndPrintParticles(para.get(), cudaMemoryManager.get(), t, false);
-			////////////////////////////////////////////////////////////////////////
-			output << "done.\n";
-			////////////////////////////////////////////////////////////////////////
+            ////////////////////////////////////////////////////////////////////////
+            if (para->getCalcDragLift()) printDragLift(para.get(), cudaMemoryManager.get(), timestep);
+            ////////////////////////////////////////////////////////////////////////
+            if (para->getCalcParticles()) copyAndPrintParticles(para.get(), cudaMemoryManager.get(), timestep, false);
+            ////////////////////////////////////////////////////////////////////////
+            VF_LOG_INFO("... done");
+            ////////////////////////////////////////////////////////////////////////
          }
 
-		////////////////////////////////////////////////////////////////////////
-		averageTimer->startTimer();
+        ////////////////////////////////////////////////////////////////////////
+        averageTimer->startTimer();
       }
-	}
-
-	/////////////////////////////////////////////////////////////////////////
-
-	////////////////////////////////////////////////////////////////////////////////
-	//printDragLift(para);
-	////////////////////////////////////////////////////////////////////////////////
-
-	////////////////////////////////////////////////////////////////////////////////
-	if (para->getDiffOn()==true) printPlaneConc(para.get(), cudaMemoryManager.get());
-	////////////////////////////////////////////////////////////////////////////////
-
-	////////////////////////////////////////////////////////////////////////////////
-	////for (int lev = para->getCoarse(); lev <= para->getFine(); lev++)
-	////{
-	////	if (para->getParH(lev)->cpTop.size() > 0)
-	////	{
-	////		printCpTop(para, lev);
-	////	}
-	////}
-	//for (int lev = 7; lev <= 8; lev++)
-	//{
-	//	printCpTop(para, lev);
-	//}
-	////printCpTop(para);
-	////printCpBottom(para);
-	////printCpBottom2(para);
-	////////////////////////////////////////////////////////////////////////////////
+    }
+
+    /////////////////////////////////////////////////////////////////////////
+
+    ////////////////////////////////////////////////////////////////////////////////
+    //printDragLift(para);
+    ////////////////////////////////////////////////////////////////////////////////
+
+    ////////////////////////////////////////////////////////////////////////////////
+    if (para->getDiffOn()==true) printPlaneConc(para.get(), cudaMemoryManager.get());
+    ////////////////////////////////////////////////////////////////////////////////
+
+    ////////////////////////////////////////////////////////////////////////////////
+    ////for (int lev = para->getCoarse(); lev <= para->getFine(); lev++)
+    ////{
+    ////    if (para->getParH(lev)->cpTop.size() > 0)
+    ////    {
+    ////        printCpTop(para, lev);
+    ////    }
+    ////}
+    //for (int lev = 7; lev <= 8; lev++)
+    //{
+    //    printCpTop(para, lev);
+    //}
+    ////printCpTop(para);
+    ////printCpBottom(para);
+    ////printCpBottom2(para);
+    ////////////////////////////////////////////////////////////////////////////////
 
  //  //////////////////////////////////////////////////////////////////////////
  //  //Copy Measure Values
-	//for (int lev=para->getCoarse(); lev <= para->getFine(); lev++)
-	//{
-	//	output << "\n Copy MeasurePoints at level = " << lev <<"\n";
-	//	para->cudaCopyMeasurePointsToHost(lev);
-	//	para->copyMeasurePointsArrayToVector(lev);
-	//	output << "\n Write MeasurePoints at level = " << lev <<"\n";
-	//	for(int j = 0; j < (int)para->getParH(lev)->MP.size(); j++)
-	//	{
-	//		MeasurePointWriter::writeMeasurePoints(para, lev, j, 0);
-	//	}
-	//}
+    //for (int lev=para->getCoarse(); lev <= para->getFine(); lev++)
+    //{
+    //    VF_LOG_INFO("Copy MeasurePoints at level = {}", lev);
+    //    para->cudaCopyMeasurePointsToHost(lev);
+    //    para->copyMeasurePointsArrayToVector(lev);
+    //    VF_LOG_INFO("Write MeasurePoints at level = {}", lev);
+    //    for(int j = 0; j < (int)para->getParH(lev)->MP.size(); j++)
+    //    {
+    //        MeasurePointWriter::writeMeasurePoints(para, lev, j, 0);
+    //    }
+    //}
  //  //////////////////////////////////////////////////////////////////////////
 }
 
 void Simulation::porousMedia()
 {
-	double porosity, darcySI, forchheimerSI;
-	double dxLBM = 0.00390625;
-	double dtLBM = 0.00000658;
-	unsigned int level, geo;
-	double startX, startY, startZ, endX, endY, endZ;
-	//////////////////////////////////////////////////////////////////////////
-
-	////////////////////////////////////////////////////////////////////////////
-	////Test = porous media 0
-	//porosity = 0.7;
-	//darcySI = 137.36; //[1/s]
-	//forchheimerSI = 1037.8; //[1/m]
-	//level = para->getFine();
-	//geo = GEO_PM_0;
-	//startX = 20.0;
-	//startY =  0.0;
-	//startZ =  0.0;
-	//endX = 40.0;
-	//endY = 22.0;
-	//endZ = 22.0;
-	//pm[0] = new PorousMedia(porosity, geo, darcySI, forchheimerSI, dxLBM, dtLBM, level);
-	//pm[0]->setStartCoordinates(startX, startY, startZ);
-	//pm[0]->setEndCoordinates(endX, endY, endZ);
-	//pm[0]->setResistanceLBM();
-	//definePMarea(pm[0]);
-	////////////////////////////////////////////////////////////////////////////
-
-	//////////////////////////////////////////////////////////////////////////
-	//Kondensator = porous media 0
-	porosity = 0.7;
-	darcySI = 137.36; //[1/s]
-	forchheimerSI = 1037.8; //[1/m]
-	level = para->getFine();
-	geo = GEO_PM_0;
-	startX = -0.715882;
-	startY = -0.260942;
-	startZ = -0.031321;
-	endX = -0.692484;
-	endY =  0.277833;
-	endZ =  0.360379;
-	pm.push_back(std::shared_ptr<PorousMedia>(new PorousMedia(porosity, geo, darcySI, forchheimerSI, dxLBM, dtLBM, level)));
-	int n = (int)pm.size() - 1;
-	pm.at(n)->setStartCoordinates(startX, startY, startZ);
-	pm.at(n)->setEndCoordinates(endX, endY, endZ);
-	pm.at(n)->setResistanceLBM();
-	definePMarea(pm.at(n));
-	//////////////////////////////////////////////////////////////////////////
-
-	//////////////////////////////////////////////////////////////////////////
-	//NT-Kuehler = porous media 1
-	porosity = 0.6;
-	darcySI = 149.98; //[1/s]
-	forchheimerSI = 960.57; //[1/m]
-	level = para->getFine();
-	geo = GEO_PM_1;
-	startX = -0.696146;
-	startY = -0.32426;
-	startZ = -0.0421345;
-	endX = -0.651847;
-	endY =  0.324822;
-	endZ =  0.057098;
-	pm.push_back(std::shared_ptr<PorousMedia>(new PorousMedia(porosity, geo, darcySI, forchheimerSI, dxLBM, dtLBM, level)));
-	n = (int)pm.size() - 1;
-	pm.at(n)->setStartCoordinates(startX, startY, startZ);
-	pm.at(n)->setEndCoordinates(endX, endY, endZ);
-	pm.at(n)->setResistanceLBM();
-	definePMarea(pm.at(n));
-	//////////////////////////////////////////////////////////////////////////
-
-	//////////////////////////////////////////////////////////////////////////
-	//Wasserkuehler = porous media 2
-	porosity = 0.6;
-	darcySI = 148.69; //[1/s]
-	forchheimerSI = 629.45; //[1/m]
-	level = para->getFine();
-	geo = GEO_PM_2;
-	startX = -0.692681;
-	startY = -0.324954;
-	startZ = 0.0789429;
-	endX = -0.657262;
-	endY =  0.32538;
-	endZ =  0.400974;
-	pm.push_back(std::shared_ptr<PorousMedia>(new PorousMedia(porosity, geo, darcySI, forchheimerSI, dxLBM, dtLBM, level)));
-	n = (int)pm.size() - 1;
-	pm.at(n)->setStartCoordinates(startX, startY, startZ);
-	pm.at(n)->setEndCoordinates(endX, endY, endZ);
-	pm.at(n)->setResistanceLBM();
-	definePMarea(pm.at(n));
-	//////////////////////////////////////////////////////////////////////////
+    double porosity, darcySI, forchheimerSI;
+    double dxLBM = 0.00390625;
+    double dtLBM = 0.00000658;
+    unsigned int level, geo;
+    double startX, startY, startZ, endX, endY, endZ;
+    //////////////////////////////////////////////////////////////////////////
+
+    ////////////////////////////////////////////////////////////////////////////
+    ////Test = porous media 0
+    //porosity = 0.7;
+    //darcySI = 137.36; //[1/s]
+    //forchheimerSI = 1037.8; //[1/m]
+    //level = para->getFine();
+    //geo = GEO_PM_0;
+    //startX = 20.0;
+    //startY =  0.0;
+    //startZ =  0.0;
+    //endX = 40.0;
+    //endY = 22.0;
+    //endZ = 22.0;
+    //pm[0] = new PorousMedia(porosity, geo, darcySI, forchheimerSI, dxLBM, dtLBM, level);
+    //pm[0]->setStartCoordinates(startX, startY, startZ);
+    //pm[0]->setEndCoordinates(endX, endY, endZ);
+    //pm[0]->setResistanceLBM();
+    //definePMarea(pm[0]);
+    ////////////////////////////////////////////////////////////////////////////
+
+    //////////////////////////////////////////////////////////////////////////
+    //Condenser (Kondensator) = porous media 0
+    porosity = 0.7;
+    darcySI = 137.36; //[1/s]
+    forchheimerSI = 1037.8; //[1/m]
+    level = para->getFine();
+    geo = GEO_PM_0;
+    startX = -0.715882;
+    startY = -0.260942;
+    startZ = -0.031321;
+    endX = -0.692484;
+    endY =  0.277833;
+    endZ =  0.360379;
+    pm.push_back(std::shared_ptr<PorousMedia>(new PorousMedia(porosity, geo, darcySI, forchheimerSI, dxLBM, dtLBM, level)));
+    int n = (int)pm.size() - 1;
+    pm.at(n)->setStartCoordinates(startX, startY, startZ);
+    pm.at(n)->setEndCoordinates(endX, endY, endZ);
+    pm.at(n)->setResistanceLBM();
+    definePMarea(pm.at(n));
+    //////////////////////////////////////////////////////////////////////////
+
+    //////////////////////////////////////////////////////////////////////////
+    //Low-temperature cooler (NT-Kuehler) = porous media 1
+    porosity = 0.6;
+    darcySI = 149.98; //[1/s]
+    forchheimerSI = 960.57; //[1/m]
+    level = para->getFine();
+    geo = GEO_PM_1;
+    startX = -0.696146;
+    startY = -0.32426;
+    startZ = -0.0421345;
+    endX = -0.651847;
+    endY =  0.324822;
+    endZ =  0.057098;
+    pm.push_back(std::shared_ptr<PorousMedia>(new PorousMedia(porosity, geo, darcySI, forchheimerSI, dxLBM, dtLBM, level)));
+    n = (int)pm.size() - 1;
+    pm.at(n)->setStartCoordinates(startX, startY, startZ);
+    pm.at(n)->setEndCoordinates(endX, endY, endZ);
+    pm.at(n)->setResistanceLBM();
+    definePMarea(pm.at(n));
+    //////////////////////////////////////////////////////////////////////////
+
+    //////////////////////////////////////////////////////////////////////////
+    //Water cooler (Wasserkuehler) = porous media 2
+    porosity = 0.6;
+    darcySI = 148.69; //[1/s]
+    forchheimerSI = 629.45; //[1/m]
+    level = para->getFine();
+    geo = GEO_PM_2;
+    startX = -0.692681;
+    startY = -0.324954;
+    startZ = 0.0789429;
+    endX = -0.657262;
+    endY =  0.32538;
+    endZ =  0.400974;
+    pm.push_back(std::shared_ptr<PorousMedia>(new PorousMedia(porosity, geo, darcySI, forchheimerSI, dxLBM, dtLBM, level)));
+    n = (int)pm.size() - 1;
+    pm.at(n)->setStartCoordinates(startX, startY, startZ);
+    pm.at(n)->setEndCoordinates(endX, endY, endZ);
+    pm.at(n)->setResistanceLBM();
+    definePMarea(pm.at(n));
+    //////////////////////////////////////////////////////////////////////////
 
 }
 
 void Simulation::definePMarea(std::shared_ptr<PorousMedia>& pMedia)
 {
-	unsigned int counter = 0;
-	unsigned int level = pMedia->getLevelPM();
-	std::vector< unsigned int > nodeIDsPorousMedia;
-	output << "definePMarea....find nodes \n";
-
-	for (unsigned int i = 0; i < para->getParH(level)->numberOfNodes; i++)
-	{
-		if (((para->getParH(level)->coordinateX[i] >= pMedia->getStartX()) && (para->getParH(level)->coordinateX[i] <= pMedia->getEndX())) &&
-			((para->getParH(level)->coordinateY[i] >= pMedia->getStartY()) && (para->getParH(level)->coordinateY[i] <= pMedia->getEndY())) &&
-			((para->getParH(level)->coordinateZ[i] >= pMedia->getStartZ()) && (para->getParH(level)->coordinateZ[i] <= pMedia->getEndZ())) )
-		{
-			if (para->getParH(level)->typeOfGridNode[i] >= GEO_FLUID)
-			{
-				para->getParH(level)->typeOfGridNode[i] = pMedia->getGeoID();
-				nodeIDsPorousMedia.push_back(i);
-				counter++;
-			}
-		}
-	}
-
-	output << "definePMarea....cuda copy SP \n";
-	cudaMemoryManager->cudaCopySP(level);
-	pMedia->setSizePM(counter);
-	output << "definePMarea....cuda alloc PM \n";
-	cudaMemoryManager->cudaAllocPorousMedia(pMedia.get(), level);
-	unsigned int *tpmArrayIDs = pMedia->getHostNodeIDsPM();
-
-	output << "definePMarea....copy vector to array \n";
-	for (unsigned int j = 0; j < pMedia->getSizePM(); j++)
-	{
-		tpmArrayIDs[j] = nodeIDsPorousMedia[j];
-	}
-
-	pMedia->setHostNodeIDsPM(tpmArrayIDs);
-	output << "definePMarea....cuda copy PM \n";
-	cudaMemoryManager->cudaCopyPorousMedia(pMedia.get(), level);
+    unsigned int counter = 0;
+    unsigned int level = pMedia->getLevelPM();
+    std::vector< unsigned int > nodeIDsPorousMedia;
+    VF_LOG_INFO("definePMarea....find nodes");
+
+    for (unsigned int i = 0; i < para->getParH(level)->numberOfNodes; i++)
+    {
+        if (((para->getParH(level)->coordinateX[i] >= pMedia->getStartX()) && (para->getParH(level)->coordinateX[i] <= pMedia->getEndX())) &&
+            ((para->getParH(level)->coordinateY[i] >= pMedia->getStartY()) && (para->getParH(level)->coordinateY[i] <= pMedia->getEndY())) &&
+            ((para->getParH(level)->coordinateZ[i] >= pMedia->getStartZ()) && (para->getParH(level)->coordinateZ[i] <= pMedia->getEndZ())) )
+        {
+            if (para->getParH(level)->typeOfGridNode[i] >= GEO_FLUID)
+            {
+                para->getParH(level)->typeOfGridNode[i] = pMedia->getGeoID();
+                nodeIDsPorousMedia.push_back(i);
+                counter++;
+            }
+        }
+    }
+
+    VF_LOG_INFO("definePMarea....cuda copy SP");
+    cudaMemoryManager->cudaCopySP(level);
+    pMedia->setSizePM(counter);
+    VF_LOG_INFO("definePMarea....cuda alloc PM");
+    cudaMemoryManager->cudaAllocPorousMedia(pMedia.get(), level);
+    unsigned int *tpmArrayIDs = pMedia->getHostNodeIDsPM();
+
+    VF_LOG_INFO("definePMarea....copy vector to array");
+    for (unsigned int j = 0; j < pMedia->getSizePM(); j++)
+    {
+        tpmArrayIDs[j] = nodeIDsPorousMedia[j];
+    }
+
+    pMedia->setHostNodeIDsPM(tpmArrayIDs);
+    VF_LOG_INFO("definePMarea....cuda copy PM");
+    cudaMemoryManager->cudaCopyPorousMedia(pMedia.get(), level);
 }
 
 Simulation::~Simulation()
 {
-	// Cuda Streams
+    // Cuda Streams
     if (para->getUseStreams()) {
         para->getStreamManager()->destroyCudaEvents();
         para->getStreamManager()->terminateStreams();
-	}
+    }
 
-	//CudaFreeHostMemory
+    //CudaFreeHostMemory
     for (int lev = para->getCoarse(); lev <= para->getFine(); lev++)
-	{
-		//para->cudaFreeFull(lev);
-		cudaMemoryManager->cudaFreeCoord(lev);
-		cudaMemoryManager->cudaFreeSP(lev);
-		if (para->getCalcMedian())
-		{
-			cudaMemoryManager->cudaFreeMedianSP(lev);
-		}
-		//para->cudaFreeVeloBC(lev);
-		//para->cudaFreeWallBC(lev);
-		//para->cudaFreeVeloBC(lev);
-		//para->cudaFreeInlet(lev);
-		//para->cudaFreeOutlet(lev);
-		//para->cudaFreeGeomBC(lev);
-		//para->cudaFreePress(lev);
-	}
-	if (para->getMaxLevel()>1)
-	{
-		for (int lev = para->getCoarse(); lev < para->getFine(); lev++)
-		{
-			cudaMemoryManager->cudaFreeInterfaceCF(lev);
-			cudaMemoryManager->cudaFreeInterfaceFC(lev);
-			cudaMemoryManager->cudaFreeInterfaceOffCF(lev);
-			cudaMemoryManager->cudaFreeInterfaceOffFC(lev);
-			//para->cudaFreePressX1(lev);
-		}
-	}
-	//para->cudaFreeVeloBC(0); //level = 0
-	//para->cudaFreePressBC();
-	//para->cudaFreeVeloPropeller(para->getFine());
-	//para->cudaFreePressX0(para->getCoarse());
-
-	//////////////////////////////////////////////////////////////////////////
-	//Temp
-	if (para->getDiffOn() == true)
-	{
-		for (int lev = para->getCoarse(); lev < para->getFine(); lev++)
-		{
-			checkCudaErrors(cudaFreeHost(para->getParH(lev)->Conc_Full));
-			checkCudaErrors(cudaFreeHost(para->getParH(lev)->Conc));
-			checkCudaErrors(cudaFreeHost(para->getParH(lev)->Temp.temp));
-			checkCudaErrors(cudaFreeHost(para->getParH(lev)->Temp.k));
-			checkCudaErrors(cudaFreeHost(para->getParH(lev)->TempVel.temp));
-			checkCudaErrors(cudaFreeHost(para->getParH(lev)->TempVel.velo));
-			checkCudaErrors(cudaFreeHost(para->getParH(lev)->TempVel.k));
-			checkCudaErrors(cudaFreeHost(para->getParH(lev)->TempPress.temp));
-			checkCudaErrors(cudaFreeHost(para->getParH(lev)->TempPress.velo));
-			checkCudaErrors(cudaFreeHost(para->getParH(lev)->TempPress.k));
-		}
-	}
-	//////////////////////////////////////////////////////////////////////////
-
-
-	//////////////////////////////////////////////////////////////////////////
-	//free second order moments
-	if (para->getCalc2ndOrderMoments())
-	{
-		for (int lev = para->getCoarse(); lev <= para->getFine(); lev++)
-		{
-			cudaMemoryManager->cudaFree2ndMoments(lev);
-		}
-	}
-	//////////////////////////////////////////////////////////////////////////
-	//free third order moments
-	if (para->getCalc3rdOrderMoments())
-	{
-		for (int lev = para->getCoarse(); lev <= para->getFine(); lev++)
-		{
-			cudaMemoryManager->cudaFree3rdMoments(lev);
-		}
-	}
-	//////////////////////////////////////////////////////////////////////////
-	//free higher order moments
-	if (para->getCalcHighOrderMoments())
-	{
-		for (int lev = para->getCoarse(); lev <= para->getFine(); lev++)
-		{
-			cudaMemoryManager->cudaFreeHigherMoments(lev);
-		}
-	}
-	//////////////////////////////////////////////////////////////////////////
-
-
-	//////////////////////////////////////////////////////////////////////////
-	//Multi GPU
-	//////////////////////////////////////////////////////////////////////////
-	////1D domain decomposition
-	//if (para->getNumprocs() > 1)
-	//{
-	// for (int lev=para->getCoarse(); lev < para->getFine(); lev++)
-	// {
-	//  for (unsigned int i=0; i < para->getNumberOfProcessNeighbors(lev, "send"); i++)
-	//  {
-	//   para->cudaFreeProcessNeighbor(lev, i);
-	//  }
-	// }
-	//}
-	//////////////////////////////////////////////////////////////////////////
-	//3D domain decomposition
-	if (para->getNumprocs() > 1)
-	{
-		for (int lev = para->getCoarse(); lev < para->getFine(); lev++)
-		{
-			//////////////////////////////////////////////////////////////////////////
-			for (unsigned int i = 0; i < para->getNumberOfProcessNeighborsX(lev, "send"); i++)
-			{
-				cudaMemoryManager->cudaFreeProcessNeighborX(lev, i);
-			}
-			//////////////////////////////////////////////////////////////////////////
-			for (unsigned int i = 0; i < para->getNumberOfProcessNeighborsY(lev, "send"); i++)
-			{
-				cudaMemoryManager->cudaFreeProcessNeighborY(lev, i);
-			}
-			//////////////////////////////////////////////////////////////////////////
-			for (unsigned int i = 0; i < para->getNumberOfProcessNeighborsZ(lev, "send"); i++)
-			{
-				cudaMemoryManager->cudaFreeProcessNeighborZ(lev, i);
-			}
-		}
-	}
-	//////////////////////////////////////////////////////////////////////////
-	//Normals
-	if (para->getIsGeoNormal()) {
-		for (int lev = para->getCoarse(); lev < para->getFine(); lev++)
-		{
-			cudaMemoryManager->cudaFreeGeomNormals(lev);
-		}
-	}
-	//////////////////////////////////////////////////////////////////////////
-	// Turbulence Intensity
-	if (para->getCalcTurbulenceIntensity()) {
+    {
+        //para->cudaFreeFull(lev);
+        cudaMemoryManager->cudaFreeCoord(lev);
+        cudaMemoryManager->cudaFreeSP(lev);
+        if (para->getCalcMedian())
+        {
+            cudaMemoryManager->cudaFreeMedianSP(lev);
+        }
+        //para->cudaFreeVeloBC(lev);
+        //para->cudaFreeWallBC(lev);
+        //para->cudaFreeVeloBC(lev);
+        //para->cudaFreeInlet(lev);
+        //para->cudaFreeOutlet(lev);
+        //para->cudaFreeGeomBC(lev);
+        //para->cudaFreePress(lev);
+    }
+    if (para->getMaxLevel()>1)
+    {
+        for (int lev = para->getCoarse(); lev < para->getFine(); lev++)
+        {
+            cudaMemoryManager->cudaFreeInterfaceCF(lev);
+            cudaMemoryManager->cudaFreeInterfaceFC(lev);
+            cudaMemoryManager->cudaFreeInterfaceOffCF(lev);
+            cudaMemoryManager->cudaFreeInterfaceOffFC(lev);
+            //para->cudaFreePressX1(lev);
+        }
+    }
+    //para->cudaFreeVeloBC(0); //level = 0
+    //para->cudaFreePressBC();
+    //para->cudaFreeVeloPropeller(para->getFine());
+    //para->cudaFreePressX0(para->getCoarse());
+
+    //////////////////////////////////////////////////////////////////////////
+    //Temp
+    if (para->getDiffOn() == true)
+    {
+        for (int lev = para->getCoarse(); lev < para->getFine(); lev++)
+        {
+            checkCudaErrors(cudaFreeHost(para->getParH(lev)->Conc_Full));
+            checkCudaErrors(cudaFreeHost(para->getParH(lev)->Conc));
+            checkCudaErrors(cudaFreeHost(para->getParH(lev)->Temp.temp));
+            checkCudaErrors(cudaFreeHost(para->getParH(lev)->Temp.k));
+            checkCudaErrors(cudaFreeHost(para->getParH(lev)->TempVel.temp));
+            checkCudaErrors(cudaFreeHost(para->getParH(lev)->TempVel.velo));
+            checkCudaErrors(cudaFreeHost(para->getParH(lev)->TempVel.k));
+            checkCudaErrors(cudaFreeHost(para->getParH(lev)->TempPress.temp));
+            checkCudaErrors(cudaFreeHost(para->getParH(lev)->TempPress.velo));
+            checkCudaErrors(cudaFreeHost(para->getParH(lev)->TempPress.k));
+        }
+    }
+    //////////////////////////////////////////////////////////////////////////
+
+
+    //////////////////////////////////////////////////////////////////////////
+    //free second order moments
+    if (para->getCalc2ndOrderMoments())
+    {
+        for (int lev = para->getCoarse(); lev <= para->getFine(); lev++)
+        {
+            cudaMemoryManager->cudaFree2ndMoments(lev);
+        }
+    }
+    //////////////////////////////////////////////////////////////////////////
+    //free third order moments
+    if (para->getCalc3rdOrderMoments())
+    {
+        for (int lev = para->getCoarse(); lev <= para->getFine(); lev++)
+        {
+            cudaMemoryManager->cudaFree3rdMoments(lev);
+        }
+    }
+    //////////////////////////////////////////////////////////////////////////
+    //free higher order moments
+    if (para->getCalcHighOrderMoments())
+    {
+        for (int lev = para->getCoarse(); lev <= para->getFine(); lev++)
+        {
+            cudaMemoryManager->cudaFreeHigherMoments(lev);
+        }
+    }
+    //////////////////////////////////////////////////////////////////////////
+
+
+    //////////////////////////////////////////////////////////////////////////
+    //Multi GPU
+    //////////////////////////////////////////////////////////////////////////
+    ////1D domain decomposition
+    //if (para->getNumprocs() > 1)
+    //{
+    // for (int lev=para->getCoarse(); lev < para->getFine(); lev++)
+    // {
+    //  for (unsigned int i=0; i < para->getNumberOfProcessNeighbors(lev, "send"); i++)
+    //  {
+    //   para->cudaFreeProcessNeighbor(lev, i);
+    //  }
+    // }
+    //}
+    //////////////////////////////////////////////////////////////////////////
+    //3D domain decomposition
+    if (para->getNumprocs() > 1)
+    {
+        for (int lev = para->getCoarse(); lev < para->getFine(); lev++)
+        {
+            //////////////////////////////////////////////////////////////////////////
+            for (unsigned int i = 0; i < para->getNumberOfProcessNeighborsX(lev, "send"); i++)
+            {
+                cudaMemoryManager->cudaFreeProcessNeighborX(lev, i);
+            }
+            //////////////////////////////////////////////////////////////////////////
+            for (unsigned int i = 0; i < para->getNumberOfProcessNeighborsY(lev, "send"); i++)
+            {
+                cudaMemoryManager->cudaFreeProcessNeighborY(lev, i);
+            }
+            //////////////////////////////////////////////////////////////////////////
+            for (unsigned int i = 0; i < para->getNumberOfProcessNeighborsZ(lev, "send"); i++)
+            {
+                cudaMemoryManager->cudaFreeProcessNeighborZ(lev, i);
+            }
+        }
+    }
+    //////////////////////////////////////////////////////////////////////////
+    //Normals
+    if (para->getIsGeoNormal()) {
+        for (int lev = para->getCoarse(); lev < para->getFine(); lev++)
+        {
+            cudaMemoryManager->cudaFreeGeomNormals(lev);
+        }
+    }
+    //////////////////////////////////////////////////////////////////////////
+    // Turbulence Intensity
+    if (para->getCalcTurbulenceIntensity()) {
         cudaFreeTurbulenceIntensityArrays(para.get(), cudaMemoryManager.get());
-	//PreCollisionInteractors
-	for( SPtr<PreCollisionInteractor> actuator: para->getActuators()){
-		actuator->free(para.get(), cudaMemoryManager.get());
-	}
-
-	for( SPtr<PreCollisionInteractor> probe: para->getProbes()){
-		probe->free(para.get(), cudaMemoryManager.get());
-	}
-	//////////////////////////////////////////////////////////////////////////
+    //PreCollisionInteractors
+    for( SPtr<PreCollisionInteractor> actuator: para->getActuators()){
+        actuator->free(para.get(), cudaMemoryManager.get());
     }
-}
\ No newline at end of file
+
+    for( SPtr<PreCollisionInteractor> probe: para->getProbes()){
+        probe->free(para.get(), cudaMemoryManager.get());
+    }
+    //////////////////////////////////////////////////////////////////////////
+    }
+}
diff --git a/src/gpu/VirtualFluids_GPU/LBM/Simulation.h b/src/gpu/VirtualFluids_GPU/LBM/Simulation.h
index e43e88e482ea56e3d2fecdcd1275b209575e3442..35a082a2387a87b28e2ba8ca8ff94279faad86f4 100644
--- a/src/gpu/VirtualFluids_GPU/LBM/Simulation.h
+++ b/src/gpu/VirtualFluids_GPU/LBM/Simulation.h
@@ -6,10 +6,10 @@
 
 #include <PointerDefinitions.h>
 
-#include "Output/LogWriter.hpp"
 #include "Utilities/Buffer2D.hpp"
 #include "LBM/LB.h"
 
+
 namespace vf::gpu { class Communicator; }
 
 class CudaMemoryManager;
@@ -29,28 +29,34 @@ class UpdateGrid27;
 class KineticEnergyAnalyzer;
 class EnstrophyAnalyzer;
 class BoundaryConditionFactory;
+class TurbulenceModelFactory;
 
 class Simulation
 {
 public:
     Simulation(std::shared_ptr<Parameter> para, std::shared_ptr<CudaMemoryManager> memoryManager,
                vf::gpu::Communicator &communicator, GridProvider &gridProvider, BoundaryConditionFactory* bcFactory);
+	
+	Simulation(std::shared_ptr<Parameter> para, std::shared_ptr<CudaMemoryManager> memoryManager,
+               vf::gpu::Communicator &communicator, GridProvider &gridProvider, BoundaryConditionFactory* bcFactory, SPtr<TurbulenceModelFactory> tmFactory);
+
     ~Simulation();
     void run();
 
     void setFactories(std::unique_ptr<KernelFactory> &&kernelFactory,
                std::unique_ptr<PreProcessorFactory> &&preProcessorFactory);
-    void setDataWriter(std::unique_ptr<DataWriter>&& dataWriter);
+    void setDataWriter(std::shared_ptr<DataWriter> dataWriter);
     void addKineticEnergyAnalyzer(uint tAnalyse);
     void addEnstrophyAnalyzer(uint tAnalyse);
 
 private:
+	void init(GridProvider &gridProvider, BoundaryConditionFactory *bcFactory, SPtr<TurbulenceModelFactory> tmFactory);
     void allocNeighborsOffsetsScalesAndBoundaries(GridProvider& gridProvider);
     void porousMedia();
     void definePMarea(std::shared_ptr<PorousMedia>& pm);
 
 	std::unique_ptr<KernelFactory> kernelFactory;
-	std::unique_ptr<PreProcessorFactory> preProcessorFactory;
+	std::shared_ptr<PreProcessorFactory> preProcessorFactory;
 
 	Buffer2D <real> sbuf_t;
 	Buffer2D <real> rbuf_t;
@@ -63,15 +69,14 @@ private:
 	Buffer2D <int> geo_rbuf_b;
 
 
-	LogWriter output;
-
 	vf::gpu::Communicator& communicator;
     SPtr<Parameter> para;
-    std::unique_ptr<DataWriter> dataWriter;
+    std::shared_ptr<DataWriter> dataWriter;
 	std::shared_ptr<CudaMemoryManager> cudaMemoryManager;
 	std::vector < SPtr< Kernel>> kernels;
 	std::vector < SPtr< ADKernel>> adKernels;
 	std::shared_ptr<PreProcessor> preProcessor;
+	SPtr<TurbulenceModelFactory> tmFactory;
 
     SPtr<RestartObject> restart_object;
 
diff --git a/src/gpu/VirtualFluids_GPU/Output/FileWriter.cpp b/src/gpu/VirtualFluids_GPU/Output/FileWriter.cpp
index 8a58da5b53406155222d6db9d39b000317ba8cd7..c6e53ee3cbfb98f11e373ca014c7faf4e70a86f0 100644
--- a/src/gpu/VirtualFluids_GPU/Output/FileWriter.cpp
+++ b/src/gpu/VirtualFluids_GPU/Output/FileWriter.cpp
@@ -1,11 +1,12 @@
 //  _    ___      __              __________      _     __        ______________   __
 // | |  / (_)____/ /___  ______ _/ / ____/ /_  __(_)___/ /____   /  ___/ __  / /  / /
 // | | / / / ___/ __/ / / / __ `/ / /_  / / / / / / __  / ___/  / /___/ /_/ / /  / /
-// | |/ / / /  / /_/ /_/ / /_/ / / __/ / / /_/ / / /_/ (__  )  / /_) / ____/ /__/ / 
+// | |/ / / /  / /_/ /_/ / /_/ / / __/ / / /_/ / / /_/ (__  )  / /_) / ____/ /__/ /
 // |___/_/_/   \__/\__,_/\__,_/_/_/   /_/\__,_/_/\__,_/____/   \____/_/    \_____/
 //
 //////////////////////////////////////////////////////////////////////////
 #include "FileWriter.h"
+#include <logger/Logger.h>
 
 #include <stdio.h>
 #include <fstream>
@@ -24,7 +25,7 @@
 
 void FileWriter::writeInit(std::shared_ptr<Parameter> para, std::shared_ptr<CudaMemoryManager> cudaMemoryManager)
 {
-    unsigned int timestep = para->getTInit();
+    unsigned int timestep = para->getTimestepInit();
     for (int level = para->getCoarse(); level <= para->getFine(); level++) {
         cudaMemoryManager->cudaCopyPrint(level);
         writeTimestep(para, timestep, level);
@@ -52,10 +53,11 @@ void FileWriter::writeTimestep(std::shared_ptr<Parameter> para, unsigned int tim
     const unsigned int numberOfParts = para->getParH(level)->numberOfNodes / para->getlimitOfNodesForVTK() + 1;
     std::vector<std::string> fname;
     std::vector<std::string> fnameMed;
+
     for (unsigned int i = 1; i <= numberOfParts; i++)
     {
-        fname.push_back(para->getFName() + "_bin_lev_" + StringUtil::toString<int>(level) + "_ID_" + StringUtil::toString<int>(para->getMyID()) + "_Part_" + StringUtil::toString<int>(i) + "_t_" + StringUtil::toString<int>(timestep) + ".vtk");
-        fnameMed.push_back(para->getFName() + "_bin_median_lev_" + StringUtil::toString<int>(level) + "_ID_" + StringUtil::toString<int>(para->getMyID()) + "_Part_" + StringUtil::toString<int>(i) + "_t_" + StringUtil::toString<int>(timestep) + ".vtk");
+        fname.push_back(para->getFName() + "_bin_lev_" + StringUtil::toString<int>(level) + "_ID_" + StringUtil::toString<int>(para->getMyProcessID()) + "_Part_" + StringUtil::toString<int>(i) + "_t_" + StringUtil::toString<int>(timestep) + ".vtk");
+        fnameMed.push_back(para->getFName() + "_bin_median_lev_" + StringUtil::toString<int>(level) + "_ID_" + StringUtil::toString<int>(para->getMyProcessID()) + "_Part_" + StringUtil::toString<int>(i) + "_t_" + StringUtil::toString<int>(timestep) + ".vtk");
 
         this->fileNamesForCollectionFile.push_back( fname.back() );
         this->fileNamesForCollectionFileMedian.push_back( fnameMed.back() );
@@ -85,7 +87,7 @@ bool FileWriter::isPeriodicCell(std::shared_ptr<Parameter> para, int level, unsi
 void VIRTUALFLUIDS_GPU_EXPORT FileWriter::writeCollectionFile(std::shared_ptr<Parameter> para, unsigned int timestep)
 {
 
-    std::string filename = para->getFName() + "_bin_ID_" + StringUtil::toString<int>(para->getMyID()) + "_t_" + StringUtil::toString<int>(timestep) + ".vtk";
+    std::string filename = para->getFName() + "_bin_ID_" + StringUtil::toString<int>(para->getMyProcessID()) + "_t_" + StringUtil::toString<int>(timestep) + ".vtk";
 
     std::ofstream file;
 
@@ -129,14 +131,14 @@ void VIRTUALFLUIDS_GPU_EXPORT FileWriter::writeCollectionFile(std::shared_ptr<Pa
 void VIRTUALFLUIDS_GPU_EXPORT FileWriter::writeCollectionFileMedian(std::shared_ptr<Parameter> para, unsigned int timestep)
 {
 
-    std::string filename = para->getFName() + "_bin_median_ID_" + StringUtil::toString<int>(para->getMyID()) + "_t_" + StringUtil::toString<int>(timestep) + ".vtk";
+    std::string filename = para->getFName() + "_bin_median_ID_" + StringUtil::toString<int>(para->getMyProcessID()) + "_t_" + StringUtil::toString<int>(timestep) + ".vtk";
 
     std::ofstream file;
 
     file.open( filename + ".pvtu" );
 
     //////////////////////////////////////////////////////////////////////////
-    
+
     file << "<VTKFile type=\"PUnstructuredGrid\" version=\"1.0\" byte_order=\"LittleEndian\" header_type=\"UInt64\">" << std::endl;
     file << "  <PUnstructuredGrid GhostLevel=\"1\">" << std::endl;
 
@@ -181,6 +183,20 @@ void FileWriter::writeUnstrucuredGridLT(std::shared_ptr<Parameter> para, int lev
     nodedatanames.push_back("vx2");
     nodedatanames.push_back("vx3");
     nodedatanames.push_back("geo");
+    
+    uint firstBodyForceNode = (uint) nodedatanames.size();
+    if(para->getIsBodyForce())
+    {
+        nodedatanames.push_back("Fx");
+        nodedatanames.push_back("Fy");
+        nodedatanames.push_back("Fz");
+    }
+
+    uint firstNutNode = (uint) nodedatanames.size();
+    if(para->getUseTurbulentViscosity())
+    {
+        nodedatanames.push_back("nut");
+    }
 
     uint firstTurbNode = (uint) nodedatanames.size();
     if (para->getCalcTurbulenceIntensity()) {
@@ -237,6 +253,18 @@ void FileWriter::writeUnstrucuredGridLT(std::shared_ptr<Parameter> para, int lev
                 nodedata[4][dn1] = (double)para->getParH(level)->velocityZ[pos] * (double)para->getVelocityRatio();
                 nodedata[5][dn1] = (double)para->getParH(level)->typeOfGridNode[pos];
 
+                if(para->getIsBodyForce())
+                {
+                    nodedata[firstBodyForceNode    ][dn1] = (double)para->getParH(level)->forceX_SP[pos] * (double)para->getScaledForceRatio(level);
+                    nodedata[firstBodyForceNode + 1][dn1] = (double)para->getParH(level)->forceY_SP[pos] * (double)para->getScaledForceRatio(level);
+                    nodedata[firstBodyForceNode + 2][dn1] = (double)para->getParH(level)->forceZ_SP[pos] * (double)para->getScaledForceRatio(level);
+                }
+
+                if(para->getUseTurbulentViscosity())
+                {
+                    nodedata[firstNutNode][dn1] = (double)para->getParH(level)->turbViscosity[pos] * (double)para->getScaledViscosityRatio(level);
+                }
+
                 if (para->getCalcTurbulenceIntensity()) {
                     nodedata[firstTurbNode    ][dn1] = (double)para->getParH(level)->vxx[pos];
                     nodedata[firstTurbNode + 1][dn1] = (double)para->getParH(level)->vyy[pos];
@@ -605,7 +633,7 @@ void FileWriter::writeUnstrucuredGridMedianLTConc(std::shared_ptr<Parameter> par
                 if (isPeriodicCell(para, level, number2, number1, number3, number5))
                     continue;
                 //////////////////////////////////////////////////////////////////////////
-                if (neighborsFluid) 
+                if (neighborsFluid)
                     cells.push_back(makeUbTuple(dn1, dn2, dn3, dn4, dn5, dn6, dn7, dn8));
                 //////////////////////////////////////////////////////////////////////////
             }
diff --git a/src/gpu/VirtualFluids_GPU/Output/InterfaceDebugWriter.hpp b/src/gpu/VirtualFluids_GPU/Output/InterfaceDebugWriter.hpp
index eb00b43acde44d8e5b3af43843c565b1774e65e6..0b1e9dc1c25457457eabe3013a288c4c93577dc3 100644
--- a/src/gpu/VirtualFluids_GPU/Output/InterfaceDebugWriter.hpp
+++ b/src/gpu/VirtualFluids_GPU/Output/InterfaceDebugWriter.hpp
@@ -3,8 +3,6 @@
 
 #include <fstream>
 #include <sstream>
-#include <stdio.h>
-// #include <math.h>
 #include "Core/StringUtilities/StringUtil.h"
 #include "lbm/constants/D3Q27.h"
 #include "LBM/LB.h"
@@ -906,6 +904,6 @@ void writeRecvNodesStream(Parameter *para)
         WbWriterVtkXmlBinary::getInstance()->writeNodesWithNodeData(filenameVec, nodesVec, datanames, nodedata);
     }
 }
-} // namespace InterfaceDebugWriter
 
+} // namespace InterfaceDebugWriter
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/Output/LogWriter.hpp b/src/gpu/VirtualFluids_GPU/Output/LogWriter.hpp
deleted file mode 100644
index cbce9a48cdc8ca36127ab61363fdb9a3dff9dec8..0000000000000000000000000000000000000000
--- a/src/gpu/VirtualFluids_GPU/Output/LogWriter.hpp
+++ /dev/null
@@ -1,66 +0,0 @@
-#ifndef LOGWRITER_H
-#define LOGWRITER_H
-
-#include <iostream>
-#include <fstream>
-//#include <string>
-
-//#include "Utilities/StringUtil.hpp"
-
-
-////////////////////////////////////////////////////////////////////////////////
-class LogWriter
-{
-public:
-   LogWriter()
-   {  
-      consoleOut = false;
-   }
-   LogWriter(std::string fname)
-   {
-      consoleOut = false;
-      this->fname = fname;
-   }
-   void setName(std::string name)
-   {
-      this->fname = name;
-   }
-   void setConsoleOut(bool flag)
-   {
-      consoleOut = flag;
-   }
-   void clearLogFile()
-   {
-      ostr.open(fname.c_str(), std::ios_base::out);
-      if (ostr.bad())
-      {
-         std::string exceptionText = "Error: Output file/directory not found! LogWriter::operator << \n";
-         throw exceptionText;
-      }
-      ostr << "";
-      ostr.close();
-   }
-   template <typename T>
-   LogWriter&  operator << (const T& arg)
-   {
-      ostr.open(fname.c_str(), std::ios_base::app);
-      if (ostr.bad())
-      {
-         //std::cout << "Error: Output file/directory not found! LogWriter::operator <<" << std::endl;
-         //return *this;
-         std::string exceptionText = "Error: Output file/directory not found! LogWriter::operator << \n";
-         throw exceptionText;
-      }
-      ostr << arg;
-      ostr.close();
-      if(consoleOut) std::cout << arg << std::flush;
-      return *this;
-   }
-protected:
-private:
-   std::string fname;
-   std::ofstream ostr;
-   bool consoleOut;
-};
-
-#endif	
diff --git a/src/gpu/VirtualFluids_GPU/Output/NeighborDebugWriter.hpp b/src/gpu/VirtualFluids_GPU/Output/NeighborDebugWriter.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..83f0a677b0012153cf079b466a333acc58bda6be
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/Output/NeighborDebugWriter.hpp
@@ -0,0 +1,74 @@
+#ifndef NEIGHBORDEBUG_HPP
+#define NEIGHBORDEBUG_HPP
+
+#include "LBM/LB.h"
+#include "Logger.h"
+#include "Parameter/Parameter.h"
+#include "basics/utilities/UbSystem.h"
+#include "grid/NodeValues.h"
+#include "lbm/constants/D3Q27.h"
+#include <basics/writer/WbWriterVtkXmlBinary.h>
+
+#include "Utilities/FindNeighbors.h"
+#include "VirtualFluids_GPU/Communication/Communicator.h"
+#include "Core/StringUtilities/StringUtil.h"
+
+namespace NeighborDebugWriter
+{
+
+//! \brief Write one line segment per fluid node that connects the node with its neighbor in the given direction
+//! \param para parameter object that provides the host data of each level
+//! \param level grid level whose nodes are visited
+//! \param numberOfNodes number of node positions that are visited on this level
+//! \param direction direction that is passed on to getNeighborIndex()
+//! \param name file name that is passed on to the VTK line writer
+inline void writeNeighborLinkLines(Parameter *para, const int level, const uint numberOfNodes, const int direction,
+                                   const std::string &name)
+{
+    VF_LOG_INFO("Write node links in direction {}.", direction);
+
+    // Only reserve capacity: both containers are filled with emplace_back below. Constructing them with a size
+    // would put default-constructed dummy nodes and cells in front of the real ones in the output.
+    std::vector<UbTupleFloat3> nodes;
+    nodes.reserve(numberOfNodes * 2);
+    std::vector<UbTupleInt2> cells;
+    cells.reserve(numberOfNodes);
+
+    for (uint position = 0; position < numberOfNodes; position++) {
+        if (para->getParH(level)->typeOfGridNode[position] != GEO_FLUID)
+            continue;
+
+        const double x1 = para->getParH(level)->coordinateX[position];
+        const double x2 = para->getParH(level)->coordinateY[position];
+        const double x3 = para->getParH(level)->coordinateZ[position];
+
+        const uint positionNeighbor = getNeighborIndex(para->getParH(level).get(), position, direction);
+
+        const double x1Neighbor = para->getParH(level)->coordinateX[positionNeighbor];
+        const double x2Neighbor = para->getParH(level)->coordinateY[positionNeighbor];
+        const double x3Neighbor = para->getParH(level)->coordinateZ[positionNeighbor];
+
+        nodes.emplace_back(float(x1), float(x2), float(x3));
+        nodes.emplace_back(float(x1Neighbor), float(x2Neighbor), float(x3Neighbor));
+
+        // the cell connects the two nodes that were appended last
+        cells.emplace_back((int)nodes.size() - 2, (int)nodes.size() - 1);
+    }
+    WbWriterVtkXmlBinary::getInstance()->writeLines(name, nodes, cells);
+}
+
+//! \brief Call writeNeighborLinkLines() for every level from 0 to getMaxLevel() and every direction from STARTDIR to ENDDIR
+inline void writeNeighborLinkLinesDebug(Parameter *para)
+{
+    for (int level = 0; level <= para->getMaxLevel(); level++) {
+        for (int direction = vf::lbm::dir::STARTDIR; direction <= vf::lbm::dir::ENDDIR; direction++) {
+            const std::string fileName = para->getFName() + "_" + StringUtil::toString<int>(level) + "_Link_" +
+                                         std::to_string(direction) + "_Debug.vtk";
+            writeNeighborLinkLines(para, level, para->getParH(level)->numberOfNodes, direction, fileName);
+        }
+    }
+}
+
+} // namespace NeighborDebugWriter
+
+#endif
diff --git a/src/gpu/VirtualFluids_GPU/Output/Timer.cpp b/src/gpu/VirtualFluids_GPU/Output/Timer.cpp
index 3f95d95a611da3c75e57dcb3431024e1ca71bd27..74a706165489a86cace40047beb09996aa0aa8db 100644
--- a/src/gpu/VirtualFluids_GPU/Output/Timer.cpp
+++ b/src/gpu/VirtualFluids_GPU/Output/Timer.cpp
@@ -1,11 +1,11 @@
-
+#include "Timer.h"
 #include <iostream>
-#include <cuda_runtime.h>
+#include <helper_cuda.h>
+
 #include "UbScheduler.h"
-#include "Timer.h"
+#include "Parameter/Parameter.h"
 #include "VirtualFluids_GPU/Communication/Communicator.h"
 
-
 void Timer::initTimer()
 {
     cudaEventCreate(&this->start_t);
@@ -38,8 +38,8 @@ void Timer::outputPerformance(uint t, Parameter* para, vf::gpu::Communicator& co
     
     for (int lev=para->getCoarse(); lev <= para->getFine(); lev++)
     {
-        fnups       += 1000.0 * (t-para->getTStart()) * para->getParH(lev)->numberOfNodes * pow(2.,lev) / (this->totalElapsedTime*1.0E6);
-        bandwidth   += (27.0+1.0) * 4.0 * 1000.0 * (t-para->getTStart()) * para->getParH(lev)->numberOfNodes  / (this->totalElapsedTime*1.0E9);
+        fnups       += 1000.0 * (t-para->getTimestepStart()) * para->getParH(lev)->numberOfNodes * pow(2.,lev) / (this->totalElapsedTime*1.0E6);
+        bandwidth   += (27.0+1.0) * 4.0 * 1000.0 * (t-para->getTimestepStart()) * para->getParH(lev)->numberOfNodes  / (this->totalElapsedTime*1.0E9);
     }
 
     if(this->firstOutput && communicator.getPID() == 0) //only display the legend once
diff --git a/src/gpu/VirtualFluids_GPU/Output/Timer.h b/src/gpu/VirtualFluids_GPU/Output/Timer.h
index 26be785c7f76b7695656c9600bdb586804dca251..d035cbb6cef7ea9f8edabbd2894671a868c37eec 100644
--- a/src/gpu/VirtualFluids_GPU/Output/Timer.h
+++ b/src/gpu/VirtualFluids_GPU/Output/Timer.h
@@ -1,17 +1,15 @@
 #ifndef TIMER_H
 #define TIMER_H
-
-#include "helper_cuda.h"
 #include <cuda_runtime.h>
-#include "Core/DataTypes.h"
 
-#include "UbScheduler.h"
-#include "logger/Logger.h"
+#include "Core/DataTypes.h"
 #include "Parameter/Parameter.h"
+#include "logger/Logger.h"
 
 namespace vf::gpu{
     class Communicator;
 }
+class Parameter;
 
 class Timer
 {
diff --git a/src/gpu/VirtualFluids_GPU/Output/VeloASCIIWriter.hpp b/src/gpu/VirtualFluids_GPU/Output/VeloASCIIWriter.hpp
index 7a1a1f468a85811edcf16c0f3511158a9755139b..5973525d23233ab49121d2cfd7dd6ed16666229d 100644
--- a/src/gpu/VirtualFluids_GPU/Output/VeloASCIIWriter.hpp
+++ b/src/gpu/VirtualFluids_GPU/Output/VeloASCIIWriter.hpp
@@ -16,7 +16,7 @@ public:
 		//calc
 		int numberNodes = (int)para->getParH(level)->numberOfNodes;
 		//write
-		UbFileOutputASCII out(para->getFName() + "_VelocitiesASCII_" + std::to_string(level) + "_ID_" + StringUtil::toString<int>(para->getMyID()) + "_t_" + std::to_string(t) + ".dat");
+		UbFileOutputASCII out(para->getFName() + "_VelocitiesASCII_" + std::to_string(level) + "_ID_" + StringUtil::toString<int>(para->getMyProcessID()) + "_t_" + std::to_string(t) + ".dat");
 		//header
 		out.writeString("Level:");
 		out.writeInteger(level);
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/EdgeNodeFinder.cpp b/src/gpu/VirtualFluids_GPU/Parameter/EdgeNodeFinder.cpp
index a2662249f25b30dda9d582e188d5ff126b2c2c5b..fc1d9b19752be009c8105e5d2759c33afe8e9400 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/EdgeNodeFinder.cpp
+++ b/src/gpu/VirtualFluids_GPU/Parameter/EdgeNodeFinder.cpp
@@ -7,11 +7,11 @@
 namespace vf::gpu
 {
 //! \brief Find nodes that are both received in the x-direction and sent in the y-direction
-void findEdgeNodesXY(const int level, LBMSimulationParameter& parameterLB);
+void findEdgeNodesXY(LBMSimulationParameter& parameterLB);
 //! \brief Find nodes that are both received in the x-direction and sent in the z-direction
-void findEdgeNodesXZ(const int level, LBMSimulationParameter& parameterLB);
+void findEdgeNodesXZ(LBMSimulationParameter& parameterLB);
 //! \brief Find nodes that are both received in the y-direction and sent in the z-direction
-void findEdgeNodesYZ(const int level, LBMSimulationParameter& parameterLB);
+void findEdgeNodesYZ(LBMSimulationParameter& parameterLB);
 void findEdgeNodes(const std::vector<ProcessNeighbor27> &recvProcessNeighbor,
                    const std::vector<ProcessNeighbor27> &sendProcessNeighbor,
                    std::vector<LBMSimulationParameter::EdgeNodePositions> &edgeNodes);
@@ -21,25 +21,25 @@ std::optional<std::pair<int, int>> findIndexInSendNodes(const int nodeIndex,
 void findEdgeNodesCommMultiGPU(Parameter& parameter)
 {
     for (int level = 0; level <= parameter.getFine(); level++) {
-        findEdgeNodesXY(level, *parameter.getParH(level));
-        findEdgeNodesXZ(level, *parameter.getParH(level));
-        findEdgeNodesYZ(level, *parameter.getParH(level));
+        findEdgeNodesXY(*parameter.getParH(level));
+        findEdgeNodesXZ(*parameter.getParH(level));
+        findEdgeNodesYZ(*parameter.getParH(level));
     }
 }
 
-void findEdgeNodesXY(const int level, LBMSimulationParameter& parameterLB)
+void findEdgeNodesXY(LBMSimulationParameter& parameterLB)
 {
     findEdgeNodes(parameterLB.recvProcessNeighborX, parameterLB.sendProcessNeighborY,
                   parameterLB.edgeNodesXtoY);
 }
 
-void findEdgeNodesXZ(const int level, LBMSimulationParameter& parameterLB)
+void findEdgeNodesXZ(LBMSimulationParameter& parameterLB)
 {
     findEdgeNodes(parameterLB.recvProcessNeighborX, parameterLB.sendProcessNeighborZ,
                   parameterLB.edgeNodesXtoZ);
 }
 
-void findEdgeNodesYZ(const int level, LBMSimulationParameter& parameterLB)
+void findEdgeNodesYZ(LBMSimulationParameter& parameterLB)
 {
     findEdgeNodes(parameterLB.recvProcessNeighborY, parameterLB.sendProcessNeighborZ,
                   parameterLB.edgeNodesYtoZ);
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/EdgeNodeFinderTest.cpp b/src/gpu/VirtualFluids_GPU/Parameter/EdgeNodeFinderTest.cpp
index 89f300b4ce9e14d76ee54fb02ba5f9102893bb98..c63c1620ae368cdb31ed582814b472b4695114bf 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/EdgeNodeFinderTest.cpp
+++ b/src/gpu/VirtualFluids_GPU/Parameter/EdgeNodeFinderTest.cpp
@@ -43,7 +43,7 @@ protected:
 private:
     void SetUp() override
     {
-        para = std::make_shared<Parameter>(1, 0);
+        para = std::make_shared<Parameter>();
         para->initLBMSimulationParameter();
     }
 };
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp
index 42e53bb681b3977f42ea8ebfebd0fbdebae37444..4123f39f351c4bf41d536bff0d1deea3fbe6e2aa 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp
+++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.cpp
@@ -32,9 +32,10 @@
 //=======================================================================================
 #include "Parameter.h"
 
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <optional>
 
 #include <curand_kernel.h>
 
@@ -44,32 +45,26 @@
 
 #include "Parameter/CudaStreamManager.h"
 
-Parameter::Parameter(int numberOfProcesses, int myId)
-{
-    this->ic.numprocs = numberOfProcesses; 
-    this->ic.myid = myId;
-    
-    initGridPaths();
-    initGridBasePoints();
-    initDefaultLBMkernelAllLevels();
-    this->setFName(this->getOutputPath() + this->getOutputPrefix());
+Parameter::Parameter() : Parameter(1, 0, {}) {}
 
-    // initLBMSimulationParameter();
-}
+Parameter::Parameter(const vf::basics::ConfigurationFile* configData) : Parameter(1, 0, configData) {}
 
-Parameter::Parameter(const vf::basics::ConfigurationFile &configData, int numberOfProcesses, int myId)
+Parameter::Parameter(int numberOfProcesses, int myId) : Parameter(numberOfProcesses, myId, {}) {}
+
+Parameter::Parameter(int numberOfProcesses, int myId, std::optional<const vf::basics::ConfigurationFile*> configData)
 {
-    this->ic.numprocs = numberOfProcesses; 
-    this->ic.myid = myId;
+    this->ic.numprocs = numberOfProcesses;
+    this->ic.myProcessId = myId;
+    // these defaults are in place before an optional configuration file is read
+    this->setQuadricLimiters(0.01, 0.01, 0.01);
+    this->setForcing(0.0, 0.0, 0.0);
 
-    readConfigData(configData);
+    if (configData.has_value() && *configData != nullptr) // skip an empty optional as well as a null pointer
+        readConfigData(**configData);
 
     initGridPaths();
     initGridBasePoints();
     initDefaultLBMkernelAllLevels();
-    this->setFName(this->getOutputPath() + this->getOutputPrefix());
-
-    // initLBMSimulationParameter();
 }
 
 Parameter::~Parameter() = default;
@@ -92,7 +87,7 @@ void Parameter::readConfigData(const vf::basics::ConfigurationFile &configData)
         this->setPrintFiles(configData.getValue<bool>("WriteGrid"));
     //////////////////////////////////////////////////////////////////////////
     if (configData.contains("GeometryValues"))
-        this->setGeometryValues(configData.getValue<bool>("GeometryValues"));
+        this->setUseGeometryValues(configData.getValue<bool>("GeometryValues"));
     //////////////////////////////////////////////////////////////////////////
     if (configData.contains("calc2ndOrderMoments"))
         this->setCalc2ndOrderMoments(configData.getValue<bool>("calc2ndOrderMoments"));
@@ -130,12 +125,6 @@ void Parameter::readConfigData(const vf::basics::ConfigurationFile &configData)
     if (configData.contains("UseWale"))
         this->setUseWale(configData.getValue<bool>("UseWale"));
     //////////////////////////////////////////////////////////////////////////
-    if (configData.contains("UseAMD"))
-        this->setUseAMD(configData.getValue<bool>("UseAMD"));
-    //////////////////////////////////////////////////////////////////////////
-    if (configData.contains("SGSconstant"))
-        this->setSGSConstant(configData.getValue<real>("SGSconstant"));
-    //////////////////////////////////////////////////////////////////////////
     if (configData.contains("UseInitNeq"))
         this->setUseInitNeq(configData.getValue<bool>("UseInitNeq"));
     //////////////////////////////////////////////////////////////////////////
@@ -146,13 +135,13 @@ void Parameter::readConfigData(const vf::basics::ConfigurationFile &configData)
         this->setD3Qxx(configData.getValue<int>("D3Qxx"));
     //////////////////////////////////////////////////////////////////////////
     if (configData.contains("TimeEnd"))
-        this->setTEnd(configData.getValue<int>("TimeEnd"));
+        this->setTimestepEnd(configData.getValue<int>("TimeEnd"));
     //////////////////////////////////////////////////////////////////////////
     if (configData.contains("TimeOut"))
-        this->setTOut(configData.getValue<int>("TimeOut"));
+        this->setTimestepOut(configData.getValue<int>("TimeOut"));
     //////////////////////////////////////////////////////////////////////////
     if (configData.contains("TimeStartOut"))
-        this->setTStartOut(configData.getValue<int>("TimeStartOut"));
+        this->setTimestepStartOut(configData.getValue<int>("TimeStartOut"));
     //////////////////////////////////////////////////////////////////////////
     if (configData.contains("TimeStartCalcMedian"))
         this->setTimeCalcMedStart(configData.getValue<int>("TimeStartCalcMedian"));
@@ -191,10 +180,10 @@ void Parameter::readConfigData(const vf::basics::ConfigurationFile &configData)
 
     //////////////////////////////////////////////////////////////////////////
     if (configData.contains("Viscosity_LB"))
-        this->setViscosity(configData.getValue<real>("Viscosity_LB"));
+        this->setViscosityLB(configData.getValue<real>("Viscosity_LB"));
     //////////////////////////////////////////////////////////////////////////
     if (configData.contains("Velocity_LB"))
-        this->setVelocity(configData.getValue<real>("Velocity_LB"));
+        this->setVelocityLB(configData.getValue<real>("Velocity_LB"));
     //////////////////////////////////////////////////////////////////////////
     if (configData.contains("Viscosity_Ratio_World_to_LB"))
         this->setViscosityRatio(configData.getValue<real>("Viscosity_Ratio_World_to_LB"));
@@ -372,12 +361,12 @@ void Parameter::initGridPaths(){
 
     // for multi-gpu add process id (if not already there)
     if (this->getNumprocs() > 1) {
-        gridPath += StringUtil::toString(this->getMyID()) + "/";
+        gridPath += StringUtil::toString(this->getMyProcessID()) + "/";
         ic.gridPath = gridPath;
     }
 
     //////////////////////////////////////////////////////////////////////////
-        
+
     this->setgeoVec(gridPath + "geoVec.dat");
     this->setcoordX(gridPath + "coordX.dat");
     this->setcoordY(gridPath + "coordY.dat");
@@ -415,7 +404,7 @@ void Parameter::initGridPaths(){
     this->setcpBottom2(gridPath + "cpBottom2.dat");
     this->setConcentration(gridPath + "conc.dat");
     this->setStreetVelocity(gridPath + "streetVector.dat");
-    
+
     //////////////////////////////////////////////////////////////////////////
     // Normals - Geometry
     this->setgeomBoundaryNormalX(gridPath + "geomBoundaryNormalX.dat");
@@ -430,11 +419,11 @@ void Parameter::initGridPaths(){
     this->setOutflowBoundaryNormalY(gridPath + "outletBoundaryNormalY.dat");
     this->setOutflowBoundaryNormalZ(gridPath + "outletBoundaryNormalZ.dat");
     //////////////////////////////////////////////////////////////////////////
-    
+
     //////////////////////////////////////////////////////////////////////////
     // for Multi GPU
     if (this->getNumprocs() > 1) {
-        
+
         // 3D domain decomposition
         std::vector<std::string> sendProcNeighborsX, sendProcNeighborsY, sendProcNeighborsZ;
         std::vector<std::string> recvProcNeighborsX, recvProcNeighborsY, recvProcNeighborsZ;
@@ -452,7 +441,7 @@ void Parameter::initGridPaths(){
         this->setPossNeighborFilesX(recvProcNeighborsX, "recv");
         this->setPossNeighborFilesY(recvProcNeighborsY, "recv");
         this->setPossNeighborFilesZ(recvProcNeighborsZ, "recv");
-    
+
     //////////////////////////////////////////////////////////////////////////
     }
 }
@@ -482,7 +471,7 @@ void Parameter::initDefaultLBMkernelAllLevels(){
         }
         this->setMultiKernelLevel(tmp);
     }
-    
+
     if (this->getMultiKernelOn() && this->getMultiKernel().empty()) {
         std::vector<std::string> tmp;
         for (int i = 0; i < this->getMaxLevel() + 1; i++) {
@@ -516,8 +505,8 @@ void Parameter::initLBMSimulationParameter()
         parH[i]->mem_size_bool    = sizeof(bool) * parH[i]->size_Mat;
         parH[i]->mem_size_real_yz = sizeof(real) * parH[i]->ny * parH[i]->nz;
         parH[i]->isEvenTimestep        = true;
-        parH[i]->startz           = parH[i]->gridNZ * ic.myid;
-        parH[i]->endz             = parH[i]->gridNZ * ic.myid + parH[i]->gridNZ;
+        parH[i]->startz           = parH[i]->gridNZ * ic.myProcessId;
+        parH[i]->endz             = parH[i]->gridNZ * ic.myProcessId + parH[i]->gridNZ;
         parH[i]->Lx               = (real)((1.f * parH[i]->gridNX - 1.f) / (pow(2.f, i)));
         parH[i]->Ly               = (real)((1.f * parH[i]->gridNY - 1.f) / (pow(2.f, i)));
         parH[i]->Lz               = (real)((1.f * parH[i]->gridNZ - 1.f) / (pow(2.f, i)));
@@ -668,9 +657,9 @@ void Parameter::setD3Qxx(int d3qxx)
 {
     this->D3Qxx = d3qxx;
 }
-void Parameter::setMaxLevel(int maxlevel)
+void Parameter::setMaxLevel(int numberOfLevels)
 {
-    this->maxlevel = maxlevel - 1;
+    this->maxlevel = numberOfLevels - 1;
     this->fine = this->maxlevel;
     parH.resize(this->maxlevel + 1);
     parD.resize(this->maxlevel + 1);
@@ -699,15 +688,15 @@ void Parameter::setEndXHotWall(real endXHotWall)
 {
     this->endXHotWall = endXHotWall;
 }
-void Parameter::setTEnd(unsigned int tend)
+void Parameter::setTimestepEnd(unsigned int tend)
 {
     ic.tend = tend;
 }
-void Parameter::setTOut(unsigned int tout)
+void Parameter::setTimestepOut(unsigned int tout)
 {
     ic.tout = tout;
 }
-void Parameter::setTStartOut(unsigned int tStartOut)
+void Parameter::setTimestepStartOut(unsigned int tStartOut)
 {
     ic.tStartOut = tStartOut;
 }
@@ -754,12 +743,14 @@ void Parameter::setOutputPath(std::string oPath)
         oPath += "/";
 
     ic.oPath = oPath;
+    this->setPathAndFilename(this->getOutputPath() + this->getOutputPrefix());
 }
 void Parameter::setOutputPrefix(std::string oPrefix)
 {
     ic.oPrefix = oPrefix;
+    this->setPathAndFilename(this->getOutputPath() + this->getOutputPrefix());
 }
-void Parameter::setFName(std::string fname)
+void Parameter::setPathAndFilename(std::string fname)
 {
     ic.fname = fname;
 }
@@ -788,11 +779,11 @@ void Parameter::setTemperatureBC(real TempBC)
 {
     ic.TempBC = TempBC;
 }
-void Parameter::setViscosity(real Viscosity)
+void Parameter::setViscosityLB(real Viscosity)
 {
     ic.vis = Viscosity;
 }
-void Parameter::setVelocity(real Velocity)
+void Parameter::setVelocityLB(real Velocity)
 {
     ic.u0 = Velocity;
 }
@@ -812,17 +803,61 @@ void Parameter::setPressRatio(real PressRatio)
 {
     ic.delta_press = PressRatio;
 }
+real Parameter::getViscosityRatio()
+{
+    return ic.vis_ratio;
+}
+real Parameter::getVelocityRatio()
+{
+    return ic.u0_ratio;
+}
+real Parameter::getDensityRatio()
+{
+    return ic.delta_rho;
+}
+real Parameter::getPressureRatio()
+{
+    return ic.delta_press;
+}
 real Parameter::getTimeRatio()
 {
     return this->getViscosityRatio() * pow(this->getVelocityRatio(), -2);
 }
+real Parameter::getLengthRatio()
+{
+    return this->getViscosityRatio() / this->getVelocityRatio();
+}
 real Parameter::getForceRatio()
 {
-    return this->getDensityRatio() * pow(this->getViscosityRatio(), 2);
+    return this->getDensityRatio() * this->getVelocityRatio()/this->getTimeRatio();
 }
-real Parameter::getLengthRatio()
+real Parameter::getScaledViscosityRatio(int level) // viscosity ratio for the given grid level
 {
-    return this->getViscosityRatio() / this->getVelocityRatio();
+    return this->getViscosityRatio() / (real)pow(2.f, level); // factor 2 per level, like the pow(2.f, i) used for Lx/Ly/Lz; (level+1) only agreed with this for levels 0 and 1
+}
+real Parameter::getScaledVelocityRatio(int level) // velocity ratio for the given grid level
+{
+    return this->getVelocityRatio(); // returned unscaled; level is not used
+}
+real Parameter::getScaledDensityRatio(int level) // density ratio for the given grid level
+{
+    return this->getDensityRatio(); // returned unscaled; level is not used
+}
+real Parameter::getScaledPressureRatio(int level) // pressure ratio for the given grid level
+{
+    return this->getPressureRatio(); // returned unscaled; level is not used
+}
+real Parameter::getScaledTimeRatio(int level) // time ratio for the given grid level
+{
+    return this->getTimeRatio() / (real)pow(2.f, level); // getTimeStep() doubles the step count per level, so the ratio halves per level
+}
+real Parameter::getScaledLengthRatio(int level) // length ratio for the given grid level
+{
+    return this->getLengthRatio() / (real)pow(2.f, level); // same 2^level factor as the Lx/Ly/Lz computation in initLBMSimulationParameter()
+}
+real Parameter::getScaledForceRatio(int level) // force ratio for the given grid level
+{
+    return this->getForceRatio() * (real)pow(2.f, level); // getForceRatio() divides by the time ratio, so it grows by 2 per level
 }
 void Parameter::setRealX(real RealX)
 {
@@ -848,13 +883,17 @@ void Parameter::setPressOutZ(unsigned int PressOutZ)
 {
     ic.PressOutZ = PressOutZ;
 }
+void Parameter::setOutflowPressureCorrectionFactor(real pressBCrhoCorrectionFactor)
+{
+    ic.outflowPressureCorrectionFactor = pressBCrhoCorrectionFactor;
+}
 void Parameter::setMaxDev(int maxdev)
 {
     ic.maxdev = maxdev;
 }
 void Parameter::setMyID(int myid)
 {
-    ic.myid = myid;
+    ic.myProcessId = myid;
 }
 void Parameter::setNumprocs(int numprocs)
 {
@@ -938,11 +977,9 @@ void Parameter::setUseWale(bool useWale)
     if (useWale)
         setUseTurbulentViscosity(true);
 }
-void Parameter::setUseAMD(bool useAMD)
+void Parameter::setTurbulenceModel(TurbulenceModel turbulenceModel)
 {
-    ic.isAMD = useAMD;
-    if (useAMD)
-        setUseTurbulentViscosity(true);
+    ic.turbulenceModel = turbulenceModel;
 }
 void Parameter::setSGSConstant(real SGSConstant)
 {
@@ -1331,9 +1368,9 @@ void Parameter::setObj(std::string str, bool isObj)
         this->setIsOutflowNormal(isObj);
     }
 }
-void Parameter::setGeometryValues(bool GeometryValues)
+void Parameter::setUseGeometryValues(bool useGeometryValues)
 {
-    ic.GeometryValues = GeometryValues;
+    ic.GeometryValues = useGeometryValues;
 }
 void Parameter::setCalc2ndOrderMoments(bool is2ndOrderMoments)
 {
@@ -1574,7 +1611,7 @@ void Parameter::setOutflowBoundaryNormalZ(std::string outflowNormalZ)
 void Parameter::setMainKernel(std::string kernel)
 {
     this->mainKernel = kernel;
-    if (kernel.find("Stream") != std::string::npos)
+    if (kernel.find("Stream") != std::string::npos || kernel.find("Redesigned") != std::string::npos)
         this->kernelNeedsFluidNodeIndicesToRun = true;
 }
 void Parameter::setMultiKernelOn(bool isOn)
@@ -1715,7 +1752,7 @@ int Parameter::getNumberOfParticles()
 }
 bool Parameter::getEvenOrOdd(int level)
 {
-    return parH[level]->isEvenTimestep;
+	return parD[level]->isEvenTimestep;
 }
 bool Parameter::getDiffOn()
 {
@@ -1741,7 +1778,7 @@ int Parameter::getMaxLevel()
 {
     return this->maxlevel;
 }
-unsigned int Parameter::getTStart()
+unsigned int Parameter::getTimestepStart()
 {
     if (getDoRestart()) {
         return getTimeDoRestart() + 1;
@@ -1749,7 +1786,7 @@ unsigned int Parameter::getTStart()
         return 1;
     }
 }
-unsigned int Parameter::getTInit()
+unsigned int Parameter::getTimestepInit()
 {
     if (getDoRestart()) {
         return getTimeDoRestart();
@@ -1757,15 +1794,15 @@ unsigned int Parameter::getTInit()
         return 0;
     }
 }
-unsigned int Parameter::getTEnd()
+unsigned int Parameter::getTimestepEnd()
 {
     return ic.tend;
 }
-unsigned int Parameter::getTOut()
+unsigned int Parameter::getTimestepOut()
 {
     return ic.tout;
 }
-unsigned int Parameter::getTStartOut()
+unsigned int Parameter::getTimestepStartOut()
 {
     return ic.tStartOut;
 }
@@ -1781,7 +1818,7 @@ bool Parameter::getCalcCp()
 {
     return this->calcCp;
 }
-bool Parameter::getCalcParticle()
+bool Parameter::getCalcParticles()
 {
     return this->calcParticles;
 }
@@ -1849,22 +1886,6 @@ real Parameter::getVelocity()
 {
     return ic.u0;
 }
-real Parameter::getViscosityRatio()
-{
-    return ic.vis_ratio;
-}
-real Parameter::getVelocityRatio()
-{
-    return ic.u0_ratio;
-}
-real Parameter::getDensityRatio()
-{
-    return ic.delta_rho;
-}
-real Parameter::getPressRatio()
-{
-    return ic.delta_press;
-}
 real Parameter::getRealX()
 {
     return ic.RealX;
@@ -1889,13 +1910,17 @@ unsigned int Parameter::getPressOutZ()
 {
     return ic.PressOutZ;
 }
+real Parameter::getOutflowPressureCorrectionFactor()
+{
+    return ic.outflowPressureCorrectionFactor;
+}
 int Parameter::getMaxDev()
 {
     return ic.maxdev;
 }
-int Parameter::getMyID()
+int Parameter::getMyProcessID()
 {
-    return ic.myid;
+    return ic.myProcessId;
 }
 int Parameter::getNumprocs()
 {
@@ -2265,6 +2290,26 @@ unsigned int Parameter::getTimeDoRestart()
 {
     return ic.tDoRestart;
 }
+
+//=======================================================================================
+//! \brief Get the current (sub)time step of a given grid level.
+//! \param level grid level to query; a std::runtime_error is thrown if it exceeds getMaxLevel()
+//! \param t current time step (of level 0)
+//! \param isPostCollision whether getTimeStep is called post- (before swap) or pre- (after swap) collision
+//! \returns t for level <= 0, otherwise t refined by a factor of two per level plus the even/odd offsets
+unsigned int Parameter::getTimeStep(int level, unsigned int t, bool isPostCollision)
+{
+    if (level > this->getMaxLevel())
+        throw std::runtime_error("Parameter::getTimeStep: level>this->getMaxLevel()!");
+    if (level <= 0)
+        return t;
+    unsigned int subStep = t;
+    for (int lev = 1; lev < level; lev++)
+        subStep = 1 + 2 * (subStep - 1) + !this->getEvenOrOdd(lev);
+    const bool addOne = this->getEvenOrOdd(level) != isPostCollision;
+    return 1 + 2 * (subStep - 1) + addOne;
+}
+
 bool Parameter::getDoCheckPoint()
 {
     return ic.doCheckPoint;
@@ -2309,9 +2354,9 @@ bool Parameter::getUseWale()
 {
     return ic.isWale;
 }
-bool Parameter::getUseAMD()
+TurbulenceModel Parameter::getTurbulenceModel()
 {
-    return ic.isAMD;
+    return ic.turbulenceModel;
 }
 bool Parameter::getUseTurbulentViscosity()
 {
@@ -2634,6 +2679,10 @@ bool Parameter::getKernelNeedsFluidNodeIndicesToRun()
     return this->kernelNeedsFluidNodeIndicesToRun;
 }
 
+void Parameter::setKernelNeedsFluidNodeIndicesToRun(bool  kernelNeedsFluidNodeIndicesToRun){
+    this->kernelNeedsFluidNodeIndicesToRun = kernelNeedsFluidNodeIndicesToRun;
+}
+
 void Parameter::initProcessNeighborsAfterFtoCX(int level)
 {
     this->getParH(level)->sendProcessNeighborsAfterFtoCX.resize(this->getParH(level)->sendProcessNeighborX.size());
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h
index 84ebbaf36595a9af3ec522ec242b763817ad7035..3ffad87eb67af6942757c51d60413813b9329044 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h
+++ b/src/gpu/VirtualFluids_GPU/Parameter/Parameter.h
@@ -37,10 +37,12 @@
 #include <memory>
 #include <string>
 #include <vector>
+#include <optional>
 
 #include "lbm/constants/D3Q27.h"
 #include "LBM/LB.h"
 #include "PreCollisionInteractor/PreCollisionInteractor.h"
+#include "TurbulenceModels/TurbulenceModelFactory.h"
 
 #include "VirtualFluids_GPU_export.h"
 
@@ -118,7 +120,7 @@ struct LBMSimulationParameter {
     uint *neighborX, *neighborY, *neighborZ, *neighborInverse;
 
     // coordinates////////////////////////////////////////////////////////////
-    //! \brief store the coordinates for every lattice node 
+    //! \brief store the coordinates for every lattice node
     real *coordinateX, *coordinateY, *coordinateZ;
 
     // body forces////////////
@@ -235,6 +237,7 @@ struct LBMSimulationParameter {
 
     WallModelParameters wallModel;
     std::vector<SPtr<VelocityReader>> velocityReader;
+    real outflowPressureCorrectionFactor;
 
     // testRoundoffError
     Distributions27 kDistTestRE;
@@ -370,16 +373,19 @@ struct LBMSimulationParameter {
     uint *fluidNodeIndices;
     uint numberOfFluidNodes;
     uint *fluidNodeIndicesBorder;
-    uint numberOffluidNodesBorder;
+    uint numberOfFluidNodesBorder;
 };
 
 //! \brief Class for LBM-parameter management
 class VIRTUALFLUIDS_GPU_EXPORT Parameter
 {
 public:
-    Parameter(const vf::basics::ConfigurationFile &configData, const int numberOfProcesses = 1, const int myId = 0);
-    Parameter(const int numberOfProcesses = 1, const int myId = 0);
+    Parameter();
+    explicit Parameter(const vf::basics::ConfigurationFile* configData);
+    explicit Parameter(const int numberOfProcesses, const int myId);
+    explicit Parameter(const int numberOfProcesses, const int myId, std::optional<const vf::basics::ConfigurationFile*> configData);
     ~Parameter();
+
     void initLBMSimulationParameter();
 
     //! \brief Pointer to instance of LBMSimulationParameter - stored on Host System
@@ -404,16 +410,16 @@ public:
     void setDiffMod(int DiffMod);
     void setDiffusivity(real Diffusivity);
     void setD3Qxx(int d3qxx);
-    void setMaxLevel(int maxlevel);
+    void setMaxLevel(int numberOfLevels);
     void setParticleBasicLevel(int pbl);
     void setParticleInitLevel(int pil);
     void setNumberOfParticles(int nop);
     void setCalcParticles(bool calcParticles);
     void setStartXHotWall(real startXHotWall);
     void setEndXHotWall(real endXHotWall);
-    void setTEnd(unsigned int tend);
-    void setTOut(unsigned int tout);
-    void setTStartOut(unsigned int tStartOut);
+    void setTimestepEnd(unsigned int tend);
+    void setTimestepOut(unsigned int tout);
+    void setTimestepStartOut(unsigned int tStartOut);
     void setTimestepOfCoarseLevel(unsigned int timestep);
     void setCalcTurbulenceIntensity(bool calcVelocityAndFluctuations);
     void setCalcMedian(bool calcMedian);
@@ -433,7 +439,6 @@ public:
     void settimestepForMP(unsigned int timestepForMP);
     void setOutputPath(std::string oPath);
     void setOutputPrefix(std::string oPrefix);
-    void setFName(std::string fname);
     void setGridPath(std::string gridPath);
     void setGeometryFileC(std::string GeometryFileC);
     void setGeometryFileM(std::string GeometryFileM);
@@ -466,6 +471,7 @@ public:
     void setpressBcPos(std::string pressBcPos);
     void setpressBcQs(std::string pressBcQs);
     void setpressBcValue(std::string pressBcValue);
+    void setOutflowPressureCorrectionFactor(real correctionFactor);
     void setpressBcValues(std::string pressBcValues);
     void setvelBcQs(std::string velBcQs);
     void setvelBcValues(std::string velBcValues);
@@ -500,8 +506,8 @@ public:
     void setReadGeo(bool readGeo);
     void setTemperatureInit(real Temp);
     void setTemperatureBC(real TempBC);
-    void setViscosity(real Viscosity);
-    void setVelocity(real Velocity);
+    void setViscosityLB(real Viscosity);
+    void setVelocityLB(real Velocity);
     void setViscosityRatio(real ViscosityRatio);
     void setVelocityRatio(real VelocityRatio);
     void setDensityRatio(real DensityRatio);
@@ -520,6 +526,7 @@ public:
     void setStreetVelocityFile(bool streetVelocityFile);
     void setUseMeasurePoints(bool useMeasurePoints);
     void setUseWale(bool useWale);
+    void setTurbulenceModel(TurbulenceModel turbulenceModel);
     void setUseTurbulentViscosity(bool useTurbulentViscosity);
     void setUseAMD(bool useAMD);
     void setSGSConstant(real SGSConstant);
@@ -555,7 +562,7 @@ public:
     void setDoCheckPoint(bool doCheckPoint);
     void setDoRestart(bool doRestart);
     void setObj(std::string str, bool isObj);
-    void setGeometryValues(bool GeometryValues);
+    void setUseGeometryValues(bool useGeometryValues); //!< stores the flag in ic.GeometryValues
     void setCalc2ndOrderMoments(bool is2ndOrderMoments);
     void setCalc3rdOrderMoments(bool is3rdOrderMoments);
     void setCalcHighOrderMoments(bool isHighOrderMoments);
@@ -630,7 +637,7 @@ public:
     bool getCalcMedian();
     bool getCalcDragLift();
     bool getCalcCp();
-    bool getCalcParticle();
+    bool getCalcParticles();
     bool getWriteVeloASCIIfiles();
     bool getCalcPlaneConc();
     //! \returns index of finest level
@@ -643,11 +650,13 @@ public:
     int getDiffMod();
     int getFactorNZ();
     int getD3Qxx();
+    //! \returns the maximum level of grid refinement
     int getMaxLevel();
     int getTimeCalcMedStart();
     int getTimeCalcMedEnd();
     int getMaxDev();
-    int getMyID();
+    //! \returns the ID of the current MPI process
+    int getMyProcessID();
     int getNumprocs();
     std::string getOutputPath();
     std::string getOutputPrefix();
@@ -723,11 +732,11 @@ public:
     unsigned int getMemSizeBool(int level);
     unsigned int getMemSizerealYZ(int level);
     unsigned int getSizeMat(int level);
-    unsigned int getTStart();
-    unsigned int getTInit();
-    unsigned int getTEnd();
-    unsigned int getTOut();
-    unsigned int getTStartOut();
+    unsigned int getTimestepStart();
+    unsigned int getTimestepInit();
+    unsigned int getTimestepEnd();
+    unsigned int getTimestepOut();
+    unsigned int getTimestepStartOut();
     unsigned int getTimestepForMP();
     unsigned int getTimestepOfCoarseLevel();
     real getDiffusivity();
@@ -735,13 +744,34 @@ public:
     real getTemperatureBC();
     real getViscosity();
     real getVelocity();
+    //! \returns the viscosity ratio in SI/LB units
     real getViscosityRatio();
+    //! \returns the velocity ratio in SI/LB units
     real getVelocityRatio();
+    //! \returns the density ratio in SI/LB units
     real getDensityRatio();
-    real getPressRatio();
+    //! \returns the pressure ratio in SI/LB units
+    real getPressureRatio();
+    //! \returns the time ratio in SI/LB units
     real getTimeRatio();
+    //! \returns the length ratio in SI/LB units
     real getLengthRatio();
+    //! \returns the force ratio in SI/LB units
     real getForceRatio();
+    //! \returns the viscosity ratio in SI/LB units scaled to the respective level
+    real getScaledViscosityRatio(int level);
+    //! \returns the velocity ratio in SI/LB units scaled to the respective level
+    real getScaledVelocityRatio(int level);
+    //! \returns the density ratio in SI/LB units scaled to the respective level
+    real getScaledDensityRatio(int level);
+    //! \returns the pressure ratio in SI/LB units scaled to the respective level
+    real getScaledPressureRatio(int level);
+    //! \returns the time ratio in SI/LB units scaled to the respective level
+    real getScaledTimeRatio(int level);
+    //! \returns the length ratio in SI/LB units scaled to the respective level
+    real getScaledLengthRatio(int level);
+    //! \returns the force ratio in SI/LB units scaled to the respective level
+    real getScaledForceRatio(int level);
     real getRealX();
     real getRealY();
     real getRe();
@@ -769,9 +799,11 @@ public:
     TempPressforBoundaryConditions *getTempPressH();
     TempPressforBoundaryConditions *getTempPressD();
     std::vector<SPtr<PreCollisionInteractor>> getActuators();
+    //! \returns the probes, e.g. point or plane probe
     std::vector<SPtr<PreCollisionInteractor>> getProbes();
     unsigned int getTimeDoCheckPoint();
     unsigned int getTimeDoRestart();
+    unsigned int getTimeStep(int level, unsigned int t, bool isPostCollision);
     bool getDoCheckPoint();
     bool getDoRestart();
     bool overWritingRestart(unsigned int t);
@@ -789,8 +821,8 @@ public:
     bool isStreetVelocityFile();
     bool getUseMeasurePoints();
     bool getUseWale();
+    TurbulenceModel getTurbulenceModel();
     bool getUseTurbulentViscosity();
-    bool getUseAMD();
     real getSGSConstant();
     bool getHasWallModelMonitor();
     bool getUseInitNeq();
@@ -822,6 +854,7 @@ public:
     std::string getOutflowBoundaryNormalX();
     std::string getOutflowBoundaryNormalY();
     std::string getOutflowBoundaryNormalZ();
+    real getOutflowPressureCorrectionFactor();
     // CUDA random number
     curandState *getRandomState();
     // Kernel
@@ -863,6 +896,8 @@ private:
     void initGridBasePoints();
     void initDefaultLBMkernelAllLevels();
 
+    void setPathAndFilename(std::string fname);
+
 private:
     bool compOn{ false };
     bool diffOn{ false };
@@ -874,6 +909,7 @@ private:
     bool calcVelocityAndFluctuations{ false };
     bool isBodyForce{ false };
     int diffMod{ 27 };
+    //! \property maximum level of grid refinement
     int maxlevel{ 0 };
     int coarse{ 0 };
     int fine{ 0 };
@@ -886,7 +922,7 @@ private:
     unsigned int timestep;
 
     // Kernel
-    std::string mainKernel{ "CumulantK17Comp" };
+    std::string mainKernel{ "CumulantK17CompChim" };
     bool multiKernelOn{ false };
     std::vector<int> multiKernelLevel;
     std::vector<std::string> multiKernel;
@@ -966,6 +1002,7 @@ public:
     bool getUseStreams();
     std::unique_ptr<CudaStreamManager> &getStreamManager();
     bool getKernelNeedsFluidNodeIndicesToRun();
+    void setKernelNeedsFluidNodeIndicesToRun(bool  kernelNeedsFluidNodeIndicesToRun);
 
     void initProcessNeighborsAfterFtoCX(int level);
     void initProcessNeighborsAfterFtoCY(int level);
diff --git a/src/gpu/VirtualFluids_GPU/Parameter/ParameterTest.cpp b/src/gpu/VirtualFluids_GPU/Parameter/ParameterTest.cpp
index 9e05ed1332b34420656e6c0c81f07501da7c7aac..5d8f34520811c80a5fe76b7084f5733d6381e187 100644
--- a/src/gpu/VirtualFluids_GPU/Parameter/ParameterTest.cpp
+++ b/src/gpu/VirtualFluids_GPU/Parameter/ParameterTest.cpp
@@ -24,7 +24,7 @@ TEST(ParameterTest, passingEmptyFileWithoutPath_ShouldNotThrow)
     vf::basics::ConfigurationFile config;
     config.load(filePath.string());
 
-    EXPECT_NO_THROW(Parameter para(config, 1, 0));
+    EXPECT_NO_THROW(Parameter para(1, 0, &config));
 }
 
 // TODO: test setPossNeighborFilesX
@@ -39,7 +39,7 @@ TEST(ParameterTest, check_all_Parameter_CanBePassedToConstructor)
     vf::basics::ConfigurationFile config;
     config.load(filePath.string());
 
-    Parameter para(config);
+    Parameter para(1, 0, &config);
 
     // test optional parameter
     EXPECT_THAT(para.getOutputPath(), testing::Eq("/output/path/"));
@@ -66,9 +66,9 @@ TEST(ParameterTest, check_all_Parameter_CanBePassedToConstructor)
     EXPECT_THAT(para.getSimulatePorousMedia(), testing::Eq(true));
 
     EXPECT_THAT(para.getD3Qxx(), testing::Eq(99));
-    EXPECT_THAT(para.getTEnd(), testing::Eq(33));
-    EXPECT_THAT(para.getTOut(), testing::Eq(22));
-    EXPECT_THAT(para.getTStartOut(), testing::Eq(11));
+    EXPECT_THAT(para.getTimestepEnd(), testing::Eq(33));
+    EXPECT_THAT(para.getTimestepOut(), testing::Eq(22));
+    EXPECT_THAT(para.getTimestepStartOut(), testing::Eq(11));
     EXPECT_THAT(para.getTimeCalcMedStart(), testing::Eq(22));
     EXPECT_THAT(para.getTimeCalcMedEnd(), testing::Eq(44));
     EXPECT_THAT(para.getPressInID(), testing::Eq(25));
@@ -87,7 +87,7 @@ TEST(ParameterTest, check_all_Parameter_CanBePassedToConstructor)
     EXPECT_THAT(para.getViscosityRatio(), RealEq(6.66));
     EXPECT_THAT(para.getVelocityRatio(), RealEq(7.77));
     EXPECT_THAT(para.getDensityRatio(), RealEq(8.88));
-    EXPECT_THAT(para.getPressRatio(), RealEq(9.99));
+    EXPECT_THAT(para.getPressureRatio(), RealEq(9.99));
 
     EXPECT_THAT(para.getRealX(), RealEq(0.1));
     EXPECT_THAT(para.getRealY(), RealEq(0.2));
@@ -113,7 +113,7 @@ TEST(ParameterTest, check_all_Parameter_CanBePassedToConstructor)
         EXPECT_THAT((real)limiters_actual[i], RealEq(limiters[i]));
     }
 
-    EXPECT_THAT(para.getCalcParticle(), testing::Eq(true));
+    EXPECT_THAT(para.getCalcParticles(), testing::Eq(true));
     EXPECT_THAT(para.getParticleBasicLevel(), testing::Eq(1));
     EXPECT_THAT(para.getParticleInitLevel(), testing::Eq(2));
     EXPECT_THAT(para.getNumberOfParticles(), testing::Eq(1111));
@@ -180,7 +180,7 @@ TEST(ParameterTest, setGridPathOverridesConfigFile)
     filePath.replace_filename("parameterTest.cfg");
     vf::basics::ConfigurationFile config;
     config.load(filePath.string());
-    auto para = Parameter(config, 2, 0);
+    auto para = Parameter(2, 0, &config);
     para.setGridPath("gridPathTest");
 
     EXPECT_THAT( para.getGridPath(), testing::Eq("gridPathTest/0/"));
diff --git a/src/gpu/VirtualFluids_GPU/Particles/Particles.cpp b/src/gpu/VirtualFluids_GPU/Particles/Particles.cpp
index e9b3801e618771b2811d1fa46345349be2e651a9..7c710f50afb0ae07edd53ef9d68e294c7af54ac1 100644
--- a/src/gpu/VirtualFluids_GPU/Particles/Particles.cpp
+++ b/src/gpu/VirtualFluids_GPU/Particles/Particles.cpp
@@ -22,8 +22,8 @@ void allocParticles(Parameter* para, CudaMemoryManager* cudaMemoryManager)
 	{
 		//////////////////////////////////////////////////////////////////////////
 		//all level store the same number of time steps
-		para->getParH(lev)->plp.numberOfTimestepsParticles = para->getTOut() * (unsigned int)pow(2,para->getParticleBasicLevel());
-		para->getParD(lev)->plp.numberOfTimestepsParticles = para->getTOut() * (unsigned int)pow(2,para->getParticleBasicLevel());
+		para->getParH(lev)->plp.numberOfTimestepsParticles = para->getTimestepOut() * (unsigned int)pow(2,para->getParticleBasicLevel());
+		para->getParD(lev)->plp.numberOfTimestepsParticles = para->getTimestepOut() * (unsigned int)pow(2,para->getParticleBasicLevel());
 		//////////////////////////////////////////////////////////////////////////
 		//all level store the same number of Particles
 		para->getParH(lev)->plp.numberOfParticles = para->getNumberOfParticles();
diff --git a/src/gpu/VirtualFluids_GPU/Particles/Particles.h b/src/gpu/VirtualFluids_GPU/Particles/Particles.h
index 4afff9e6969be80719a4bc146b5f41796f5ca33a..7a6d003a08ef7f6517b6259c2c1b895676c6d80b 100644
--- a/src/gpu/VirtualFluids_GPU/Particles/Particles.h
+++ b/src/gpu/VirtualFluids_GPU/Particles/Particles.h
@@ -7,12 +7,12 @@
 #include "Core/StringUtilities/StringUtil.h"
 #include "Parameter/Parameter.h"
 
-//extern "C" void calcDragLift(Parameter* para, int lev);
-extern "C" void allocParticles(Parameter* para, CudaMemoryManager* cudaMemoryManager);
-extern "C" void initParticles(Parameter* para);
-extern "C" void propagateParticles(Parameter* para, unsigned int t);
-extern "C" void copyAndPrintParticles(Parameter* para, CudaMemoryManager* cudaMemoryManager, unsigned int t, bool isInit);
+//void calcDragLift(Parameter* para, int lev);
+void allocParticles(Parameter* para, CudaMemoryManager* cudaMemoryManager);
+void initParticles(Parameter* para);
+void propagateParticles(Parameter* para, unsigned int t);
+void copyAndPrintParticles(Parameter* para, CudaMemoryManager* cudaMemoryManager, unsigned int t, bool isInit);
 
-extern "C" void rearrangeGeometry(Parameter* para, CudaMemoryManager* cudaMemoryManager);
+void rearrangeGeometry(Parameter* para, CudaMemoryManager* cudaMemoryManager);
 
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.cu
index fda466354769d11910041a9ea351dd102f9c474d..f5b520acfad74f6787e9e657fce3ccdceed9d539 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.cu
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.cu
@@ -142,55 +142,56 @@ std::vector<PostProcessingVariable> PlanarAverageProbe::getPostProcessingVariabl
     switch (statistic)
     {
     case Statistic::SpatialMeans:
-        postProcessingVariables.push_back( PostProcessingVariable("vx_spatMean",  velocityRatio) );
+        postProcessingVariables.push_back( PostProcessingVariable("vx_spatMean",  this->velocityRatio) );
         postProcessingVariables.push_back( PostProcessingVariable("vy_spatMean",  this->velocityRatio) );
         postProcessingVariables.push_back( PostProcessingVariable("vz_spatMean",  this->velocityRatio) );
+        postProcessingVariables.push_back( PostProcessingVariable("nut_spatMean", this->viscosityRatio) );
         break;
     case Statistic::SpatioTemporalMeans:
         postProcessingVariables.push_back( PostProcessingVariable("vx_spatTmpMean",  this->velocityRatio) );
         postProcessingVariables.push_back( PostProcessingVariable("vy_spatTmpMean",  this->velocityRatio) );
         postProcessingVariables.push_back( PostProcessingVariable("vz_spatTmpMean",  this->velocityRatio) );
+        postProcessingVariables.push_back( PostProcessingVariable("nut_spatTmpMean", this->viscosityRatio) );
         break;
     case Statistic::SpatialCovariances:
-        postProcessingVariables.push_back( PostProcessingVariable("vxvx_spatMean",  pow(this->velocityRatio, 2.0)) );
-        postProcessingVariables.push_back( PostProcessingVariable("vyvy_spatMean",  pow(this->velocityRatio, 2.0)) );
-        postProcessingVariables.push_back( PostProcessingVariable("vzvz_spatMean",  pow(this->velocityRatio, 2.0)) );
-        postProcessingVariables.push_back( PostProcessingVariable("vxvy_spatMean",  pow(this->velocityRatio, 2.0)) );
-        postProcessingVariables.push_back( PostProcessingVariable("vxvz_spatMean",  pow(this->velocityRatio, 2.0)) );
-        postProcessingVariables.push_back( PostProcessingVariable("vyvz_spatMean",  pow(this->velocityRatio, 2.0)) );
+        postProcessingVariables.push_back( PostProcessingVariable("vxvx_spatMean",  this->stressRatio) );
+        postProcessingVariables.push_back( PostProcessingVariable("vyvy_spatMean",  this->stressRatio) );
+        postProcessingVariables.push_back( PostProcessingVariable("vzvz_spatMean",  this->stressRatio) );
+        postProcessingVariables.push_back( PostProcessingVariable("vxvy_spatMean",  this->stressRatio) );
+        postProcessingVariables.push_back( PostProcessingVariable("vxvz_spatMean",  this->stressRatio) );
+        postProcessingVariables.push_back( PostProcessingVariable("vyvz_spatMean",  this->stressRatio) );
         break;
     case Statistic::SpatioTemporalCovariances:
-        postProcessingVariables.push_back( PostProcessingVariable("vxvx_spatTmpMean",  pow(this->velocityRatio, 2.0)) );
-        postProcessingVariables.push_back( PostProcessingVariable("vyvy_spatTmpMean",  pow(this->velocityRatio, 2.0)) );
-        postProcessingVariables.push_back( PostProcessingVariable("vzvz_spatTmpMean",  pow(this->velocityRatio, 2.0)) );
-        postProcessingVariables.push_back( PostProcessingVariable("vxvy_spatTmpMean",  pow(this->velocityRatio, 2.0)) );
-        postProcessingVariables.push_back( PostProcessingVariable("vxvz_spatTmpMean",  pow(this->velocityRatio, 2.0)) );
-        postProcessingVariables.push_back( PostProcessingVariable("vyvz_spatTmpMean",  pow(this->velocityRatio, 2.0)) );
+        postProcessingVariables.push_back( PostProcessingVariable("vxvx_spatTmpMean",  this->stressRatio) );
+        postProcessingVariables.push_back( PostProcessingVariable("vyvy_spatTmpMean",  this->stressRatio) );
+        postProcessingVariables.push_back( PostProcessingVariable("vzvz_spatTmpMean",  this->stressRatio) );
+        postProcessingVariables.push_back( PostProcessingVariable("vxvy_spatTmpMean",  this->stressRatio) );
+        postProcessingVariables.push_back( PostProcessingVariable("vxvz_spatTmpMean",  this->stressRatio) );
+        postProcessingVariables.push_back( PostProcessingVariable("vyvz_spatTmpMean",  this->stressRatio) );
         break;
     case Statistic::SpatialSkewness:
-        postProcessingVariables.push_back( PostProcessingVariable("Sx_spatMean",  1.0) );
-        postProcessingVariables.push_back( PostProcessingVariable("Sy_spatMean",  1.0) );
-        postProcessingVariables.push_back( PostProcessingVariable("Sz_spatMean",  1.0) );
+        postProcessingVariables.push_back( PostProcessingVariable("Sx_spatMean",  this->nondimensional) );
+        postProcessingVariables.push_back( PostProcessingVariable("Sy_spatMean",  this->nondimensional) );
+        postProcessingVariables.push_back( PostProcessingVariable("Sz_spatMean",  this->nondimensional) );
         break;
     case Statistic::SpatioTemporalSkewness:
-        postProcessingVariables.push_back( PostProcessingVariable("Sx_spatTmpMean",  1.0) );
-        postProcessingVariables.push_back( PostProcessingVariable("Sy_spatTmpMean",  1.0) );
-        postProcessingVariables.push_back( PostProcessingVariable("Sz_spatTmpMean",  1.0) );
+        postProcessingVariables.push_back( PostProcessingVariable("Sx_spatTmpMean",  this->nondimensional) );
+        postProcessingVariables.push_back( PostProcessingVariable("Sy_spatTmpMean",  this->nondimensional) );
+        postProcessingVariables.push_back( PostProcessingVariable("Sz_spatTmpMean",  this->nondimensional) );
         break;
     case Statistic::SpatialFlatness:
-        postProcessingVariables.push_back( PostProcessingVariable("Fx_spatMean",  1.0) );
-        postProcessingVariables.push_back( PostProcessingVariable("Fy_spatMean",  1.0) );
-        postProcessingVariables.push_back( PostProcessingVariable("Fz_spatMean",  1.0) );
+        postProcessingVariables.push_back( PostProcessingVariable("Fx_spatMean",  this->nondimensional) );
+        postProcessingVariables.push_back( PostProcessingVariable("Fy_spatMean",  this->nondimensional) );
+        postProcessingVariables.push_back( PostProcessingVariable("Fz_spatMean",  this->nondimensional) );
         break;
     case Statistic::SpatioTemporalFlatness:
-        postProcessingVariables.push_back( PostProcessingVariable("Fx_spatTmpMean",  1.0) );
-        postProcessingVariables.push_back( PostProcessingVariable("Fy_spatTmpMean",  1.0) );
-        postProcessingVariables.push_back( PostProcessingVariable("Fz_spatTmpMean",  1.0) );
+        postProcessingVariables.push_back( PostProcessingVariable("Fx_spatTmpMean",  this->nondimensional) );
+        postProcessingVariables.push_back( PostProcessingVariable("Fy_spatTmpMean",  this->nondimensional) );
+        postProcessingVariables.push_back( PostProcessingVariable("Fz_spatTmpMean",  this->nondimensional) );
         break;
 
     default:
-        printf("Statistic unavailable in PlanarAverageProbe\n");
-        assert(false);
+        throw std::runtime_error("PlanarAverageProbe::getPostProcessingVariables: Statistic unavailable!");
         break;
     }
     return postProcessingVariables;
@@ -265,7 +266,7 @@ void PlanarAverageProbe::findPoints(Parameter* para, GridProvider* gridProvider,
 
 ///////////////////////////////////////////////////////////////////////////////////
 
-void PlanarAverageProbe::calculateQuantities(SPtr<ProbeStruct> probeStruct, Parameter* para, uint t, int level)
+void PlanarAverageProbe::calculateQuantities(SPtr<ProbeStruct> probeStruct, Parameter* para, uint t_level, int level)
 {   
     // Definition of normal and inplane directions for moveIndices kernels
     uint *neighborNormal, *neighborInplane1, *neighborInplane2;
@@ -288,13 +289,14 @@ void PlanarAverageProbe::calculateQuantities(SPtr<ProbeStruct> probeStruct, Para
         neighborInplane2 = para->getParD(level)->neighborY;
     }
 
-    bool doTmpAveraging = (t>this->getTStartTmpAveraging());
+    bool doTmpAveraging = t_level>=(this->getTStartTmpAveraging()*pow(2,level));
 
     // Pointer casts to use device arrays in thrust reductions
     thrust::device_ptr<uint> indices_thrust = thrust::device_pointer_cast(probeStruct->pointIndicesD);
     thrust::device_ptr<real> vx_thrust = thrust::device_pointer_cast(para->getParD(level)->velocityX);
     thrust::device_ptr<real> vy_thrust = thrust::device_pointer_cast(para->getParD(level)->velocityY);
     thrust::device_ptr<real> vz_thrust = thrust::device_pointer_cast(para->getParD(level)->velocityZ);
+    thrust::device_ptr<real> nut_thrust = thrust::device_pointer_cast(para->getParD(level)->turbViscosity);
 
     real N = (real)probeStruct->nIndices;
     real n = (real)probeStruct->vals;
@@ -308,10 +310,12 @@ void PlanarAverageProbe::calculateQuantities(SPtr<ProbeStruct> probeStruct, Para
     thrust::permutation_iterator<valIterator, indIterator> vy_iter_end  (vy_thrust, indices_thrust+probeStruct->nIndices);
     thrust::permutation_iterator<valIterator, indIterator> vz_iter_begin(vz_thrust, indices_thrust);
     thrust::permutation_iterator<valIterator, indIterator> vz_iter_end  (vz_thrust, indices_thrust+probeStruct->nIndices);
+    thrust::permutation_iterator<valIterator, indIterator> nut_iter_begin(nut_thrust, indices_thrust);
+    thrust::permutation_iterator<valIterator, indIterator> nut_iter_end  (nut_thrust, indices_thrust+probeStruct->nIndices);
 
     for( uint i=0; i<nPoints; i++ )
     {
-        uint node = this->isEvenTAvg? i : nPoints-1-i; // Note, loop moves in positive normal dir at even calls and in negative normal dir in odd calls
+        uint node = probeStruct->isEvenTAvg? i : nPoints-1-i; // Note, loop moves in positive normal dir at even calls and in negative normal dir in odd calls
 
         if(probeStruct->quantitiesH[int(Statistic::SpatialMeans)])
         {
@@ -320,10 +324,14 @@ void PlanarAverageProbe::calculateQuantities(SPtr<ProbeStruct> probeStruct, Para
             real spatMean_vy = thrust::reduce(vy_iter_begin, vy_iter_end)/N;
             real spatMean_vz = thrust::reduce(vz_iter_begin, vz_iter_end)/N;
 
+            real spatMean_nut = 0.0; // defined value even when no turbulent viscosity is computed
+            if(para->getUseTurbulentViscosity()) spatMean_nut = thrust::reduce(nut_iter_begin, nut_iter_end)/N;
+
             uint arrOff = probeStruct->arrayOffsetsH[int(Statistic::SpatialMeans)];
             probeStruct->quantitiesArrayH[(arrOff+0)*nPoints+node] = spatMean_vx;
             probeStruct->quantitiesArrayH[(arrOff+1)*nPoints+node] = spatMean_vy;
             probeStruct->quantitiesArrayH[(arrOff+2)*nPoints+node] = spatMean_vz;
+            if(para->getUseTurbulentViscosity()) probeStruct->quantitiesArrayH[(arrOff+3)*nPoints+node] = spatMean_nut;
 
             if(probeStruct->quantitiesH[int(Statistic::SpatioTemporalMeans)] && doTmpAveraging)
             {
@@ -331,10 +339,14 @@ void PlanarAverageProbe::calculateQuantities(SPtr<ProbeStruct> probeStruct, Para
             real spatTmpMean_vx_old = probeStruct->quantitiesArrayH[(arrOff+0)*nPoints+node];
             real spatTmpMean_vy_old = probeStruct->quantitiesArrayH[(arrOff+1)*nPoints+node];
             real spatTmpMean_vz_old = probeStruct->quantitiesArrayH[(arrOff+2)*nPoints+node];
+            real spatTmpMean_nut_old = 0.0; // only read below when the turbulent viscosity is in use
+            if(para->getUseTurbulentViscosity()) spatTmpMean_nut_old = probeStruct->quantitiesArrayH[(arrOff+3)*nPoints+node];
 
             probeStruct->quantitiesArrayH[(arrOff+0)*nPoints+node] += (spatMean_vx-spatTmpMean_vx_old)/n;
             probeStruct->quantitiesArrayH[(arrOff+1)*nPoints+node] += (spatMean_vy-spatTmpMean_vy_old)/n;
             probeStruct->quantitiesArrayH[(arrOff+2)*nPoints+node] += (spatMean_vz-spatTmpMean_vz_old)/n;
+            if(para->getUseTurbulentViscosity()) probeStruct->quantitiesArrayH[(arrOff+3)*nPoints+node] += (spatMean_nut-spatTmpMean_nut_old)/n;
+
             }
         
             if(probeStruct->quantitiesH[int(Statistic::SpatialCovariances)])
@@ -445,13 +457,13 @@ void PlanarAverageProbe::calculateQuantities(SPtr<ProbeStruct> probeStruct, Para
         if(i<probeStruct->nPoints-1)
         {
             vf::cuda::CudaGrid grid = vf::cuda::CudaGrid(para->getParH(level)->numberofthreads, probeStruct->nIndices);
-            if(this->isEvenTAvg) 
+            if(probeStruct->isEvenTAvg) 
                 moveIndicesInPosNormalDir<<<grid.grid, grid.threads>>>( probeStruct->pointIndicesD, probeStruct->nIndices, neighborNormal, para->getParD(level)->coordinateX, para->getParD(level)->coordinateY, para->getParD(level)->coordinateZ );
             else 
                 moveIndicesInNegNormalDir<<<grid.grid, grid.threads>>>( probeStruct->pointIndicesD, probeStruct->nIndices, para->getParD(level)->neighborInverse, neighborInplane1, neighborInplane2, para->getParD(level)->coordinateX, para->getParD(level)->coordinateY, para->getParD(level)->coordinateZ ); 
         } 
     }
-    this->isEvenTAvg=!this->isEvenTAvg;
+    probeStruct->isEvenTAvg=!(probeStruct->isEvenTAvg);
 
     getLastCudaError("PlanarAverageProbe::calculateQuantities execution failed");
 }
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.h
index 7054f5fc7e02453418285281a0ea9cf9c32dc0c0..d11f8e76e4d13113b201af5494b7d0cfcfe18353 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.h
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlanarAverageProbe.h
@@ -40,6 +40,8 @@
 #ifndef PlanarAverageProbe_H
 #define PlanarAverageProbe_H
 
+#include <iostream>
+
 #include "Probe.h"
 
 __global__ void moveIndicesInNegNormalDir( uint* pointIndices, uint nPoints, uint* neighborWSB, uint* neighborInplane1, uint* neighborInplane2, real* coordsX, real* coordsY, real* coordsZ ); 
@@ -72,7 +74,8 @@ public:
         planeNormal(_planeNormal)
 
     {   
-        assert(_planeNormal == 'x' || _planeNormal == 'y' || _planeNormal == 'z');
+        if(!(_planeNormal == 'x' || _planeNormal == 'y' || _planeNormal == 'z')) 
+            throw std::runtime_error("PlanarAverageProbe: planeNormal must be 'x', 'y' or 'z'!");
     }
 
 
@@ -91,7 +94,6 @@ private:
     real posX, posY, posZ;
     real deltaX, deltaY, deltaZ;
     char planeNormal;
-    bool isEvenTAvg = true;
 };
 
 #endif
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.cu
index d2b101bc26bb5fd4e173ebaef20333225b4e7467..7d1c0205219737e4b28acbb1a893a0a6071ae9de 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.cu
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PlaneProbe.cu
@@ -57,15 +57,14 @@ std::vector<PostProcessingVariable> PlaneProbe::getPostProcessingVariables(Stati
         postProcessingVariables.push_back( PostProcessingVariable("rho_mean", this->densityRatio ) );
         break;
     case Statistic::Variances:
-        postProcessingVariables.push_back( PostProcessingVariable("vx_var",  pow(this->velocityRatio, 2.0)) );
-        postProcessingVariables.push_back( PostProcessingVariable("vy_var",  pow(this->velocityRatio, 2.0)) );
-        postProcessingVariables.push_back( PostProcessingVariable("vz_var",  pow(this->velocityRatio, 2.0)) );
-        postProcessingVariables.push_back( PostProcessingVariable("rho_var", pow(this->densityRatio,  2.0)) );
+        postProcessingVariables.push_back( PostProcessingVariable("vx_var",  this->stressRatio) );
+        postProcessingVariables.push_back( PostProcessingVariable("vy_var",  this->stressRatio) );
+        postProcessingVariables.push_back( PostProcessingVariable("vz_var",  this->stressRatio) );
+        postProcessingVariables.push_back( PostProcessingVariable("rho_var", [this](int level){ const real ratio = this->densityRatio(level); return ratio*ratio; }) ); // a variance scales with the squared conversion factor
         break;
 
     default:
-        printf("Statistic unavailable in PlaneProbe\n");
-        assert(false);
+        throw std::runtime_error("PlaneProbe::getPostProcessingVariables: Statistic unavailable!");
         break;
     }
     return postProcessingVariables;
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.cu
index 677710ec87ca091b45bdd665db3b58103874634d..e78a98f02ac2093fc46b4daa4a2485ed1395275b 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.cu
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/PointProbe.cu
@@ -55,15 +55,14 @@ std::vector<PostProcessingVariable> PointProbe::getPostProcessingVariables(Stati
         postProcessingVariables.push_back( PostProcessingVariable("rho_mean", this->densityRatio ) );
         break;
     case Statistic::Variances:
-        postProcessingVariables.push_back( PostProcessingVariable("vx_var",  pow(this->velocityRatio, 2.0)) );
-        postProcessingVariables.push_back( PostProcessingVariable("vy_var",  pow(this->velocityRatio, 2.0)) );
-        postProcessingVariables.push_back( PostProcessingVariable("vz_var",  pow(this->velocityRatio, 2.0)) );
-        postProcessingVariables.push_back( PostProcessingVariable("rho_var", pow(this->densityRatio,  2.0)) );
+        postProcessingVariables.push_back( PostProcessingVariable("vx_var",  this->stressRatio) );
+        postProcessingVariables.push_back( PostProcessingVariable("vy_var",  this->stressRatio) );
+        postProcessingVariables.push_back( PostProcessingVariable("vz_var",  this->stressRatio) );
+        postProcessingVariables.push_back( PostProcessingVariable("rho_var", [this](int level){ const real ratio = this->densityRatio(level); return ratio*ratio; }) ); // a variance scales with the squared conversion factor
         break;
 
     default:
-        printf("Statistic unavailable in PointProbe\n");
-        assert(false);
+        throw std::runtime_error("PointProbe::getPostProcessingVariables: Statistic unavailable!");
         break;
     }
     return postProcessingVariables;
@@ -114,7 +113,7 @@ void PointProbe::calculateQuantities(SPtr<ProbeStruct> probeStruct, Parameter* p
 void PointProbe::addProbePointsFromList(std::vector<real>& _pointCoordsX, std::vector<real>& _pointCoordsY, std::vector<real>& _pointCoordsZ)
 {
     bool isSameLength = ( (_pointCoordsX.size()==_pointCoordsY.size()) && (_pointCoordsY.size()==_pointCoordsZ.size()));
-    assert("Probe: point lists have different lengths" && isSameLength);
+    if (!isSameLength) throw std::runtime_error("Probe::addProbePointsFromList(): point lists have different lengths!");
     this->pointCoordsX.insert(this->pointCoordsX.end(), _pointCoordsX.begin(),  _pointCoordsX.end());
     this->pointCoordsY.insert(this->pointCoordsY.end(), _pointCoordsY.begin(),  _pointCoordsY.end());
     this->pointCoordsZ.insert(this->pointCoordsZ.end(), _pointCoordsZ.begin(),  _pointCoordsZ.end());
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.cu
index c9efd50307cf2c6751aa40dadd2316ab35df2331..1512655c365a8cf4de1917f5cd469fb66259e45c 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.cu
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.cu
@@ -37,6 +37,7 @@
 #include <helper_cuda.h>
 
 #include "VirtualFluids_GPU/GPU/GeometryUtils.h"
+#include <lbm/constants/NumericConstants.h>
 #include "basics/writer/WbWriterVtkXmlBinary.h"
 #include <Core/StringUtilities/StringUtil.h>
 
@@ -44,6 +45,7 @@
 #include "DataStructureInitializer/GridProvider.h"
 #include "GPU/CudaMemoryManager.h"
 
+using namespace vf::lbm::constant;
 
 __device__ void calculatePointwiseQuantities(uint n, real* quantityArray, bool* quantities, uint* quantityArrayOffsets, uint nPoints, uint node, real vx, real vy, real vz, real rho)
 {
@@ -177,13 +179,17 @@ __global__ void interpAndCalcQuantitiesKernel(   uint* pointIndices,
 
 bool Probe::getHasDeviceQuantityArray(){ return this->hasDeviceQuantityArray; }
 
+real Probe::getNondimensionalConversionFactor(int level){ return c1o1; }
+
 void Probe::init(Parameter* para, GridProvider* gridProvider, CudaMemoryManager* cudaMemoryManager)
 {
-    this->velocityRatio      = para->getVelocityRatio();
-    this->densityRatio       = para->getDensityRatio();
-    this->forceRatio         = para->getForceRatio();
-    this->stressRatio        = para->getDensityRatio()*pow(para->getVelocityRatio(), 2.0);
-    this->accelerationRatio = para->getVelocityRatio()/para->getTimeRatio();
+    using std::placeholders::_1;
+    this->velocityRatio      = std::bind(&Parameter::getScaledVelocityRatio,        para, _1); 
+    this->densityRatio       = std::bind(&Parameter::getScaledDensityRatio,         para, _1);
+    this->forceRatio         = std::bind(&Parameter::getScaledForceRatio,           para, _1);
+    this->stressRatio        = std::bind(&Parameter::getScaledPressureRatio,        para, _1);
+    this->viscosityRatio     = std::bind(&Parameter::getScaledViscosityRatio,       para, _1);
+    this->nondimensional     = std::bind(&Probe::getNondimensionalConversionFactor, this, _1);
 
     probeParams.resize(para->getMaxLevel()+1);
 
@@ -196,7 +202,7 @@ void Probe::init(Parameter* para, GridProvider* gridProvider, CudaMemoryManager*
         std::vector<real> pointCoordsX_level;
         std::vector<real> pointCoordsY_level;
         std::vector<real> pointCoordsZ_level;
-
+        
         this->findPoints(para, gridProvider, probeIndices_level, distX_level, distY_level, distZ_level,      
                        pointCoordsX_level, pointCoordsY_level, pointCoordsZ_level,
                        level);
@@ -274,16 +280,23 @@ void Probe::addProbeStruct(CudaMemoryManager* cudaMemoryManager, std::vector<int
 
 void Probe::interact(Parameter* para, CudaMemoryManager* cudaMemoryManager, int level, uint t)
 {
-    if(max(int(t) - int(this->tStartAvg), -1) % this->tAvg==0)
+    uint t_level = para->getTimeStep(level, t, false);
+
+    //! if tAvg==1 the probe will be evaluated in every sub-timestep of each respective level
+    //! otherwise the probe is evaluated only in synchronous time steps, i.e. every tAvg-th time step of the coarsest level
+
+    uint tAvg_level = this->tAvg==1? this->tAvg: this->tAvg*pow(2,level);
+    int tSinceStartAvg = int(t_level) - int(this->tStartAvg*pow(2,level)); // signed on purpose: negative before averaging starts
+    if(tSinceStartAvg >= 0 && tSinceStartAvg % int(tAvg_level) == 0)       // no int/uint mixing, so a -1 can never wrap to UINT_MAX and pass the modulo test
     {
         SPtr<ProbeStruct> probeStruct = this->getProbeStruct(level);
-
-        this->calculateQuantities(probeStruct, para, t, level);
-        if(t>=this->tStartTmpAveraging) probeStruct->vals++;
+        this->calculateQuantities(probeStruct, para, t_level, level);
+        if(t_level>=(this->tStartTmpAveraging*pow(2,level))) probeStruct->vals++;
     }
 
-    if(max(int(t) - int(this->tStartOut), -1) % this->tOut == 0)
-    {
+    //! output only in synchronous timesteps
+    if(max(int(t_level) - int(this->tStartOut*pow(2,level)), -1) % int(this->tOut*pow(2,level)) == 0)
+    {   
         if(this->hasDeviceQuantityArray)
             cudaMemoryManager->cudaCopyProbeQuantityArrayDtoH(this, level);
         this->write(para, level, t);
@@ -304,7 +317,7 @@ void Probe::free(Parameter* para, CudaMemoryManager* cudaMemoryManager)
 
 void Probe::addStatistic(Statistic variable)
 {
-    assert(this->isAvailableStatistic(variable));
+    if (!this->isAvailableStatistic(variable)) throw std::runtime_error("Probe::addStatistic(): Statistic not available for this probe type!");
 
     this->quantities[int(variable)] = true;
     switch(variable)
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.h b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.h
index 59a4c4c271e058f87cfde51aa0e04a95161a732c..9bb01cc73240a85fd4f3a8be8003a06fe672bac0 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.h
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/Probe.h
@@ -43,6 +43,8 @@
 #ifndef Probe_H
 #define Probe_H
 
+#include <iostream>
+
 #include <cuda.h>
 
 #include "PreCollisionInteractor/PreCollisionInteractor.h"
@@ -91,9 +93,9 @@ enum class Statistic{
 
 typedef struct PostProcessingVariable{
     std::string name;
-    real conversionFactor;
+    std::function<real(int)> conversionFactor;
     PostProcessingVariable( std::string _name, 
-                            real        _conversionFactor): 
+                            std::function<real(int)>  _conversionFactor): 
     name(_name), conversionFactor(_conversionFactor){};
 } PostProcessingVariable;
 
@@ -106,6 +108,7 @@ struct ProbeStruct{
     real *quantitiesArrayH, *quantitiesArrayD;
     bool *quantitiesH, *quantitiesD;
     uint *arrayOffsetsH, *arrayOffsetsD;
+    bool isEvenTAvg = true;
 };
 
 __global__ void calcQuantitiesKernel(   uint* pointIndices,
@@ -150,7 +153,8 @@ public:
         outputTimeSeries(_outputTimeSeries),        
         PreCollisionInteractor()
     {
-        assert("Output starts before averaging!" && tStartOut>=tStartAvg);
+        if (_tStartOut<_tStartAvg)      throw std::runtime_error("Probe: tStartOut must not be smaller than tStartAvg!");
+        if (_tStartTmpAvg<_tStartAvg)   throw std::runtime_error("Probe: tStartTmpAvg must not be smaller than tStartAvg!");
     }
     
     void init(Parameter* para, GridProvider* gridProvider, CudaMemoryManager* cudaMemoryManager) override;
@@ -170,6 +174,8 @@ public:
 
 protected:
     virtual WbWriterVtkXmlBinary* getWriter(){ return WbWriterVtkXmlBinary::getInstance(); };
+    real getNondimensionalConversionFactor(int level);
+
 private:
     virtual bool isAvailableStatistic(Statistic _variable) = 0;
 
@@ -209,17 +215,19 @@ protected:
 protected:
     uint tStartAvg;
     uint tStartTmpAveraging; //!> only non-zero in PlanarAverageProbe and WallModelProbe to switch on Spatio-temporal averaging (while only doing spatial averaging for t<tStartTmpAveraging) 
-    uint tAvg;
+    uint tAvg;  //! for tAvg==1 the probe will be evaluated in every sub-timestep of each respective level, else, the probe will only be evaluated in each synchronous time step 
     uint tStartOut;
     uint tOut;
 
     uint tProbe = 0; //!> counter for number of probe evaluations. Only used when outputting timeseries
 
-    real velocityRatio;
-    real densityRatio;
-    real forceRatio;
-    real stressRatio;
-    real accelerationRatio;
+
+    std::function<real(int)> velocityRatio;
+    std::function<real(int)> densityRatio;
+    std::function<real(int)> forceRatio;
+    std::function<real(int)> stressRatio;
+    std::function<real(int)> viscosityRatio;
+    std::function<real(int)> nondimensional;
 };
 
 #endif
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.cu b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.cu
index 4bcfce7363a7ddf1496d68c81b13c761e97b4e5f..81da15595baae55aa562bc77e24442a9258d992f 100644
--- a/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.cu
+++ b/src/gpu/VirtualFluids_GPU/PreCollisionInteractor/Probes/WallModelProbe.cu
@@ -117,9 +117,9 @@ std::vector<PostProcessingVariable> WallModelProbe::getPostProcessingVariables(S
         postProcessingVariables.push_back( PostProcessingVariable("Fz_spatMean",     this->outputStress? this->stressRatio: this->forceRatio) );
         if(this->evaluatePressureGradient)
         {
-            postProcessingVariables.push_back( PostProcessingVariable("dpdx_spatMean",     this->accelerationRatio) ); 
-            postProcessingVariables.push_back( PostProcessingVariable("dpdy_spatMean",     this->accelerationRatio) );
-            postProcessingVariables.push_back( PostProcessingVariable("dpdz_spatMean",     this->accelerationRatio) );
+            postProcessingVariables.push_back( PostProcessingVariable("dpdx_spatMean",     this->forceRatio) ); 
+            postProcessingVariables.push_back( PostProcessingVariable("dpdy_spatMean",     this->forceRatio) );
+            postProcessingVariables.push_back( PostProcessingVariable("dpdz_spatMean",     this->forceRatio) );
         }
         break;
     case Statistic::SpatioTemporalMeans:
@@ -135,15 +135,14 @@ std::vector<PostProcessingVariable> WallModelProbe::getPostProcessingVariables(S
         postProcessingVariables.push_back( PostProcessingVariable("Fz_spatTmpMean",     this->outputStress? this->stressRatio: this->forceRatio) );
         if(this->evaluatePressureGradient)
         {
-            postProcessingVariables.push_back( PostProcessingVariable("dpdx_spatTmpMean",     this->accelerationRatio) ); 
-            postProcessingVariables.push_back( PostProcessingVariable("dpdy_spatTmpMean",     this->accelerationRatio) );
-            postProcessingVariables.push_back( PostProcessingVariable("dpdz_spatTmpMean",     this->accelerationRatio) );
+            postProcessingVariables.push_back( PostProcessingVariable("dpdx_spatTmpMean",     this->forceRatio) ); 
+            postProcessingVariables.push_back( PostProcessingVariable("dpdy_spatTmpMean",     this->forceRatio) );
+            postProcessingVariables.push_back( PostProcessingVariable("dpdz_spatTmpMean",     this->forceRatio) );
         }
         break;
 
     default:
-        printf("Statistic unavailable in WallModelProbe\n");
-        assert(false);
+        throw std::runtime_error("WallModelProbe::getPostProcessingVariables: Statistic unavailable!");
         break;
     }
     return postProcessingVariables;
@@ -156,10 +155,10 @@ void WallModelProbe::findPoints(Parameter* para, GridProvider* gridProvider, std
                             std::vector<real>& pointCoordsX_level, std::vector<real>& pointCoordsY_level, std::vector<real>& pointCoordsZ_level,
                             int level)
 {
-    assert( para->getParD(level)->stressBC.numberOfBCnodes > 0 && para->getHasWallModelMonitor() );
+    if ( !para->getHasWallModelMonitor())                    throw std::runtime_error("WallModelProbe::findPoints(): !para->getHasWallModelMonitor() !");
 
     real dt = para->getTimeRatio();
-    uint nt = uint((para->getTEnd()-this->tStartAvg)/this->tAvg);
+    uint nt = uint((para->getTimestepEnd()-this->tStartAvg)/this->tAvg);
     
     for(uint t=0; t<nt; t++)
     {
@@ -170,7 +169,7 @@ void WallModelProbe::findPoints(Parameter* para, GridProvider* gridProvider, std
 
     if(this->evaluatePressureGradient)
     {
-        assert(para->getIsBodyForce());
+        if (!para->getIsBodyForce()) throw std::runtime_error("WallModelProbe::findPoints(): bodyforce not allocated!");
         // Find all fluid nodes
         for(uint j=1; j<para->getParH(level)->numberOfNodes; j++ )
         {
@@ -187,6 +186,10 @@ void WallModelProbe::findPoints(Parameter* para, GridProvider* gridProvider, std
 void WallModelProbe::calculateQuantities(SPtr<ProbeStruct> probeStruct, Parameter* para, uint t, int level)
 {   
     bool doTmpAveraging = (t>this->getTStartTmpAveraging());
+    real N = para->getParD(level)->stressBC.numberOfBCnodes;
+    if(N<1) return; //Skipping levels without StressBC
+    real n = (real)probeStruct->vals;
+    int nPoints = probeStruct->nPoints;
 
     // Pointer casts to use device arrays in thrust reductions
     thrust::device_ptr<real> u_el_thrust    = thrust::device_pointer_cast(para->getParD(level)->stressBC.Vx);
@@ -213,10 +216,6 @@ void WallModelProbe::calculateQuantities(SPtr<ProbeStruct> probeStruct, Paramete
     thrust::permutation_iterator<valIterator, indIterator> dpdz_iter_begin(dpdz_thrust, indices_thrust);
     thrust::permutation_iterator<valIterator, indIterator> dpdz_iter_end  (dpdz_thrust, indices_thrust+probeStruct->nIndices);
 
-    real N = para->getParD(level)->stressBC.numberOfBCnodes;
-    real n = (real)probeStruct->vals;
-    int nPoints = probeStruct->nPoints;
-
     if(probeStruct->quantitiesH[int(Statistic::SpatialMeans)])
     {
         // Compute the instantaneous spatial means of the velocity moments 
@@ -293,6 +292,7 @@ void WallModelProbe::calculateQuantities(SPtr<ProbeStruct> probeStruct, Paramete
             }
         }    
     }
+        
 
     this->tProbe += 1;
     getLastCudaError("WallModelProbe::calculateQuantities execution failed");
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD27/InitCompAD27_Device.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD27/InitCompAD27_Device.cu
index d4d4b97681d6596b4bc3752d74774f035256b9c4..d40e60c764054f8ac6c1793ea3e3573ed04a84fc 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD27/InitCompAD27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD27/InitCompAD27_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 
 
-extern "C" __global__ void LB_Init_Comp_AD_27(unsigned int* neighborX,
+__global__ void LB_Init_Comp_AD_27(unsigned int* neighborX,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
 	unsigned int* geoD,
@@ -40,63 +40,63 @@ extern "C" __global__ void LB_Init_Comp_AD_27(unsigned int* neighborX,
 			Distributions27 D27;
 			if (EvenOrOdd == true)
 			{
-				D27.f[E] = &DD27[E   *size_Mat];
-				D27.f[W] = &DD27[W   *size_Mat];
-				D27.f[N] = &DD27[N   *size_Mat];
-				D27.f[S] = &DD27[S   *size_Mat];
-				D27.f[T] = &DD27[T   *size_Mat];
-				D27.f[B] = &DD27[B   *size_Mat];
-				D27.f[NE] = &DD27[NE  *size_Mat];
-				D27.f[SW] = &DD27[SW  *size_Mat];
-				D27.f[SE] = &DD27[SE  *size_Mat];
-				D27.f[NW] = &DD27[NW  *size_Mat];
-				D27.f[TE] = &DD27[TE  *size_Mat];
-				D27.f[BW] = &DD27[BW  *size_Mat];
-				D27.f[BE] = &DD27[BE  *size_Mat];
-				D27.f[TW] = &DD27[TW  *size_Mat];
-				D27.f[TN] = &DD27[TN  *size_Mat];
-				D27.f[BS] = &DD27[BS  *size_Mat];
-				D27.f[BN] = &DD27[BN  *size_Mat];
-				D27.f[TS] = &DD27[TS  *size_Mat];
-				D27.f[REST] = &DD27[REST*size_Mat];
-				D27.f[TNE] = &DD27[TNE *size_Mat];
-				D27.f[TSW] = &DD27[TSW *size_Mat];
-				D27.f[TSE] = &DD27[TSE *size_Mat];
-				D27.f[TNW] = &DD27[TNW *size_Mat];
-				D27.f[BNE] = &DD27[BNE *size_Mat];
-				D27.f[BSW] = &DD27[BSW *size_Mat];
-				D27.f[BSE] = &DD27[BSE *size_Mat];
-				D27.f[BNW] = &DD27[BNW *size_Mat];
+				D27.f[DIR_P00] = &DD27[DIR_P00   *size_Mat];
+				D27.f[DIR_M00] = &DD27[DIR_M00   *size_Mat];
+				D27.f[DIR_0P0] = &DD27[DIR_0P0   *size_Mat];
+				D27.f[DIR_0M0] = &DD27[DIR_0M0   *size_Mat];
+				D27.f[DIR_00P] = &DD27[DIR_00P   *size_Mat];
+				D27.f[DIR_00M] = &DD27[DIR_00M   *size_Mat];
+				D27.f[DIR_PP0] = &DD27[DIR_PP0  *size_Mat];
+				D27.f[DIR_MM0] = &DD27[DIR_MM0  *size_Mat];
+				D27.f[DIR_PM0] = &DD27[DIR_PM0  *size_Mat];
+				D27.f[DIR_MP0] = &DD27[DIR_MP0  *size_Mat];
+				D27.f[DIR_P0P] = &DD27[DIR_P0P  *size_Mat];
+				D27.f[DIR_M0M] = &DD27[DIR_M0M  *size_Mat];
+				D27.f[DIR_P0M] = &DD27[DIR_P0M  *size_Mat];
+				D27.f[DIR_M0P] = &DD27[DIR_M0P  *size_Mat];
+				D27.f[DIR_0PP] = &DD27[DIR_0PP  *size_Mat];
+				D27.f[DIR_0MM] = &DD27[DIR_0MM  *size_Mat];
+				D27.f[DIR_0PM] = &DD27[DIR_0PM  *size_Mat];
+				D27.f[DIR_0MP] = &DD27[DIR_0MP  *size_Mat];
+				D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
+				D27.f[DIR_PPP] = &DD27[DIR_PPP *size_Mat];
+				D27.f[DIR_MMP] = &DD27[DIR_MMP *size_Mat];
+				D27.f[DIR_PMP] = &DD27[DIR_PMP *size_Mat];
+				D27.f[DIR_MPP] = &DD27[DIR_MPP *size_Mat];
+				D27.f[DIR_PPM] = &DD27[DIR_PPM *size_Mat];
+				D27.f[DIR_MMM] = &DD27[DIR_MMM *size_Mat];
+				D27.f[DIR_PMM]= &DD27[DIR_PMM *size_Mat];
+				D27.f[DIR_MPM]= &DD27[DIR_MPM *size_Mat];
 			}
 			else
 			{
-				D27.f[W] = &DD27[E   *size_Mat];
-				D27.f[E] = &DD27[W   *size_Mat];
-				D27.f[S] = &DD27[N   *size_Mat];
-				D27.f[N] = &DD27[S   *size_Mat];
-				D27.f[B] = &DD27[T   *size_Mat];
-				D27.f[T] = &DD27[B   *size_Mat];
-				D27.f[SW] = &DD27[NE  *size_Mat];
-				D27.f[NE] = &DD27[SW  *size_Mat];
-				D27.f[NW] = &DD27[SE  *size_Mat];
-				D27.f[SE] = &DD27[NW  *size_Mat];
-				D27.f[BW] = &DD27[TE  *size_Mat];
-				D27.f[TE] = &DD27[BW  *size_Mat];
-				D27.f[TW] = &DD27[BE  *size_Mat];
-				D27.f[BE] = &DD27[TW  *size_Mat];
-				D27.f[BS] = &DD27[TN  *size_Mat];
-				D27.f[TN] = &DD27[BS  *size_Mat];
-				D27.f[TS] = &DD27[BN  *size_Mat];
-				D27.f[BN] = &DD27[TS  *size_Mat];
-				D27.f[REST] = &DD27[REST*size_Mat];
-				D27.f[BSW] = &DD27[TNE *size_Mat];
-				D27.f[BNE] = &DD27[TSW *size_Mat];
-				D27.f[BNW] = &DD27[TSE *size_Mat];
-				D27.f[BSE] = &DD27[TNW *size_Mat];
-				D27.f[TSW] = &DD27[BNE *size_Mat];
-				D27.f[TNE] = &DD27[BSW *size_Mat];
-				D27.f[TNW] = &DD27[BSE *size_Mat];
-				D27.f[TSE] = &DD27[BNW *size_Mat];
+				D27.f[DIR_M00] = &DD27[DIR_P00   *size_Mat];
+				D27.f[DIR_P00] = &DD27[DIR_M00   *size_Mat];
+				D27.f[DIR_0M0] = &DD27[DIR_0P0   *size_Mat];
+				D27.f[DIR_0P0] = &DD27[DIR_0M0   *size_Mat];
+				D27.f[DIR_00M] = &DD27[DIR_00P   *size_Mat];
+				D27.f[DIR_00P] = &DD27[DIR_00M   *size_Mat];
+				D27.f[DIR_MM0] = &DD27[DIR_PP0  *size_Mat];
+				D27.f[DIR_PP0] = &DD27[DIR_MM0  *size_Mat];
+				D27.f[DIR_MP0] = &DD27[DIR_PM0  *size_Mat];
+				D27.f[DIR_PM0] = &DD27[DIR_MP0  *size_Mat];
+				D27.f[DIR_M0M] = &DD27[DIR_P0P  *size_Mat];
+				D27.f[DIR_P0P] = &DD27[DIR_M0M  *size_Mat];
+				D27.f[DIR_M0P] = &DD27[DIR_P0M  *size_Mat];
+				D27.f[DIR_P0M] = &DD27[DIR_M0P  *size_Mat];
+				D27.f[DIR_0MM] = &DD27[DIR_0PP  *size_Mat];
+				D27.f[DIR_0PP] = &DD27[DIR_0MM  *size_Mat];
+				D27.f[DIR_0MP] = &DD27[DIR_0PM  *size_Mat];
+				D27.f[DIR_0PM] = &DD27[DIR_0MP  *size_Mat];
+				D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
+				D27.f[DIR_MMM] = &DD27[DIR_PPP *size_Mat];
+				D27.f[DIR_PPM] = &DD27[DIR_MMP *size_Mat];
+				D27.f[DIR_MPM]= &DD27[DIR_PMP *size_Mat];
+				D27.f[DIR_PMM]= &DD27[DIR_MPP *size_Mat];
+				D27.f[DIR_MMP] = &DD27[DIR_PPM *size_Mat];
+				D27.f[DIR_PPP] = &DD27[DIR_MMM *size_Mat];
+				D27.f[DIR_MPP] = &DD27[DIR_PMM *size_Mat];
+				D27.f[DIR_PMP] = &DD27[DIR_MPM *size_Mat];
 			}
 			//////////////////////////////////////////////////////////////////////////
 			real ConcD = Conc[k];
@@ -167,33 +167,33 @@ extern "C" __global__ void LB_Init_Comp_AD_27(unsigned int* neighborX,
 			////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 			real cu_sq = c3o2*(vx1*vx1 + vx2*vx2 + vx3*vx3);
 
-			(D27.f[REST])[kzero] = c8o27* ConcD*(c1o1 - cu_sq);
-			(D27.f[E])[ke] = c2o27* ConcD*(c1o1 + c3o1*(vx1)+c9o2*(vx1)*(vx1)-cu_sq);
-			(D27.f[W])[kw] = c2o27* ConcD*(c1o1 + c3o1*(-vx1) + c9o2*(-vx1)*(-vx1) - cu_sq);
-			(D27.f[N])[kn] = c2o27* ConcD*(c1o1 + c3o1*(vx2)+c9o2*(vx2)*(vx2)-cu_sq);
-			(D27.f[S])[ks] = c2o27* ConcD*(c1o1 + c3o1*(-vx2) + c9o2*(-vx2)*(-vx2) - cu_sq);
-			(D27.f[T])[kt] = c2o27* ConcD*(c1o1 + c3o1*(vx3)+c9o2*(vx3)*(vx3)-cu_sq);
-			(D27.f[B])[kb] = c2o27* ConcD*(c1o1 + c3o1*(-vx3) + c9o2*(-vx3)*(-vx3) - cu_sq);
-			(D27.f[NE])[kne] = c1o54* ConcD*(c1o1 + c3o1*(vx1 + vx2) + c9o2*(vx1 + vx2)*(vx1 + vx2) - cu_sq);
-			(D27.f[SW])[ksw] = c1o54* ConcD*(c1o1 + c3o1*(-vx1 - vx2) + c9o2*(-vx1 - vx2)*(-vx1 - vx2) - cu_sq);
-			(D27.f[SE])[kse] = c1o54* ConcD*(c1o1 + c3o1*(vx1 - vx2) + c9o2*(vx1 - vx2)*(vx1 - vx2) - cu_sq);
-			(D27.f[NW])[knw] = c1o54* ConcD*(c1o1 + c3o1*(-vx1 + vx2) + c9o2*(-vx1 + vx2)*(-vx1 + vx2) - cu_sq);
-			(D27.f[TE])[kte] = c1o54* ConcD*(c1o1 + c3o1*(vx1 + vx3) + c9o2*(vx1 + vx3)*(vx1 + vx3) - cu_sq);
-			(D27.f[BW])[kbw] = c1o54* ConcD*(c1o1 + c3o1*(-vx1 - vx3) + c9o2*(-vx1 - vx3)*(-vx1 - vx3) - cu_sq);
-			(D27.f[BE])[kbe] = c1o54* ConcD*(c1o1 + c3o1*(vx1 - vx3) + c9o2*(vx1 - vx3)*(vx1 - vx3) - cu_sq);
-			(D27.f[TW])[ktw] = c1o54* ConcD*(c1o1 + c3o1*(-vx1 + vx3) + c9o2*(-vx1 + vx3)*(-vx1 + vx3) - cu_sq);
-			(D27.f[TN])[ktn] = c1o54* ConcD*(c1o1 + c3o1*(vx2 + vx3) + c9o2*(vx2 + vx3)*(vx2 + vx3) - cu_sq);
-			(D27.f[BS])[kbs] = c1o54* ConcD*(c1o1 + c3o1*(-vx2 - vx3) + c9o2*(-vx2 - vx3)*(-vx2 - vx3) - cu_sq);
-			(D27.f[BN])[kbn] = c1o54* ConcD*(c1o1 + c3o1*(vx2 - vx3) + c9o2*(vx2 - vx3)*(vx2 - vx3) - cu_sq);
-			(D27.f[TS])[kts] = c1o54* ConcD*(c1o1 + c3o1*(-vx2 + vx3) + c9o2*(-vx2 + vx3)*(-vx2 + vx3) - cu_sq);
-			(D27.f[TNE])[ktne] = c1o216*ConcD*(c1o1 + c3o1*(vx1 + vx2 + vx3) + c9o2*(vx1 + vx2 + vx3)*(vx1 + vx2 + vx3) - cu_sq);
-			(D27.f[BSW])[kbsw] = c1o216*ConcD*(c1o1 + c3o1*(-vx1 - vx2 - vx3) + c9o2*(-vx1 - vx2 - vx3)*(-vx1 - vx2 - vx3) - cu_sq);
-			(D27.f[BNE])[kbne] = c1o216*ConcD*(c1o1 + c3o1*(vx1 + vx2 - vx3) + c9o2*(vx1 + vx2 - vx3)*(vx1 + vx2 - vx3) - cu_sq);
-			(D27.f[TSW])[ktsw] = c1o216*ConcD*(c1o1 + c3o1*(-vx1 - vx2 + vx3) + c9o2*(-vx1 - vx2 + vx3)*(-vx1 - vx2 + vx3) - cu_sq);
-			(D27.f[TSE])[ktse] = c1o216*ConcD*(c1o1 + c3o1*(vx1 - vx2 + vx3) + c9o2*(vx1 - vx2 + vx3)*(vx1 - vx2 + vx3) - cu_sq);
-			(D27.f[BNW])[kbnw] = c1o216*ConcD*(c1o1 + c3o1*(-vx1 + vx2 - vx3) + c9o2*(-vx1 + vx2 - vx3)*(-vx1 + vx2 - vx3) - cu_sq);
-			(D27.f[BSE])[kbse] = c1o216*ConcD*(c1o1 + c3o1*(vx1 - vx2 - vx3) + c9o2*(vx1 - vx2 - vx3)*(vx1 - vx2 - vx3) - cu_sq);
-			(D27.f[TNW])[ktnw] = c1o216*ConcD*(c1o1 + c3o1*(-vx1 + vx2 + vx3) + c9o2*(-vx1 + vx2 + vx3)*(-vx1 + vx2 + vx3) - cu_sq);
+			(D27.f[DIR_000])[kzero] = c8o27* ConcD*(c1o1 - cu_sq);
+			(D27.f[DIR_P00])[ke] = c2o27* ConcD*(c1o1 + c3o1*(vx1)+c9o2*(vx1)*(vx1)-cu_sq);
+			(D27.f[DIR_M00])[kw] = c2o27* ConcD*(c1o1 + c3o1*(-vx1) + c9o2*(-vx1)*(-vx1) - cu_sq);
+			(D27.f[DIR_0P0])[kn] = c2o27* ConcD*(c1o1 + c3o1*(vx2)+c9o2*(vx2)*(vx2)-cu_sq);
+			(D27.f[DIR_0M0])[ks] = c2o27* ConcD*(c1o1 + c3o1*(-vx2) + c9o2*(-vx2)*(-vx2) - cu_sq);
+			(D27.f[DIR_00P])[kt] = c2o27* ConcD*(c1o1 + c3o1*(vx3)+c9o2*(vx3)*(vx3)-cu_sq);
+			(D27.f[DIR_00M])[kb] = c2o27* ConcD*(c1o1 + c3o1*(-vx3) + c9o2*(-vx3)*(-vx3) - cu_sq);
+			(D27.f[DIR_PP0])[kne] = c1o54* ConcD*(c1o1 + c3o1*(vx1 + vx2) + c9o2*(vx1 + vx2)*(vx1 + vx2) - cu_sq);
+			(D27.f[DIR_MM0])[ksw] = c1o54* ConcD*(c1o1 + c3o1*(-vx1 - vx2) + c9o2*(-vx1 - vx2)*(-vx1 - vx2) - cu_sq);
+			(D27.f[DIR_PM0])[kse] = c1o54* ConcD*(c1o1 + c3o1*(vx1 - vx2) + c9o2*(vx1 - vx2)*(vx1 - vx2) - cu_sq);
+			(D27.f[DIR_MP0])[knw] = c1o54* ConcD*(c1o1 + c3o1*(-vx1 + vx2) + c9o2*(-vx1 + vx2)*(-vx1 + vx2) - cu_sq);
+			(D27.f[DIR_P0P])[kte] = c1o54* ConcD*(c1o1 + c3o1*(vx1 + vx3) + c9o2*(vx1 + vx3)*(vx1 + vx3) - cu_sq);
+			(D27.f[DIR_M0M])[kbw] = c1o54* ConcD*(c1o1 + c3o1*(-vx1 - vx3) + c9o2*(-vx1 - vx3)*(-vx1 - vx3) - cu_sq);
+			(D27.f[DIR_P0M])[kbe] = c1o54* ConcD*(c1o1 + c3o1*(vx1 - vx3) + c9o2*(vx1 - vx3)*(vx1 - vx3) - cu_sq);
+			(D27.f[DIR_M0P])[ktw] = c1o54* ConcD*(c1o1 + c3o1*(-vx1 + vx3) + c9o2*(-vx1 + vx3)*(-vx1 + vx3) - cu_sq);
+			(D27.f[DIR_0PP])[ktn] = c1o54* ConcD*(c1o1 + c3o1*(vx2 + vx3) + c9o2*(vx2 + vx3)*(vx2 + vx3) - cu_sq);
+			(D27.f[DIR_0MM])[kbs] = c1o54* ConcD*(c1o1 + c3o1*(-vx2 - vx3) + c9o2*(-vx2 - vx3)*(-vx2 - vx3) - cu_sq);
+			(D27.f[DIR_0PM])[kbn] = c1o54* ConcD*(c1o1 + c3o1*(vx2 - vx3) + c9o2*(vx2 - vx3)*(vx2 - vx3) - cu_sq);
+			(D27.f[DIR_0MP])[kts] = c1o54* ConcD*(c1o1 + c3o1*(-vx2 + vx3) + c9o2*(-vx2 + vx3)*(-vx2 + vx3) - cu_sq);
+			(D27.f[DIR_PPP])[ktne] = c1o216*ConcD*(c1o1 + c3o1*(vx1 + vx2 + vx3) + c9o2*(vx1 + vx2 + vx3)*(vx1 + vx2 + vx3) - cu_sq);
+			(D27.f[DIR_MMM])[kbsw] = c1o216*ConcD*(c1o1 + c3o1*(-vx1 - vx2 - vx3) + c9o2*(-vx1 - vx2 - vx3)*(-vx1 - vx2 - vx3) - cu_sq);
+			(D27.f[DIR_PPM])[kbne] = c1o216*ConcD*(c1o1 + c3o1*(vx1 + vx2 - vx3) + c9o2*(vx1 + vx2 - vx3)*(vx1 + vx2 - vx3) - cu_sq);
+			(D27.f[DIR_MMP])[ktsw] = c1o216*ConcD*(c1o1 + c3o1*(-vx1 - vx2 + vx3) + c9o2*(-vx1 - vx2 + vx3)*(-vx1 - vx2 + vx3) - cu_sq);
+			(D27.f[DIR_PMP])[ktse] = c1o216*ConcD*(c1o1 + c3o1*(vx1 - vx2 + vx3) + c9o2*(vx1 - vx2 + vx3)*(vx1 - vx2 + vx3) - cu_sq);
+			(D27.f[DIR_MPM])[kbnw] = c1o216*ConcD*(c1o1 + c3o1*(-vx1 + vx2 - vx3) + c9o2*(-vx1 + vx2 - vx3)*(-vx1 + vx2 - vx3) - cu_sq);
+			(D27.f[DIR_PMM])[kbse] = c1o216*ConcD*(c1o1 + c3o1*(vx1 - vx2 - vx3) + c9o2*(vx1 - vx2 - vx3)*(vx1 - vx2 - vx3) - cu_sq);
+			(D27.f[DIR_MPP])[ktnw] = c1o216*ConcD*(c1o1 + c3o1*(-vx1 + vx2 + vx3) + c9o2*(-vx1 + vx2 + vx3)*(-vx1 + vx2 + vx3) - cu_sq);
 			////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 		}
 	}
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD27/InitCompAD27_Device.cuh b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD27/InitCompAD27_Device.cuh
index 1a21f872f7adbde555e6d45a233c93eb7ff3e055..80c46f94549371811025e7f751acda65c858c0f4 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD27/InitCompAD27_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD27/InitCompAD27_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Init_Comp_AD_27(unsigned int* neighborX,
+__global__ void LB_Init_Comp_AD_27(unsigned int* neighborX,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
 	unsigned int* geoD,
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD7/InitCompAD7_Device.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD7/InitCompAD7_Device.cu
index c3d7e4a22e4b36f4689967486f9b6c3243c766e7..38cd57fd48e02e410e1ae557088e023ffeadfc4e 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD7/InitCompAD7_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD7/InitCompAD7_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Init_Comp_AD_7(unsigned int* neighborX,
+__global__ void LB_Init_Comp_AD_7(unsigned int* neighborX,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
 	unsigned int* geoD,
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD7/InitCompAD7_Device.cuh b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD7/InitCompAD7_Device.cuh
index 4eb8b9eeb70876f39bc8c0b58a8789fb239530a0..7dc17056bca6116aa6e98367a78e92320813b502 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD7/InitCompAD7_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompAD7/InitCompAD7_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Init_Comp_AD_7(unsigned int* neighborX,
+__global__ void LB_Init_Comp_AD_7(unsigned int* neighborX,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
 	unsigned int* geoD,
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompSP27/InitCompSP27_Device.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompSP27/InitCompSP27_Device.cu
index 2a5b8898643ec43c10302e372a1931a165ca1e4e..dcc3b9a060a026accffdc6d24f338a6d23295d73 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompSP27/InitCompSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompSP27/InitCompSP27_Device.cu
@@ -8,7 +8,7 @@ using namespace vf::lbm::dir;
 
 #include <stdio.h>
 
-extern "C" __global__ void LB_Init_Comp_SP_27(unsigned int* neighborX,
+__global__ void LB_Init_Comp_SP_27(unsigned int* neighborX,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
 	unsigned int* geoD,
@@ -42,63 +42,63 @@ extern "C" __global__ void LB_Init_Comp_SP_27(unsigned int* neighborX,
          Distributions27 D;
          if (EvenOrOdd==true)
          {
-            D.f[E   ] = &DD[E   *size_Mat];
-            D.f[W   ] = &DD[W   *size_Mat];
-            D.f[N   ] = &DD[N   *size_Mat];
-            D.f[S   ] = &DD[S   *size_Mat];
-            D.f[T   ] = &DD[T   *size_Mat];
-            D.f[B   ] = &DD[B   *size_Mat];
-            D.f[NE  ] = &DD[NE  *size_Mat];
-            D.f[SW  ] = &DD[SW  *size_Mat];
-            D.f[SE  ] = &DD[SE  *size_Mat];
-            D.f[NW  ] = &DD[NW  *size_Mat];
-            D.f[TE  ] = &DD[TE  *size_Mat];
-            D.f[BW  ] = &DD[BW  *size_Mat];
-            D.f[BE  ] = &DD[BE  *size_Mat];
-            D.f[TW  ] = &DD[TW  *size_Mat];
-            D.f[TN  ] = &DD[TN  *size_Mat];
-            D.f[BS  ] = &DD[BS  *size_Mat];
-            D.f[BN  ] = &DD[BN  *size_Mat];
-            D.f[TS  ] = &DD[TS  *size_Mat];
-            D.f[REST] = &DD[REST*size_Mat];
-            D.f[TNE ] = &DD[TNE *size_Mat];
-            D.f[TSW ] = &DD[TSW *size_Mat];
-            D.f[TSE ] = &DD[TSE *size_Mat];
-            D.f[TNW ] = &DD[TNW *size_Mat];
-            D.f[BNE ] = &DD[BNE *size_Mat];
-            D.f[BSW ] = &DD[BSW *size_Mat];
-            D.f[BSE ] = &DD[BSE *size_Mat];
-            D.f[BNW ] = &DD[BNW *size_Mat];
+            D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+            D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+            D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+            D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+            D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+            D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+            D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+            D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+            D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+            D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+            D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+            D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+            D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+            D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+            D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+            D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+            D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+            D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+            D.f[DIR_000] = &DD[DIR_000*size_Mat];
+            D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+            D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+            D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+            D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+            D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+            D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+            D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+            D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
          }
          else
          {
-            D.f[W   ] = &DD[E   *size_Mat];
-            D.f[E   ] = &DD[W   *size_Mat];
-            D.f[S   ] = &DD[N   *size_Mat];
-            D.f[N   ] = &DD[S   *size_Mat];
-            D.f[B   ] = &DD[T   *size_Mat];
-            D.f[T   ] = &DD[B   *size_Mat];
-            D.f[SW  ] = &DD[NE  *size_Mat];
-            D.f[NE  ] = &DD[SW  *size_Mat];
-            D.f[NW  ] = &DD[SE  *size_Mat];
-            D.f[SE  ] = &DD[NW  *size_Mat];
-            D.f[BW  ] = &DD[TE  *size_Mat];
-            D.f[TE  ] = &DD[BW  *size_Mat];
-            D.f[TW  ] = &DD[BE  *size_Mat];
-            D.f[BE  ] = &DD[TW  *size_Mat];
-            D.f[BS  ] = &DD[TN  *size_Mat];
-            D.f[TN  ] = &DD[BS  *size_Mat];
-            D.f[TS  ] = &DD[BN  *size_Mat];
-            D.f[BN  ] = &DD[TS  *size_Mat];
-            D.f[REST] = &DD[REST*size_Mat];
-            D.f[BSW ] = &DD[TNE *size_Mat];
-            D.f[BNE ] = &DD[TSW *size_Mat];
-            D.f[BNW ] = &DD[TSE *size_Mat];
-            D.f[BSE ] = &DD[TNW *size_Mat];
-            D.f[TSW ] = &DD[BNE *size_Mat];
-            D.f[TNE ] = &DD[BSW *size_Mat];
-            D.f[TNW ] = &DD[BSE *size_Mat];
-            D.f[TSE ] = &DD[BNW *size_Mat];
+            D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+            D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+            D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+            D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+            D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+            D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+            D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+            D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+            D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+            D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+            D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+            D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+            D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+            D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+            D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+            D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+            D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+            D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+            D.f[DIR_000] = &DD[DIR_000*size_Mat];
+            D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+            D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+            D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+            D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+            D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+            D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+            D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+            D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
          }
          //////////////////////////////////////////////////////////////////////////
          real drho = rho[k];//0.0f;//
@@ -138,33 +138,33 @@ extern "C" __global__ void LB_Init_Comp_SP_27(unsigned int* neighborX,
          //////////////////////////////////////////////////////////////////////////
          real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
-         (D.f[REST])[kzero] =   c8o27* (drho-cu_sq*(c1o1+drho));
-         (D.f[E   ])[ke   ] =   c2o27* (drho+ (c1o1+drho) * (c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cu_sq));
-         (D.f[W   ])[kw   ] =   c2o27* (drho+ (c1o1+drho) * (c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cu_sq));
-         (D.f[N   ])[kn   ] =   c2o27* (drho+ (c1o1+drho) * (c3o1*(    vx2     )+c9o2*(     vx2    )*(     vx2    )-cu_sq));
-         (D.f[S   ])[ks   ] =   c2o27* (drho+ (c1o1+drho) * (c3o1*(   -vx2     )+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
-         (D.f[T   ])[kt   ] =   c2o27* (drho+ (c1o1+drho) * (c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cu_sq));
-         (D.f[B   ])[kb   ] =   c2o27* (drho+ (c1o1+drho) * (c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cu_sq));
-         (D.f[NE  ])[kne  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*( vx1+vx2    )+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
-         (D.f[SW  ])[ksw  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*(-vx1-vx2    )+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
-         (D.f[SE  ])[kse  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*( vx1-vx2    )+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
-         (D.f[NW  ])[knw  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*(-vx1+vx2    )+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
-         (D.f[TE  ])[kte  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*( vx1    +vx3)+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
-         (D.f[BW  ])[kbw  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*(-vx1    -vx3)+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
-         (D.f[BE  ])[kbe  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*( vx1    -vx3)+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
-         (D.f[TW  ])[ktw  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*(-vx1    +vx3)+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
-         (D.f[TN  ])[ktn  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*(     vx2+vx3)+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
-         (D.f[BS  ])[kbs  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*(    -vx2-vx3)+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
-         (D.f[BN  ])[kbn  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*(     vx2-vx3)+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
-         (D.f[TS  ])[kts  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*(    -vx2+vx3)+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
-         (D.f[TNE ])[ktne ] =   c1o216*(drho+ (c1o1+drho) * (c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
-         (D.f[BSW ])[kbsw ] =   c1o216*(drho+ (c1o1+drho) * (c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
-         (D.f[BNE ])[kbne ] =   c1o216*(drho+ (c1o1+drho) * (c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
-         (D.f[TSW ])[ktsw ] =   c1o216*(drho+ (c1o1+drho) * (c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
-         (D.f[TSE ])[ktse ] =   c1o216*(drho+ (c1o1+drho) * (c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
-         (D.f[BNW ])[kbnw ] =   c1o216*(drho+ (c1o1+drho) * (c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
-         (D.f[BSE ])[kbse ] =   c1o216*(drho+ (c1o1+drho) * (c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
-         (D.f[TNW ])[ktnw ] =   c1o216*(drho+ (c1o1+drho) * (c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
+         (D.f[DIR_000])[kzero] =   c8o27* (drho-cu_sq*(c1o1+drho));
+         (D.f[DIR_P00   ])[ke   ] =   c2o27* (drho+ (c1o1+drho) * (c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cu_sq));
+         (D.f[DIR_M00   ])[kw   ] =   c2o27* (drho+ (c1o1+drho) * (c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cu_sq));
+         (D.f[DIR_0P0   ])[kn   ] =   c2o27* (drho+ (c1o1+drho) * (c3o1*(    vx2     )+c9o2*(     vx2    )*(     vx2    )-cu_sq));
+         (D.f[DIR_0M0   ])[ks   ] =   c2o27* (drho+ (c1o1+drho) * (c3o1*(   -vx2     )+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
+         (D.f[DIR_00P   ])[kt   ] =   c2o27* (drho+ (c1o1+drho) * (c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cu_sq));
+         (D.f[DIR_00M   ])[kb   ] =   c2o27* (drho+ (c1o1+drho) * (c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cu_sq));
+         (D.f[DIR_PP0  ])[kne  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*( vx1+vx2    )+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
+         (D.f[DIR_MM0  ])[ksw  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*(-vx1-vx2    )+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
+         (D.f[DIR_PM0  ])[kse  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*( vx1-vx2    )+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
+         (D.f[DIR_MP0  ])[knw  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*(-vx1+vx2    )+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
+         (D.f[DIR_P0P  ])[kte  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*( vx1    +vx3)+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
+         (D.f[DIR_M0M  ])[kbw  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*(-vx1    -vx3)+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
+         (D.f[DIR_P0M  ])[kbe  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*( vx1    -vx3)+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
+         (D.f[DIR_M0P  ])[ktw  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*(-vx1    +vx3)+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
+         (D.f[DIR_0PP  ])[ktn  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*(     vx2+vx3)+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
+         (D.f[DIR_0MM  ])[kbs  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*(    -vx2-vx3)+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
+         (D.f[DIR_0PM  ])[kbn  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*(     vx2-vx3)+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
+         (D.f[DIR_0MP  ])[kts  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*(    -vx2+vx3)+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
+         (D.f[DIR_PPP ])[ktne ] =   c1o216*(drho+ (c1o1+drho) * (c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
+         (D.f[DIR_MMM ])[kbsw ] =   c1o216*(drho+ (c1o1+drho) * (c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
+         (D.f[DIR_PPM ])[kbne ] =   c1o216*(drho+ (c1o1+drho) * (c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
+         (D.f[DIR_MMP ])[ktsw ] =   c1o216*(drho+ (c1o1+drho) * (c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
+         (D.f[DIR_PMP ])[ktse ] =   c1o216*(drho+ (c1o1+drho) * (c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
+         (D.f[DIR_MPM ])[kbnw ] =   c1o216*(drho+ (c1o1+drho) * (c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
+         (D.f[DIR_PMM ])[kbse ] =   c1o216*(drho+ (c1o1+drho) * (c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
+         (D.f[DIR_MPP ])[ktnw ] =   c1o216*(drho+ (c1o1+drho) * (c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
       }
    }
 }
@@ -179,7 +179,7 @@ extern "C" __global__ void LB_Init_Comp_SP_27(unsigned int* neighborX,
 
 
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" __global__ void LB_Init_Comp_Neq_SP_27( unsigned int* neighborX,
+__global__ void LB_Init_Comp_Neq_SP_27( unsigned int* neighborX,
                                                    unsigned int* neighborY,
                                                    unsigned int* neighborZ,
                                                    unsigned int* neighborWSB,
@@ -215,63 +215,63 @@ extern "C" __global__ void LB_Init_Comp_Neq_SP_27( unsigned int* neighborX,
             Distributions27 D;
             if (EvenOrOdd==true)
             {
-                D.f[E   ] = &DD[E   *size_Mat];
-                D.f[W   ] = &DD[W   *size_Mat];
-                D.f[N   ] = &DD[N   *size_Mat];
-                D.f[S   ] = &DD[S   *size_Mat];
-                D.f[T   ] = &DD[T   *size_Mat];
-                D.f[B   ] = &DD[B   *size_Mat];
-                D.f[NE  ] = &DD[NE  *size_Mat];
-                D.f[SW  ] = &DD[SW  *size_Mat];
-                D.f[SE  ] = &DD[SE  *size_Mat];
-                D.f[NW  ] = &DD[NW  *size_Mat];
-                D.f[TE  ] = &DD[TE  *size_Mat];
-                D.f[BW  ] = &DD[BW  *size_Mat];
-                D.f[BE  ] = &DD[BE  *size_Mat];
-                D.f[TW  ] = &DD[TW  *size_Mat];
-                D.f[TN  ] = &DD[TN  *size_Mat];
-                D.f[BS  ] = &DD[BS  *size_Mat];
-                D.f[BN  ] = &DD[BN  *size_Mat];
-                D.f[TS  ] = &DD[TS  *size_Mat];
-                D.f[REST] = &DD[REST*size_Mat];
-                D.f[TNE ] = &DD[TNE *size_Mat];
-                D.f[TSW ] = &DD[TSW *size_Mat];
-                D.f[TSE ] = &DD[TSE *size_Mat];
-                D.f[TNW ] = &DD[TNW *size_Mat];
-                D.f[BNE ] = &DD[BNE *size_Mat];
-                D.f[BSW ] = &DD[BSW *size_Mat];
-                D.f[BSE ] = &DD[BSE *size_Mat];
-                D.f[BNW ] = &DD[BNW *size_Mat];
+                D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+                D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+                D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+                D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+                D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+                D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+                D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+                D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+                D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+                D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+                D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+                D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+                D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+                D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+                D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+                D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+                D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+                D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+                D.f[DIR_000] = &DD[DIR_000*size_Mat];
+                D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+                D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+                D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+                D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+                D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+                D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+                D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+                D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
             }
             else
             {
-                D.f[W   ] = &DD[E   *size_Mat];
-                D.f[E   ] = &DD[W   *size_Mat];
-                D.f[S   ] = &DD[N   *size_Mat];
-                D.f[N   ] = &DD[S   *size_Mat];
-                D.f[B   ] = &DD[T   *size_Mat];
-                D.f[T   ] = &DD[B   *size_Mat];
-                D.f[SW  ] = &DD[NE  *size_Mat];
-                D.f[NE  ] = &DD[SW  *size_Mat];
-                D.f[NW  ] = &DD[SE  *size_Mat];
-                D.f[SE  ] = &DD[NW  *size_Mat];
-                D.f[BW  ] = &DD[TE  *size_Mat];
-                D.f[TE  ] = &DD[BW  *size_Mat];
-                D.f[TW  ] = &DD[BE  *size_Mat];
-                D.f[BE  ] = &DD[TW  *size_Mat];
-                D.f[BS  ] = &DD[TN  *size_Mat];
-                D.f[TN  ] = &DD[BS  *size_Mat];
-                D.f[TS  ] = &DD[BN  *size_Mat];
-                D.f[BN  ] = &DD[TS  *size_Mat];
-                D.f[REST] = &DD[REST*size_Mat];
-                D.f[BSW ] = &DD[TNE *size_Mat];
-                D.f[BNE ] = &DD[TSW *size_Mat];
-                D.f[BNW ] = &DD[TSE *size_Mat];
-                D.f[BSE ] = &DD[TNW *size_Mat];
-                D.f[TSW ] = &DD[BNE *size_Mat];
-                D.f[TNE ] = &DD[BSW *size_Mat];
-                D.f[TNW ] = &DD[BSE *size_Mat];
-                D.f[TSE ] = &DD[BNW *size_Mat];
+                D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+                D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+                D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+                D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+                D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+                D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+                D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+                D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+                D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+                D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+                D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+                D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+                D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+                D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+                D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+                D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+                D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+                D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+                D.f[DIR_000] = &DD[DIR_000*size_Mat];
+                D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+                D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+                D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+                D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+                D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+                D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+                D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+                D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
             }
             //////////////////////////////////////////////////////////////////////////
             real drho = rho[k];//0.0f;//
@@ -393,63 +393,63 @@ extern "C" __global__ void LB_Init_Comp_Neq_SP_27( unsigned int* neighborX,
             //////////////////////////////////////////////////////////////////////////
             real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
-            (D.f[REST])[kzero] =   c8o27* (drho-cu_sq*(c1o1+drho));
-            (D.f[E   ])[ke   ] =   c2o27* (drho+ (c1o1+drho) * (c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cu_sq));
-            (D.f[W   ])[kw   ] =   c2o27* (drho+ (c1o1+drho) * (c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cu_sq));
-            (D.f[N   ])[kn   ] =   c2o27* (drho+ (c1o1+drho) * (c3o1*(    vx2     )+c9o2*(     vx2    )*(     vx2    )-cu_sq));
-            (D.f[S   ])[ks   ] =   c2o27* (drho+ (c1o1+drho) * (c3o1*(   -vx2     )+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
-            (D.f[T   ])[kt   ] =   c2o27* (drho+ (c1o1+drho) * (c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cu_sq));
-            (D.f[B   ])[kb   ] =   c2o27* (drho+ (c1o1+drho) * (c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cu_sq));
-            (D.f[NE  ])[kne  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*( vx1+vx2    )+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
-            (D.f[SW  ])[ksw  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*(-vx1-vx2    )+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
-            (D.f[SE  ])[kse  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*( vx1-vx2    )+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
-            (D.f[NW  ])[knw  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*(-vx1+vx2    )+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
-            (D.f[TE  ])[kte  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*( vx1    +vx3)+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
-            (D.f[BW  ])[kbw  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*(-vx1    -vx3)+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
-            (D.f[BE  ])[kbe  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*( vx1    -vx3)+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
-            (D.f[TW  ])[ktw  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*(-vx1    +vx3)+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
-            (D.f[TN  ])[ktn  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*(     vx2+vx3)+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
-            (D.f[BS  ])[kbs  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*(    -vx2-vx3)+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
-            (D.f[BN  ])[kbn  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*(     vx2-vx3)+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
-            (D.f[TS  ])[kts  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*(    -vx2+vx3)+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
-            (D.f[TNE ])[ktne ] =   c1o216*(drho+ (c1o1+drho) * (c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
-            (D.f[BSW ])[kbsw ] =   c1o216*(drho+ (c1o1+drho) * (c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
-            (D.f[BNE ])[kbne ] =   c1o216*(drho+ (c1o1+drho) * (c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
-            (D.f[TSW ])[ktsw ] =   c1o216*(drho+ (c1o1+drho) * (c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
-            (D.f[TSE ])[ktse ] =   c1o216*(drho+ (c1o1+drho) * (c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
-            (D.f[BNW ])[kbnw ] =   c1o216*(drho+ (c1o1+drho) * (c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
-            (D.f[BSE ])[kbse ] =   c1o216*(drho+ (c1o1+drho) * (c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
-            (D.f[TNW ])[ktnw ] =   c1o216*(drho+ (c1o1+drho) * (c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
+            (D.f[DIR_000])[kzero] =   c8o27* (drho-cu_sq*(c1o1+drho));
+            (D.f[DIR_P00   ])[ke   ] =   c2o27* (drho+ (c1o1+drho) * (c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cu_sq));
+            (D.f[DIR_M00   ])[kw   ] =   c2o27* (drho+ (c1o1+drho) * (c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cu_sq));
+            (D.f[DIR_0P0   ])[kn   ] =   c2o27* (drho+ (c1o1+drho) * (c3o1*(    vx2     )+c9o2*(     vx2    )*(     vx2    )-cu_sq));
+            (D.f[DIR_0M0   ])[ks   ] =   c2o27* (drho+ (c1o1+drho) * (c3o1*(   -vx2     )+c9o2*(    -vx2    )*(    -vx2    )-cu_sq));
+            (D.f[DIR_00P   ])[kt   ] =   c2o27* (drho+ (c1o1+drho) * (c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cu_sq));
+            (D.f[DIR_00M   ])[kb   ] =   c2o27* (drho+ (c1o1+drho) * (c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cu_sq));
+            (D.f[DIR_PP0  ])[kne  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*( vx1+vx2    )+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq));
+            (D.f[DIR_MM0  ])[ksw  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*(-vx1-vx2    )+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq));
+            (D.f[DIR_PM0  ])[kse  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*( vx1-vx2    )+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq));
+            (D.f[DIR_MP0  ])[knw  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*(-vx1+vx2    )+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq));
+            (D.f[DIR_P0P  ])[kte  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*( vx1    +vx3)+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq));
+            (D.f[DIR_M0M  ])[kbw  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*(-vx1    -vx3)+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq));
+            (D.f[DIR_P0M  ])[kbe  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*( vx1    -vx3)+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq));
+            (D.f[DIR_M0P  ])[ktw  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*(-vx1    +vx3)+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq));
+            (D.f[DIR_0PP  ])[ktn  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*(     vx2+vx3)+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq));
+            (D.f[DIR_0MM  ])[kbs  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*(    -vx2-vx3)+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq));
+            (D.f[DIR_0PM  ])[kbn  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*(     vx2-vx3)+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq));
+            (D.f[DIR_0MP  ])[kts  ] =   c1o54* (drho+ (c1o1+drho) * (c3o1*(    -vx2+vx3)+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq));
+            (D.f[DIR_PPP ])[ktne ] =   c1o216*(drho+ (c1o1+drho) * (c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq));
+            (D.f[DIR_MMM ])[kbsw ] =   c1o216*(drho+ (c1o1+drho) * (c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq));
+            (D.f[DIR_PPM ])[kbne ] =   c1o216*(drho+ (c1o1+drho) * (c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq));
+            (D.f[DIR_MMP ])[ktsw ] =   c1o216*(drho+ (c1o1+drho) * (c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq));
+            (D.f[DIR_PMP ])[ktse ] =   c1o216*(drho+ (c1o1+drho) * (c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq));
+            (D.f[DIR_MPM ])[kbnw ] =   c1o216*(drho+ (c1o1+drho) * (c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq));
+            (D.f[DIR_PMM ])[kbse ] =   c1o216*(drho+ (c1o1+drho) * (c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq));
+            (D.f[DIR_MPP ])[ktnw ] =   c1o216*(drho+ (c1o1+drho) * (c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq));
 
             //////////////////////////////////////////////////////////////////////////
 
-            (D.f[REST])[kzero] += (c1o1+drho) * f_ZERO;
-            (D.f[E   ])[ke   ] += (c1o1+drho) * f_E   ;
-            (D.f[W   ])[kw   ] += (c1o1+drho) * f_E   ;
-            (D.f[N   ])[kn   ] += (c1o1+drho) * f_N   ;
-            (D.f[S   ])[ks   ] += (c1o1+drho) * f_N   ;
-            (D.f[T   ])[kt   ] += (c1o1+drho) * f_T   ;
-            (D.f[B   ])[kb   ] += (c1o1+drho) * f_T   ;
-            (D.f[NE  ])[kne  ] += (c1o1+drho) * f_NE  ;
-            (D.f[SW  ])[ksw  ] += (c1o1+drho) * f_NE  ;
-            (D.f[SE  ])[kse  ] += (c1o1+drho) * f_SE  ;
-            (D.f[NW  ])[knw  ] += (c1o1+drho) * f_SE  ;
-            (D.f[TE  ])[kte  ] += (c1o1+drho) * f_TE  ;
-            (D.f[BW  ])[kbw  ] += (c1o1+drho) * f_TE  ;
-            (D.f[BE  ])[kbe  ] += (c1o1+drho) * f_BE  ;
-            (D.f[TW  ])[ktw  ] += (c1o1+drho) * f_BE  ;
-            (D.f[TN  ])[ktn  ] += (c1o1+drho) * f_TN  ;
-            (D.f[BS  ])[kbs  ] += (c1o1+drho) * f_TN  ;
-            (D.f[BN  ])[kbn  ] += (c1o1+drho) * f_BN  ;
-            (D.f[TS  ])[kts  ] += (c1o1+drho) * f_BN  ;
-            (D.f[TNE ])[ktne ] += (c1o1+drho) * f_TNE ;
-            (D.f[BSW ])[kbsw ] += (c1o1+drho) * f_TNE ;
-            (D.f[BNE ])[kbne ] += (c1o1+drho) * f_TSW ;
-            (D.f[TSW ])[ktsw ] += (c1o1+drho) * f_TSW ;
-            (D.f[TSE ])[ktse ] += (c1o1+drho) * f_TSE ;
-            (D.f[BNW ])[kbnw ] += (c1o1+drho) * f_TSE ;
-            (D.f[BSE ])[kbse ] += (c1o1+drho) * f_TNW ;
-            (D.f[TNW ])[ktnw ] += (c1o1+drho) * f_TNW ;
+            (D.f[DIR_000])[kzero] += (c1o1+drho) * f_ZERO;
+            (D.f[DIR_P00   ])[ke   ] += (c1o1+drho) * f_E   ;
+            (D.f[DIR_M00   ])[kw   ] += (c1o1+drho) * f_E   ;
+            (D.f[DIR_0P0   ])[kn   ] += (c1o1+drho) * f_N   ;
+            (D.f[DIR_0M0   ])[ks   ] += (c1o1+drho) * f_N   ;
+            (D.f[DIR_00P   ])[kt   ] += (c1o1+drho) * f_T   ;
+            (D.f[DIR_00M   ])[kb   ] += (c1o1+drho) * f_T   ;
+            (D.f[DIR_PP0  ])[kne  ] += (c1o1+drho) * f_NE  ;
+            (D.f[DIR_MM0  ])[ksw  ] += (c1o1+drho) * f_NE  ;
+            (D.f[DIR_PM0  ])[kse  ] += (c1o1+drho) * f_SE  ;
+            (D.f[DIR_MP0  ])[knw  ] += (c1o1+drho) * f_SE  ;
+            (D.f[DIR_P0P  ])[kte  ] += (c1o1+drho) * f_TE  ;
+            (D.f[DIR_M0M  ])[kbw  ] += (c1o1+drho) * f_TE  ;
+            (D.f[DIR_P0M  ])[kbe  ] += (c1o1+drho) * f_BE  ;
+            (D.f[DIR_M0P  ])[ktw  ] += (c1o1+drho) * f_BE  ;
+            (D.f[DIR_0PP  ])[ktn  ] += (c1o1+drho) * f_TN  ;
+            (D.f[DIR_0MM  ])[kbs  ] += (c1o1+drho) * f_TN  ;
+            (D.f[DIR_0PM  ])[kbn  ] += (c1o1+drho) * f_BN  ;
+            (D.f[DIR_0MP  ])[kts  ] += (c1o1+drho) * f_BN  ;
+            (D.f[DIR_PPP ])[ktne ] += (c1o1+drho) * f_TNE ;
+            (D.f[DIR_MMM ])[kbsw ] += (c1o1+drho) * f_TNE ;
+            (D.f[DIR_PPM ])[kbne ] += (c1o1+drho) * f_TSW ;
+            (D.f[DIR_MMP ])[ktsw ] += (c1o1+drho) * f_TSW ;
+            (D.f[DIR_PMP ])[ktse ] += (c1o1+drho) * f_TSE ;
+            (D.f[DIR_MPM ])[kbnw ] += (c1o1+drho) * f_TSE ;
+            (D.f[DIR_PMM ])[kbse ] += (c1o1+drho) * f_TNW ;
+            (D.f[DIR_MPP ])[ktnw ] += (c1o1+drho) * f_TNW ;
 
             //////////////////////////////////////////////////////////////////////////
         }
@@ -457,9 +457,9 @@ extern "C" __global__ void LB_Init_Comp_Neq_SP_27( unsigned int* neighborX,
 	    {
 		    //////////////////////////////////////////////////////////////////////////
 		    Distributions27 D;
-		    D.f[REST] = &DD[REST*size_Mat];
+		    D.f[DIR_000] = &DD[DIR_000*size_Mat];
 		    //////////////////////////////////////////////////////////////////////////
-		    (D.f[REST])[k] = c96o1;
+		    (D.f[DIR_000])[k] = c96o1;
 		    //////////////////////////////////////////////////////////////////////////
 	    }
    }
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompSP27/InitCompSP27_Device.cuh b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompSP27/InitCompSP27_Device.cuh
index dd9dbd7d0b5d783bdbdcc05dd6c38fad0d60b79f..0e1d89b9ac4778a1fdb4d8e72cd13be5eb3920d1 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompSP27/InitCompSP27_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitCompSP27/InitCompSP27_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Init_Comp_SP_27(unsigned int* neighborX,
+__global__ void LB_Init_Comp_SP_27(unsigned int* neighborX,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
 	unsigned int* geoD,
@@ -16,7 +16,7 @@ extern "C" __global__ void LB_Init_Comp_SP_27(unsigned int* neighborX,
 	real* DD,
 	bool EvenOrOdd);
 
-extern "C" __global__ void LB_Init_Comp_Neq_SP_27(unsigned int* neighborX,
+__global__ void LB_Init_Comp_Neq_SP_27(unsigned int* neighborX,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
 	unsigned int* neighborWSB,
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitF3/InitF3_Device.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitF3/InitF3_Device.cu
index 522c433d4184aaae9dc458bdb70f3e5491cad2d0..25af54e43ec213214615c2edc79d7996e4651c38 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitF3/InitF3_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitF3/InitF3_Device.cu
@@ -7,7 +7,7 @@ using namespace vf::lbm::dir;
 #include "math.h"
 
 
-extern "C" __global__ void LB_Init_F3(unsigned int* neighborX,
+__global__ void LB_Init_F3(unsigned int* neighborX,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
 	unsigned int* geoD,
@@ -41,21 +41,21 @@ extern "C" __global__ void LB_Init_F3(unsigned int* neighborX,
 			Distributions6 D;
 			if (EvenOrOdd == true)
 			{
-				D.g[E] = &G6[E   *size_Mat];
-				D.g[W] = &G6[W   *size_Mat];
-				D.g[N] = &G6[N   *size_Mat];
-				D.g[S] = &G6[S   *size_Mat];
-				D.g[T] = &G6[T   *size_Mat];
-				D.g[B] = &G6[B   *size_Mat];
+				D.g[DIR_P00] = &G6[DIR_P00   *size_Mat];
+				D.g[DIR_M00] = &G6[DIR_M00   *size_Mat];
+				D.g[DIR_0P0] = &G6[DIR_0P0   *size_Mat];
+				D.g[DIR_0M0] = &G6[DIR_0M0   *size_Mat];
+				D.g[DIR_00P] = &G6[DIR_00P   *size_Mat];
+				D.g[DIR_00M] = &G6[DIR_00M   *size_Mat];
 			}
 			else
 			{
-				D.g[W] = &G6[E   *size_Mat];
-				D.g[E] = &G6[W   *size_Mat];
-				D.g[S] = &G6[N   *size_Mat];
-				D.g[N] = &G6[S   *size_Mat];
-				D.g[B] = &G6[T   *size_Mat];
-				D.g[T] = &G6[B   *size_Mat];
+				D.g[DIR_M00] = &G6[DIR_P00   *size_Mat];
+				D.g[DIR_P00] = &G6[DIR_M00   *size_Mat];
+				D.g[DIR_0M0] = &G6[DIR_0P0   *size_Mat];
+				D.g[DIR_0P0] = &G6[DIR_0M0   *size_Mat];
+				D.g[DIR_00M] = &G6[DIR_00P   *size_Mat];
+				D.g[DIR_00P] = &G6[DIR_00M   *size_Mat];
 			}
 			//////////////////////////////////////////////////////////////////////////
 			//index
@@ -69,12 +69,12 @@ extern "C" __global__ void LB_Init_F3(unsigned int* neighborX,
 			unsigned int kb = neighborZ[k];
 			//////////////////////////////////////////////////////////////////////////
 
-			(D.g[E])[ke] = 0.0f;
-			(D.g[W])[kw] = 0.0f;
-			(D.g[N])[kn] = 0.0f;
-			(D.g[S])[ks] = 0.0f;
-			(D.g[T])[kt] = 0.0f;
-			(D.g[B])[kb] = 0.0f;
+			(D.g[DIR_P00])[ke] = 0.0f;
+			(D.g[DIR_M00])[kw] = 0.0f;
+			(D.g[DIR_0P0])[kn] = 0.0f;
+			(D.g[DIR_0M0])[ks] = 0.0f;
+			(D.g[DIR_00P])[kt] = 0.0f;
+			(D.g[DIR_00M])[kb] = 0.0f;
 		}
 	}
 }
\ No newline at end of file
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitF3/InitF3_Device.cuh b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitF3/InitF3_Device.cuh
index 0df2f099bfede16d19edf171016e1a20e1efe0b7..71750c0fde2722a900800f742d801b6caf2febbf 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitF3/InitF3_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitF3/InitF3_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Init_F3(unsigned int* neighborX,
+__global__ void LB_Init_F3(unsigned int* neighborX,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
 	unsigned int* geoD,
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD27/InitIncompAD27_Device.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD27/InitIncompAD27_Device.cu
index 2ab2ec33fcf0131b1fc463327f1d927bb0c5819e..62d766aaa04b6f6349c6c4106e201f36898601ec 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD27/InitIncompAD27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD27/InitIncompAD27_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Init_Incomp_AD_27(unsigned int* neighborX,
+__global__ void LB_Init_Incomp_AD_27(unsigned int* neighborX,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
 	unsigned int* geoD,
@@ -40,63 +40,63 @@ extern "C" __global__ void LB_Init_Incomp_AD_27(unsigned int* neighborX,
          Distributions27 D27;
          if (EvenOrOdd==true)
          {
-            D27.f[E   ] = &DD27[E   *size_Mat];
-            D27.f[W   ] = &DD27[W   *size_Mat];
-            D27.f[N   ] = &DD27[N   *size_Mat];
-            D27.f[S   ] = &DD27[S   *size_Mat];
-            D27.f[T   ] = &DD27[T   *size_Mat];
-            D27.f[B   ] = &DD27[B   *size_Mat];
-            D27.f[NE  ] = &DD27[NE  *size_Mat];
-            D27.f[SW  ] = &DD27[SW  *size_Mat];
-            D27.f[SE  ] = &DD27[SE  *size_Mat];
-            D27.f[NW  ] = &DD27[NW  *size_Mat];
-            D27.f[TE  ] = &DD27[TE  *size_Mat];
-            D27.f[BW  ] = &DD27[BW  *size_Mat];
-            D27.f[BE  ] = &DD27[BE  *size_Mat];
-            D27.f[TW  ] = &DD27[TW  *size_Mat];
-            D27.f[TN  ] = &DD27[TN  *size_Mat];
-            D27.f[BS  ] = &DD27[BS  *size_Mat];
-            D27.f[BN  ] = &DD27[BN  *size_Mat];
-            D27.f[TS  ] = &DD27[TS  *size_Mat];
-            D27.f[REST] = &DD27[REST*size_Mat];
-            D27.f[TNE ] = &DD27[TNE *size_Mat];
-            D27.f[TSW ] = &DD27[TSW *size_Mat];
-            D27.f[TSE ] = &DD27[TSE *size_Mat];
-            D27.f[TNW ] = &DD27[TNW *size_Mat];
-            D27.f[BNE ] = &DD27[BNE *size_Mat];
-            D27.f[BSW ] = &DD27[BSW *size_Mat];
-            D27.f[BSE ] = &DD27[BSE *size_Mat];
-            D27.f[BNW ] = &DD27[BNW *size_Mat];
+            D27.f[DIR_P00   ] = &DD27[DIR_P00   *size_Mat];
+            D27.f[DIR_M00   ] = &DD27[DIR_M00   *size_Mat];
+            D27.f[DIR_0P0   ] = &DD27[DIR_0P0   *size_Mat];
+            D27.f[DIR_0M0   ] = &DD27[DIR_0M0   *size_Mat];
+            D27.f[DIR_00P   ] = &DD27[DIR_00P   *size_Mat];
+            D27.f[DIR_00M   ] = &DD27[DIR_00M   *size_Mat];
+            D27.f[DIR_PP0  ] = &DD27[DIR_PP0  *size_Mat];
+            D27.f[DIR_MM0  ] = &DD27[DIR_MM0  *size_Mat];
+            D27.f[DIR_PM0  ] = &DD27[DIR_PM0  *size_Mat];
+            D27.f[DIR_MP0  ] = &DD27[DIR_MP0  *size_Mat];
+            D27.f[DIR_P0P  ] = &DD27[DIR_P0P  *size_Mat];
+            D27.f[DIR_M0M  ] = &DD27[DIR_M0M  *size_Mat];
+            D27.f[DIR_P0M  ] = &DD27[DIR_P0M  *size_Mat];
+            D27.f[DIR_M0P  ] = &DD27[DIR_M0P  *size_Mat];
+            D27.f[DIR_0PP  ] = &DD27[DIR_0PP  *size_Mat];
+            D27.f[DIR_0MM  ] = &DD27[DIR_0MM  *size_Mat];
+            D27.f[DIR_0PM  ] = &DD27[DIR_0PM  *size_Mat];
+            D27.f[DIR_0MP  ] = &DD27[DIR_0MP  *size_Mat];
+            D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
+            D27.f[DIR_PPP ] = &DD27[DIR_PPP *size_Mat];
+            D27.f[DIR_MMP ] = &DD27[DIR_MMP *size_Mat];
+            D27.f[DIR_PMP ] = &DD27[DIR_PMP *size_Mat];
+            D27.f[DIR_MPP ] = &DD27[DIR_MPP *size_Mat];
+            D27.f[DIR_PPM ] = &DD27[DIR_PPM *size_Mat];
+            D27.f[DIR_MMM ] = &DD27[DIR_MMM *size_Mat];
+            D27.f[DIR_PMM ] = &DD27[DIR_PMM *size_Mat];
+            D27.f[DIR_MPM ] = &DD27[DIR_MPM *size_Mat];
          }
          else
          {
-            D27.f[W   ] = &DD27[E   *size_Mat];
-            D27.f[E   ] = &DD27[W   *size_Mat];
-            D27.f[S   ] = &DD27[N   *size_Mat];
-            D27.f[N   ] = &DD27[S   *size_Mat];
-            D27.f[B   ] = &DD27[T   *size_Mat];
-            D27.f[T   ] = &DD27[B   *size_Mat];
-            D27.f[SW  ] = &DD27[NE  *size_Mat];
-            D27.f[NE  ] = &DD27[SW  *size_Mat];
-            D27.f[NW  ] = &DD27[SE  *size_Mat];
-            D27.f[SE  ] = &DD27[NW  *size_Mat];
-            D27.f[BW  ] = &DD27[TE  *size_Mat];
-            D27.f[TE  ] = &DD27[BW  *size_Mat];
-            D27.f[TW  ] = &DD27[BE  *size_Mat];
-            D27.f[BE  ] = &DD27[TW  *size_Mat];
-            D27.f[BS  ] = &DD27[TN  *size_Mat];
-            D27.f[TN  ] = &DD27[BS  *size_Mat];
-            D27.f[TS  ] = &DD27[BN  *size_Mat];
-            D27.f[BN  ] = &DD27[TS  *size_Mat];
-            D27.f[REST] = &DD27[REST*size_Mat];
-            D27.f[BSW ] = &DD27[TNE *size_Mat];
-            D27.f[BNE ] = &DD27[TSW *size_Mat];
-            D27.f[BNW ] = &DD27[TSE *size_Mat];
-            D27.f[BSE ] = &DD27[TNW *size_Mat];
-            D27.f[TSW ] = &DD27[BNE *size_Mat];
-            D27.f[TNE ] = &DD27[BSW *size_Mat];
-            D27.f[TNW ] = &DD27[BSE *size_Mat];
-            D27.f[TSE ] = &DD27[BNW *size_Mat];
+            D27.f[DIR_M00   ] = &DD27[DIR_P00   *size_Mat];
+            D27.f[DIR_P00   ] = &DD27[DIR_M00   *size_Mat];
+            D27.f[DIR_0M0   ] = &DD27[DIR_0P0   *size_Mat];
+            D27.f[DIR_0P0   ] = &DD27[DIR_0M0   *size_Mat];
+            D27.f[DIR_00M   ] = &DD27[DIR_00P   *size_Mat];
+            D27.f[DIR_00P   ] = &DD27[DIR_00M   *size_Mat];
+            D27.f[DIR_MM0  ] = &DD27[DIR_PP0  *size_Mat];
+            D27.f[DIR_PP0  ] = &DD27[DIR_MM0  *size_Mat];
+            D27.f[DIR_MP0  ] = &DD27[DIR_PM0  *size_Mat];
+            D27.f[DIR_PM0  ] = &DD27[DIR_MP0  *size_Mat];
+            D27.f[DIR_M0M  ] = &DD27[DIR_P0P  *size_Mat];
+            D27.f[DIR_P0P  ] = &DD27[DIR_M0M  *size_Mat];
+            D27.f[DIR_M0P  ] = &DD27[DIR_P0M  *size_Mat];
+            D27.f[DIR_P0M  ] = &DD27[DIR_M0P  *size_Mat];
+            D27.f[DIR_0MM  ] = &DD27[DIR_0PP  *size_Mat];
+            D27.f[DIR_0PP  ] = &DD27[DIR_0MM  *size_Mat];
+            D27.f[DIR_0MP  ] = &DD27[DIR_0PM  *size_Mat];
+            D27.f[DIR_0PM  ] = &DD27[DIR_0MP  *size_Mat];
+            D27.f[DIR_000] = &DD27[DIR_000*size_Mat];
+            D27.f[DIR_MMM ] = &DD27[DIR_PPP *size_Mat];
+            D27.f[DIR_PPM ] = &DD27[DIR_MMP *size_Mat];
+            D27.f[DIR_MPM ] = &DD27[DIR_PMP *size_Mat];
+            D27.f[DIR_PMM ] = &DD27[DIR_MPP *size_Mat];
+            D27.f[DIR_MMP ] = &DD27[DIR_PPM *size_Mat];
+            D27.f[DIR_PPP ] = &DD27[DIR_MMM *size_Mat];
+            D27.f[DIR_MPP ] = &DD27[DIR_PMM *size_Mat];
+            D27.f[DIR_PMP ] = &DD27[DIR_MPM *size_Mat];
          }
          //////////////////////////////////////////////////////////////////////////
          real ConcD = Conc[k];
@@ -140,33 +140,33 @@ extern "C" __global__ void LB_Init_Incomp_AD_27(unsigned int* neighborX,
          ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
          real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
-         (D27.f[REST])[kzero] =   c8o27* ConcD*(c1o1-cu_sq);
-         (D27.f[E   ])[ke   ] =   c2o27* ConcD*(c1o1+c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cu_sq);
-         (D27.f[W   ])[kw   ] =   c2o27* ConcD*(c1o1+c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cu_sq);
-         (D27.f[N   ])[kn   ] =   c2o27* ConcD*(c1o1+c3o1*(    vx2     )+c9o2*(     vx2    )*(     vx2    )-cu_sq);
-         (D27.f[S   ])[ks   ] =   c2o27* ConcD*(c1o1+c3o1*(   -vx2     )+c9o2*(    -vx2    )*(    -vx2    )-cu_sq);
-         (D27.f[T   ])[kt   ] =   c2o27* ConcD*(c1o1+c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cu_sq);
-         (D27.f[B   ])[kb   ] =   c2o27* ConcD*(c1o1+c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cu_sq);
-         (D27.f[NE  ])[kne  ] =   c1o54* ConcD*(c1o1+c3o1*( vx1+vx2    )+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq);
-         (D27.f[SW  ])[ksw  ] =   c1o54* ConcD*(c1o1+c3o1*(-vx1-vx2    )+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq);
-         (D27.f[SE  ])[kse  ] =   c1o54* ConcD*(c1o1+c3o1*( vx1-vx2    )+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq);
-         (D27.f[NW  ])[knw  ] =   c1o54* ConcD*(c1o1+c3o1*(-vx1+vx2    )+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq);
-         (D27.f[TE  ])[kte  ] =   c1o54* ConcD*(c1o1+c3o1*( vx1    +vx3)+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq);
-         (D27.f[BW  ])[kbw  ] =   c1o54* ConcD*(c1o1+c3o1*(-vx1    -vx3)+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq);
-         (D27.f[BE  ])[kbe  ] =   c1o54* ConcD*(c1o1+c3o1*( vx1    -vx3)+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq);
-         (D27.f[TW  ])[ktw  ] =   c1o54* ConcD*(c1o1+c3o1*(-vx1    +vx3)+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq);
-         (D27.f[TN  ])[ktn  ] =   c1o54* ConcD*(c1o1+c3o1*(     vx2+vx3)+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq);
-         (D27.f[BS  ])[kbs  ] =   c1o54* ConcD*(c1o1+c3o1*(    -vx2-vx3)+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq);
-         (D27.f[BN  ])[kbn  ] =   c1o54* ConcD*(c1o1+c3o1*(     vx2-vx3)+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq);
-         (D27.f[TS  ])[kts  ] =   c1o54* ConcD*(c1o1+c3o1*(    -vx2+vx3)+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq);
-         (D27.f[TNE ])[ktne ] =   c1o216*ConcD*(c1o1+c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq);
-         (D27.f[BSW ])[kbsw ] =   c1o216*ConcD*(c1o1+c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq);
-         (D27.f[BNE ])[kbne ] =   c1o216*ConcD*(c1o1+c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq);
-         (D27.f[TSW ])[ktsw ] =   c1o216*ConcD*(c1o1+c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq);
-         (D27.f[TSE ])[ktse ] =   c1o216*ConcD*(c1o1+c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq);
-         (D27.f[BNW ])[kbnw ] =   c1o216*ConcD*(c1o1+c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq);
-         (D27.f[BSE ])[kbse ] =   c1o216*ConcD*(c1o1+c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq);
-         (D27.f[TNW ])[ktnw ] =   c1o216*ConcD*(c1o1+c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq);
+         (D27.f[DIR_000])[kzero] =   c8o27* ConcD*(c1o1-cu_sq);
+         (D27.f[DIR_P00   ])[ke   ] =   c2o27* ConcD*(c1o1+c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cu_sq);
+         (D27.f[DIR_M00   ])[kw   ] =   c2o27* ConcD*(c1o1+c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cu_sq);
+         (D27.f[DIR_0P0   ])[kn   ] =   c2o27* ConcD*(c1o1+c3o1*(    vx2     )+c9o2*(     vx2    )*(     vx2    )-cu_sq);
+         (D27.f[DIR_0M0   ])[ks   ] =   c2o27* ConcD*(c1o1+c3o1*(   -vx2     )+c9o2*(    -vx2    )*(    -vx2    )-cu_sq);
+         (D27.f[DIR_00P   ])[kt   ] =   c2o27* ConcD*(c1o1+c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cu_sq);
+         (D27.f[DIR_00M   ])[kb   ] =   c2o27* ConcD*(c1o1+c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cu_sq);
+         (D27.f[DIR_PP0  ])[kne  ] =   c1o54* ConcD*(c1o1+c3o1*( vx1+vx2    )+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq);
+         (D27.f[DIR_MM0  ])[ksw  ] =   c1o54* ConcD*(c1o1+c3o1*(-vx1-vx2    )+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq);
+         (D27.f[DIR_PM0  ])[kse  ] =   c1o54* ConcD*(c1o1+c3o1*( vx1-vx2    )+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq);
+         (D27.f[DIR_MP0  ])[knw  ] =   c1o54* ConcD*(c1o1+c3o1*(-vx1+vx2    )+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq);
+         (D27.f[DIR_P0P  ])[kte  ] =   c1o54* ConcD*(c1o1+c3o1*( vx1    +vx3)+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq);
+         (D27.f[DIR_M0M  ])[kbw  ] =   c1o54* ConcD*(c1o1+c3o1*(-vx1    -vx3)+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq);
+         (D27.f[DIR_P0M  ])[kbe  ] =   c1o54* ConcD*(c1o1+c3o1*( vx1    -vx3)+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq);
+         (D27.f[DIR_M0P  ])[ktw  ] =   c1o54* ConcD*(c1o1+c3o1*(-vx1    +vx3)+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq);
+         (D27.f[DIR_0PP  ])[ktn  ] =   c1o54* ConcD*(c1o1+c3o1*(     vx2+vx3)+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq);
+         (D27.f[DIR_0MM  ])[kbs  ] =   c1o54* ConcD*(c1o1+c3o1*(    -vx2-vx3)+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq);
+         (D27.f[DIR_0PM  ])[kbn  ] =   c1o54* ConcD*(c1o1+c3o1*(     vx2-vx3)+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq);
+         (D27.f[DIR_0MP  ])[kts  ] =   c1o54* ConcD*(c1o1+c3o1*(    -vx2+vx3)+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq);
+         (D27.f[DIR_PPP ])[ktne ] =   c1o216*ConcD*(c1o1+c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq);
+         (D27.f[DIR_MMM ])[kbsw ] =   c1o216*ConcD*(c1o1+c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq);
+         (D27.f[DIR_PPM ])[kbne ] =   c1o216*ConcD*(c1o1+c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq);
+         (D27.f[DIR_MMP ])[ktsw ] =   c1o216*ConcD*(c1o1+c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq);
+         (D27.f[DIR_PMP ])[ktse ] =   c1o216*ConcD*(c1o1+c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq);
+         (D27.f[DIR_MPM ])[kbnw ] =   c1o216*ConcD*(c1o1+c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq);
+         (D27.f[DIR_PMM ])[kbse ] =   c1o216*ConcD*(c1o1+c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq);
+         (D27.f[DIR_MPP ])[ktnw ] =   c1o216*ConcD*(c1o1+c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq);
          ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
       }
    }
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD27/InitIncompAD27_Device.cuh b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD27/InitIncompAD27_Device.cuh
index a67b7a9f446fc49983d0a9f2e309dfaec3649547..85b8f1b7ae9f814b1a15aa2a8af4aa2d7c3645b5 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD27/InitIncompAD27_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD27/InitIncompAD27_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Init_Incomp_AD_27(unsigned int* neighborX,
+__global__ void LB_Init_Incomp_AD_27(unsigned int* neighborX,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
 	unsigned int* geoD,
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD7/InitIncompAD7_Device.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD7/InitIncompAD7_Device.cu
index 2b0e85f38443889fcdbe64bb2cef6a02a7f654a9..94a4352d43dee67117f66eaf03536c5ea3e15edd 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD7/InitIncompAD7_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD7/InitIncompAD7_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Init_Incomp_AD_7(unsigned int* neighborX,
+__global__ void LB_Init_Incomp_AD_7(unsigned int* neighborX,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
 	unsigned int* geoD,
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD7/InitIncompAD7_Device.cuh b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD7/InitIncompAD7_Device.cuh
index 9dedbf270e6e53283d41d315d67135f9c57940db..2d381e6e0531b900cee90eb8f7e2f3d4ff8615f7 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD7/InitIncompAD7_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitIncompAD7/InitIncompAD7_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Init_Incomp_AD_7(unsigned int* neighborX,
+__global__ void LB_Init_Incomp_AD_7(unsigned int* neighborX,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
 	unsigned int* geoD,
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitSP27/InitSP27_Device.cu b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitSP27/InitSP27_Device.cu
index 27dd33b14086730b157d1ffba495d9ecc5b7b221..b58935feb0bf276a2d8da3f36efbb1fb0ab9d13f 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitSP27/InitSP27_Device.cu
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitSP27/InitSP27_Device.cu
@@ -6,7 +6,7 @@ using namespace vf::lbm::constant;
 using namespace vf::lbm::dir;
 #include "math.h"
 
-extern "C" __global__ void LB_Init_SP_27(unsigned int* neighborX,
+__global__ void LB_Init_SP_27(unsigned int* neighborX,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
 	unsigned int* geoD,
@@ -40,63 +40,63 @@ extern "C" __global__ void LB_Init_SP_27(unsigned int* neighborX,
          Distributions27 D;
          if (EvenOrOdd==true)
          {
-            D.f[E   ] = &DD[E   *size_Mat];
-            D.f[W   ] = &DD[W   *size_Mat];
-            D.f[N   ] = &DD[N   *size_Mat];
-            D.f[S   ] = &DD[S   *size_Mat];
-            D.f[T   ] = &DD[T   *size_Mat];
-            D.f[B   ] = &DD[B   *size_Mat];
-            D.f[NE  ] = &DD[NE  *size_Mat];
-            D.f[SW  ] = &DD[SW  *size_Mat];
-            D.f[SE  ] = &DD[SE  *size_Mat];
-            D.f[NW  ] = &DD[NW  *size_Mat];
-            D.f[TE  ] = &DD[TE  *size_Mat];
-            D.f[BW  ] = &DD[BW  *size_Mat];
-            D.f[BE  ] = &DD[BE  *size_Mat];
-            D.f[TW  ] = &DD[TW  *size_Mat];
-            D.f[TN  ] = &DD[TN  *size_Mat];
-            D.f[BS  ] = &DD[BS  *size_Mat];
-            D.f[BN  ] = &DD[BN  *size_Mat];
-            D.f[TS  ] = &DD[TS  *size_Mat];
-            D.f[REST] = &DD[REST*size_Mat];
-            D.f[TNE ] = &DD[TNE *size_Mat];
-            D.f[TSW ] = &DD[TSW *size_Mat];
-            D.f[TSE ] = &DD[TSE *size_Mat];
-            D.f[TNW ] = &DD[TNW *size_Mat];
-            D.f[BNE ] = &DD[BNE *size_Mat];
-            D.f[BSW ] = &DD[BSW *size_Mat];
-            D.f[BSE ] = &DD[BSE *size_Mat];
-            D.f[BNW ] = &DD[BNW *size_Mat];
+            D.f[DIR_P00   ] = &DD[DIR_P00   *size_Mat];
+            D.f[DIR_M00   ] = &DD[DIR_M00   *size_Mat];
+            D.f[DIR_0P0   ] = &DD[DIR_0P0   *size_Mat];
+            D.f[DIR_0M0   ] = &DD[DIR_0M0   *size_Mat];
+            D.f[DIR_00P   ] = &DD[DIR_00P   *size_Mat];
+            D.f[DIR_00M   ] = &DD[DIR_00M   *size_Mat];
+            D.f[DIR_PP0  ] = &DD[DIR_PP0  *size_Mat];
+            D.f[DIR_MM0  ] = &DD[DIR_MM0  *size_Mat];
+            D.f[DIR_PM0  ] = &DD[DIR_PM0  *size_Mat];
+            D.f[DIR_MP0  ] = &DD[DIR_MP0  *size_Mat];
+            D.f[DIR_P0P  ] = &DD[DIR_P0P  *size_Mat];
+            D.f[DIR_M0M  ] = &DD[DIR_M0M  *size_Mat];
+            D.f[DIR_P0M  ] = &DD[DIR_P0M  *size_Mat];
+            D.f[DIR_M0P  ] = &DD[DIR_M0P  *size_Mat];
+            D.f[DIR_0PP  ] = &DD[DIR_0PP  *size_Mat];
+            D.f[DIR_0MM  ] = &DD[DIR_0MM  *size_Mat];
+            D.f[DIR_0PM  ] = &DD[DIR_0PM  *size_Mat];
+            D.f[DIR_0MP  ] = &DD[DIR_0MP  *size_Mat];
+            D.f[DIR_000] = &DD[DIR_000*size_Mat];
+            D.f[DIR_PPP ] = &DD[DIR_PPP *size_Mat];
+            D.f[DIR_MMP ] = &DD[DIR_MMP *size_Mat];
+            D.f[DIR_PMP ] = &DD[DIR_PMP *size_Mat];
+            D.f[DIR_MPP ] = &DD[DIR_MPP *size_Mat];
+            D.f[DIR_PPM ] = &DD[DIR_PPM *size_Mat];
+            D.f[DIR_MMM ] = &DD[DIR_MMM *size_Mat];
+            D.f[DIR_PMM ] = &DD[DIR_PMM *size_Mat];
+            D.f[DIR_MPM ] = &DD[DIR_MPM *size_Mat];
          }
          else
          {
-            D.f[W   ] = &DD[E   *size_Mat];
-            D.f[E   ] = &DD[W   *size_Mat];
-            D.f[S   ] = &DD[N   *size_Mat];
-            D.f[N   ] = &DD[S   *size_Mat];
-            D.f[B   ] = &DD[T   *size_Mat];
-            D.f[T   ] = &DD[B   *size_Mat];
-            D.f[SW  ] = &DD[NE  *size_Mat];
-            D.f[NE  ] = &DD[SW  *size_Mat];
-            D.f[NW  ] = &DD[SE  *size_Mat];
-            D.f[SE  ] = &DD[NW  *size_Mat];
-            D.f[BW  ] = &DD[TE  *size_Mat];
-            D.f[TE  ] = &DD[BW  *size_Mat];
-            D.f[TW  ] = &DD[BE  *size_Mat];
-            D.f[BE  ] = &DD[TW  *size_Mat];
-            D.f[BS  ] = &DD[TN  *size_Mat];
-            D.f[TN  ] = &DD[BS  *size_Mat];
-            D.f[TS  ] = &DD[BN  *size_Mat];
-            D.f[BN  ] = &DD[TS  *size_Mat];
-            D.f[REST] = &DD[REST*size_Mat];
-            D.f[BSW ] = &DD[TNE *size_Mat];
-            D.f[BNE ] = &DD[TSW *size_Mat];
-            D.f[BNW ] = &DD[TSE *size_Mat];
-            D.f[BSE ] = &DD[TNW *size_Mat];
-            D.f[TSW ] = &DD[BNE *size_Mat];
-            D.f[TNE ] = &DD[BSW *size_Mat];
-            D.f[TNW ] = &DD[BSE *size_Mat];
-            D.f[TSE ] = &DD[BNW *size_Mat];
+            D.f[DIR_M00   ] = &DD[DIR_P00   *size_Mat];
+            D.f[DIR_P00   ] = &DD[DIR_M00   *size_Mat];
+            D.f[DIR_0M0   ] = &DD[DIR_0P0   *size_Mat];
+            D.f[DIR_0P0   ] = &DD[DIR_0M0   *size_Mat];
+            D.f[DIR_00M   ] = &DD[DIR_00P   *size_Mat];
+            D.f[DIR_00P   ] = &DD[DIR_00M   *size_Mat];
+            D.f[DIR_MM0  ] = &DD[DIR_PP0  *size_Mat];
+            D.f[DIR_PP0  ] = &DD[DIR_MM0  *size_Mat];
+            D.f[DIR_MP0  ] = &DD[DIR_PM0  *size_Mat];
+            D.f[DIR_PM0  ] = &DD[DIR_MP0  *size_Mat];
+            D.f[DIR_M0M  ] = &DD[DIR_P0P  *size_Mat];
+            D.f[DIR_P0P  ] = &DD[DIR_M0M  *size_Mat];
+            D.f[DIR_M0P  ] = &DD[DIR_P0M  *size_Mat];
+            D.f[DIR_P0M  ] = &DD[DIR_M0P  *size_Mat];
+            D.f[DIR_0MM  ] = &DD[DIR_0PP  *size_Mat];
+            D.f[DIR_0PP  ] = &DD[DIR_0MM  *size_Mat];
+            D.f[DIR_0MP  ] = &DD[DIR_0PM  *size_Mat];
+            D.f[DIR_0PM  ] = &DD[DIR_0MP  *size_Mat];
+            D.f[DIR_000] = &DD[DIR_000*size_Mat];
+            D.f[DIR_MMM ] = &DD[DIR_PPP *size_Mat];
+            D.f[DIR_PPM ] = &DD[DIR_MMP *size_Mat];
+            D.f[DIR_MPM ] = &DD[DIR_PMP *size_Mat];
+            D.f[DIR_PMM ] = &DD[DIR_MPP *size_Mat];
+            D.f[DIR_MMP ] = &DD[DIR_PPM *size_Mat];
+            D.f[DIR_PPP ] = &DD[DIR_MMM *size_Mat];
+            D.f[DIR_MPP ] = &DD[DIR_PMM *size_Mat];
+            D.f[DIR_PMP ] = &DD[DIR_MPM *size_Mat];
          }
          //////////////////////////////////////////////////////////////////////////
          real drho = rho[k];//0.0f;//
@@ -136,41 +136,41 @@ extern "C" __global__ void LB_Init_SP_27(unsigned int* neighborX,
          //////////////////////////////////////////////////////////////////////////
          real cu_sq=c3o2*(vx1*vx1+vx2*vx2+vx3*vx3);
 
-         (D.f[REST])[kzero] =   c8o27* (drho-cu_sq);
-         (D.f[E   ])[ke   ] =   c2o27* (drho+c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cu_sq);
-         (D.f[W   ])[kw   ] =   c2o27* (drho+c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cu_sq);
-         (D.f[N   ])[kn   ] =   c2o27* (drho+c3o1*(    vx2     )+c9o2*(     vx2    )*(     vx2    )-cu_sq);
-         (D.f[S   ])[ks   ] =   c2o27* (drho+c3o1*(   -vx2     )+c9o2*(    -vx2    )*(    -vx2    )-cu_sq);
-         (D.f[T   ])[kt   ] =   c2o27* (drho+c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cu_sq);
-         (D.f[B   ])[kb   ] =   c2o27* (drho+c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cu_sq);
-         (D.f[NE  ])[kne  ] =   c1o54* (drho+c3o1*( vx1+vx2    )+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq);
-         (D.f[SW  ])[ksw  ] =   c1o54* (drho+c3o1*(-vx1-vx2    )+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq);
-         (D.f[SE  ])[kse  ] =   c1o54* (drho+c3o1*( vx1-vx2    )+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq);
-         (D.f[NW  ])[knw  ] =   c1o54* (drho+c3o1*(-vx1+vx2    )+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq);
-         (D.f[TE  ])[kte  ] =   c1o54* (drho+c3o1*( vx1    +vx3)+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq);
-         (D.f[BW  ])[kbw  ] =   c1o54* (drho+c3o1*(-vx1    -vx3)+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq);
-         (D.f[BE  ])[kbe  ] =   c1o54* (drho+c3o1*( vx1    -vx3)+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq);
-         (D.f[TW  ])[ktw  ] =   c1o54* (drho+c3o1*(-vx1    +vx3)+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq);
-         (D.f[TN  ])[ktn  ] =   c1o54* (drho+c3o1*(     vx2+vx3)+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq);
-         (D.f[BS  ])[kbs  ] =   c1o54* (drho+c3o1*(    -vx2-vx3)+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq);
-         (D.f[BN  ])[kbn  ] =   c1o54* (drho+c3o1*(     vx2-vx3)+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq);
-         (D.f[TS  ])[kts  ] =   c1o54* (drho+c3o1*(    -vx2+vx3)+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq);
-         (D.f[TNE ])[ktne ] =   c1o216*(drho+c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq);
-         (D.f[BSW ])[kbsw ] =   c1o216*(drho+c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq);
-         (D.f[BNE ])[kbne ] =   c1o216*(drho+c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq);
-         (D.f[TSW ])[ktsw ] =   c1o216*(drho+c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq);
-         (D.f[TSE ])[ktse ] =   c1o216*(drho+c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq);
-         (D.f[BNW ])[kbnw ] =   c1o216*(drho+c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq);
-         (D.f[BSE ])[kbse ] =   c1o216*(drho+c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq);
-         (D.f[TNW ])[ktnw ] =   c1o216*(drho+c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq);
+         (D.f[DIR_000])[kzero] =   c8o27* (drho-cu_sq);
+         (D.f[DIR_P00   ])[ke   ] =   c2o27* (drho+c3o1*( vx1        )+c9o2*( vx1        )*( vx1        )-cu_sq);
+         (D.f[DIR_M00   ])[kw   ] =   c2o27* (drho+c3o1*(-vx1        )+c9o2*(-vx1        )*(-vx1        )-cu_sq);
+         (D.f[DIR_0P0   ])[kn   ] =   c2o27* (drho+c3o1*(    vx2     )+c9o2*(     vx2    )*(     vx2    )-cu_sq);
+         (D.f[DIR_0M0   ])[ks   ] =   c2o27* (drho+c3o1*(   -vx2     )+c9o2*(    -vx2    )*(    -vx2    )-cu_sq);
+         (D.f[DIR_00P   ])[kt   ] =   c2o27* (drho+c3o1*(         vx3)+c9o2*(         vx3)*(         vx3)-cu_sq);
+         (D.f[DIR_00M   ])[kb   ] =   c2o27* (drho+c3o1*(        -vx3)+c9o2*(        -vx3)*(        -vx3)-cu_sq);
+         (D.f[DIR_PP0  ])[kne  ] =   c1o54* (drho+c3o1*( vx1+vx2    )+c9o2*( vx1+vx2    )*( vx1+vx2    )-cu_sq);
+         (D.f[DIR_MM0  ])[ksw  ] =   c1o54* (drho+c3o1*(-vx1-vx2    )+c9o2*(-vx1-vx2    )*(-vx1-vx2    )-cu_sq);
+         (D.f[DIR_PM0  ])[kse  ] =   c1o54* (drho+c3o1*( vx1-vx2    )+c9o2*( vx1-vx2    )*( vx1-vx2    )-cu_sq);
+         (D.f[DIR_MP0  ])[knw  ] =   c1o54* (drho+c3o1*(-vx1+vx2    )+c9o2*(-vx1+vx2    )*(-vx1+vx2    )-cu_sq);
+         (D.f[DIR_P0P  ])[kte  ] =   c1o54* (drho+c3o1*( vx1    +vx3)+c9o2*( vx1    +vx3)*( vx1    +vx3)-cu_sq);
+         (D.f[DIR_M0M  ])[kbw  ] =   c1o54* (drho+c3o1*(-vx1    -vx3)+c9o2*(-vx1    -vx3)*(-vx1    -vx3)-cu_sq);
+         (D.f[DIR_P0M  ])[kbe  ] =   c1o54* (drho+c3o1*( vx1    -vx3)+c9o2*( vx1    -vx3)*( vx1    -vx3)-cu_sq);
+         (D.f[DIR_M0P  ])[ktw  ] =   c1o54* (drho+c3o1*(-vx1    +vx3)+c9o2*(-vx1    +vx3)*(-vx1    +vx3)-cu_sq);
+         (D.f[DIR_0PP  ])[ktn  ] =   c1o54* (drho+c3o1*(     vx2+vx3)+c9o2*(     vx2+vx3)*(     vx2+vx3)-cu_sq);
+         (D.f[DIR_0MM  ])[kbs  ] =   c1o54* (drho+c3o1*(    -vx2-vx3)+c9o2*(    -vx2-vx3)*(    -vx2-vx3)-cu_sq);
+         (D.f[DIR_0PM  ])[kbn  ] =   c1o54* (drho+c3o1*(     vx2-vx3)+c9o2*(     vx2-vx3)*(     vx2-vx3)-cu_sq);
+         (D.f[DIR_0MP  ])[kts  ] =   c1o54* (drho+c3o1*(    -vx2+vx3)+c9o2*(    -vx2+vx3)*(    -vx2+vx3)-cu_sq);
+         (D.f[DIR_PPP ])[ktne ] =   c1o216*(drho+c3o1*( vx1+vx2+vx3)+c9o2*( vx1+vx2+vx3)*( vx1+vx2+vx3)-cu_sq);
+         (D.f[DIR_MMM ])[kbsw ] =   c1o216*(drho+c3o1*(-vx1-vx2-vx3)+c9o2*(-vx1-vx2-vx3)*(-vx1-vx2-vx3)-cu_sq);
+         (D.f[DIR_PPM ])[kbne ] =   c1o216*(drho+c3o1*( vx1+vx2-vx3)+c9o2*( vx1+vx2-vx3)*( vx1+vx2-vx3)-cu_sq);
+         (D.f[DIR_MMP ])[ktsw ] =   c1o216*(drho+c3o1*(-vx1-vx2+vx3)+c9o2*(-vx1-vx2+vx3)*(-vx1-vx2+vx3)-cu_sq);
+         (D.f[DIR_PMP ])[ktse ] =   c1o216*(drho+c3o1*( vx1-vx2+vx3)+c9o2*( vx1-vx2+vx3)*( vx1-vx2+vx3)-cu_sq);
+         (D.f[DIR_MPM ])[kbnw ] =   c1o216*(drho+c3o1*(-vx1+vx2-vx3)+c9o2*(-vx1+vx2-vx3)*(-vx1+vx2-vx3)-cu_sq);
+         (D.f[DIR_PMM ])[kbse ] =   c1o216*(drho+c3o1*( vx1-vx2-vx3)+c9o2*( vx1-vx2-vx3)*( vx1-vx2-vx3)-cu_sq);
+         (D.f[DIR_MPP ])[ktnw ] =   c1o216*(drho+c3o1*(-vx1+vx2+vx3)+c9o2*(-vx1+vx2+vx3)*(-vx1+vx2+vx3)-cu_sq);
       }
 	  else
 	  {
 		  //////////////////////////////////////////////////////////////////////////
 		  Distributions27 D;
-		  D.f[REST] = &DD[REST*size_Mat];
+		  D.f[DIR_000] = &DD[DIR_000*size_Mat];
 		  //////////////////////////////////////////////////////////////////////////
-		  (D.f[REST])[k] = c96o1;
+		  (D.f[DIR_000])[k] = c96o1;
 		  //////////////////////////////////////////////////////////////////////////
 	  }
    }
diff --git a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitSP27/InitSP27_Device.cuh b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitSP27/InitSP27_Device.cuh
index 038b10f7bd85786fe3b3ef4c4d70d11816543def..2cb9c74506862e4c4766b2416dfc7f1a0024e6b0 100644
--- a/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitSP27/InitSP27_Device.cuh
+++ b/src/gpu/VirtualFluids_GPU/PreProcessor/PreProcessorStrategy/InitSP27/InitSP27_Device.cuh
@@ -4,7 +4,7 @@
 #include <DataTypes.h>
 #include <curand.h>
 
-extern "C" __global__ void LB_Init_SP_27(unsigned int* neighborX,
+__global__ void LB_Init_SP_27(unsigned int* neighborX,
 	unsigned int* neighborY,
 	unsigned int* neighborZ,
 	unsigned int* geoD,
diff --git a/src/gpu/VirtualFluids_GPU/Temperature/FindQTemp.h b/src/gpu/VirtualFluids_GPU/Temperature/FindQTemp.h
index 9c1b2d86da5823c552e14c686430cdf30e21f85d..8704431e95d86ddfc4df8f14ed189592250283be 100644
--- a/src/gpu/VirtualFluids_GPU/Temperature/FindQTemp.h
+++ b/src/gpu/VirtualFluids_GPU/Temperature/FindQTemp.h
@@ -6,17 +6,17 @@
 #include "Parameter/Parameter.h"
 
 
-extern "C" void findTempPress(Parameter* para);
+void findTempPress(Parameter* para);
 
-extern "C" void findKforTempPress(Parameter* para);
+void findKforTempPress(Parameter* para);
 
-extern "C" void findTempVel(Parameter* para);
+void findTempVel(Parameter* para);
 
-extern "C" void findKforTempVel(Parameter* para);
+void findKforTempVel(Parameter* para);
 
-extern "C" void findTemp(Parameter* para);
+void findTemp(Parameter* para);
 
-extern "C" void findKforTemp(Parameter* para);
+void findKforTemp(Parameter* para);
 
 
 #endif
diff --git a/src/gpu/VirtualFluids_GPU/TurbulenceModels/TurbulenceModelFactory.cpp b/src/gpu/VirtualFluids_GPU/TurbulenceModels/TurbulenceModelFactory.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..980a199b03710ac91310d26424c20c7371f1097d
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/TurbulenceModels/TurbulenceModelFactory.cpp
@@ -0,0 +1,88 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file TurbulenceModelFactory.cpp
+//! \ingroup TurbulentViscosity
+//! \author Henrik Asmuth
+//=======================================================================================
+#include "LBM/LB.h"
+#include "TurbulenceModelFactory.h"
+#include "GPU/TurbulentViscosityKernels.h"
+#include "Parameter/Parameter.h"
+#include <logger/Logger.h>
+
+#include <variant>
+
+void TurbulenceModelFactory::setTurbulenceModel(TurbulenceModel _turbulenceModel)
+{
+    this->turbulenceModel = _turbulenceModel; // remember the selection in the factory ...
+    para->setTurbulenceModel(_turbulenceModel); // ... and mirror it into the shared Parameter object
+    if(this->turbulenceModel != TurbulenceModel::None) para->setUseTurbulentViscosity(true); // only ever switched on here, never back off
+
+    switch (this->turbulenceModel) {
+        case TurbulenceModel::AMD:
+            this->turbulenceModelKernel = calcTurbulentViscosityAMD;
+            break;
+        default: // every model other than AMD is given no kernel by this factory
+            this->turbulenceModelKernel = nullptr;
+    }
+}
+
+void TurbulenceModelFactory::setModelConstant(real modelConstant)
+{
+    para->setSGSConstant(modelConstant); // forwarded to the shared Parameter object; the factory keeps no copy
+}
+
+void TurbulenceModelFactory::readConfigFile(const vf::basics::ConfigurationFile &configData)
+{
+    if (configData.contains("TurbulenceModel"))
+    {
+        std::string config = configData.getValue<std::string>("TurbulenceModel");
+        // Both keys are optional. A present but unknown model name is an error and must throw.
+        if      (config == "Smagorinsky") this->setTurbulenceModel( TurbulenceModel::Smagorinsky );
+        else if (config == "AMD")         this->setTurbulenceModel( TurbulenceModel::AMD );
+        else if (config == "QR" )         this->setTurbulenceModel( TurbulenceModel::QR );
+        else if (config == "None")        this->setTurbulenceModel( TurbulenceModel::None );
+        else    throw std::runtime_error("TurbulenceModelFactory: Invalid turbulence model!");
+        // Only reached for a recognised name, so the log never reports a model that was not applied.
+        VF_LOG_INFO("Turbulence model: {}", config);
+
+    }
+
+    if (configData.contains("SGSconstant"))
+    {
+        para->setSGSConstant(configData.getValue<real>("SGSconstant"));
+
+        VF_LOG_INFO("SGS constant: {}", para->getSGSConstant() );
+    }
+}
+
+void TurbulenceModelFactory::runTurbulenceModelKernel(const int level) const
+{
+    if(this->turbulenceModelKernel) this->turbulenceModelKernel(para.get(), level); // no-op while no kernel is assigned
+}
diff --git a/src/gpu/VirtualFluids_GPU/TurbulenceModels/TurbulenceModelFactory.h b/src/gpu/VirtualFluids_GPU/TurbulenceModels/TurbulenceModelFactory.h
new file mode 100644
index 0000000000000000000000000000000000000000..e71c8ed5f7be016a4a800b83cfb3252ee6b8246e
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/TurbulenceModels/TurbulenceModelFactory.h
@@ -0,0 +1,71 @@
+//=======================================================================================
+// ____          ____    __    ______     __________   __      __       __        __
+// \    \       |    |  |  |  |   _   \  |___    ___| |  |    |  |     /  \      |  |
+//  \    \      |    |  |  |  |  |_)   |     |  |     |  |    |  |    /    \     |  |
+//   \    \     |    |  |  |  |   _   /      |  |     |  |    |  |   /  /\  \    |  |
+//    \    \    |    |  |  |  |  | \  \      |  |     |   \__/   |  /  ____  \   |  |____
+//     \    \   |    |  |__|  |__|  \__\     |__|      \________/  /__/    \__\  |_______|
+//      \    \  |    |   ________________________________________________________________
+//       \    \ |    |  |  ______________________________________________________________|
+//        \    \|    |  |  |         __          __     __     __     ______      _______
+//         \         |  |  |_____   |  |        |  |   |  |   |  |   |   _  \    /  _____)
+//          \        |  |   _____|  |  |        |  |   |  |   |  |   |  | \  \   \_______
+//           \       |  |  |        |  |_____   |   \_/   |   |  |   |  |_/  /    _____  |
+//            \ _____|  |__|        |________|   \_______/    |__|   |______/    (_______/
+//
+//  This file is part of VirtualFluids. VirtualFluids is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  VirtualFluids is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with VirtualFluids (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file TurbulenceModelFactory.h
+//! \ingroup TurbulentViscosity
+//! \author Henrik Asmuth
+//=======================================================================================
+#ifndef TurbulenceModelFactory_H
+#define TurbulenceModelFactory_H
+
+#include <functional>
+#include <map>
+#include <string>
+#include <variant>
+
+#include "LBM/LB.h"
+#include "Parameter/Parameter.h"
+
+#include <basics/config/ConfigurationFile.h>
+
+class Parameter;
+
+using TurbulenceModelKernel = std::function<void(Parameter *, int )>;
+
+class TurbulenceModelFactory
+{
+public:
+    //! Keeps the shared Parameter object that the setters below write into.
+    TurbulenceModelFactory(SPtr<Parameter> parameter): para(parameter) {}
+    //! Stores the model here and in Parameter, enables turbulent viscosity unless None, and picks the kernel.
+    void setTurbulenceModel(TurbulenceModel _turbulenceModel);
+    //! Forwards the model constant to Parameter::setSGSConstant.
+    void setModelConstant(real modelConstant);
+    //! Applies the optional "TurbulenceModel" and "SGSconstant" entries of the configuration file.
+    void readConfigFile(const vf::basics::ConfigurationFile &configData);
+    //! Calls the selected kernel for the given level; does nothing when no kernel is set.
+    void runTurbulenceModelKernel(const int level) const;
+
+private:
+    TurbulenceModel turbulenceModel = TurbulenceModel::None; // last model passed to setTurbulenceModel
+    TurbulenceModelKernel turbulenceModelKernel = nullptr; // empty unless a model with a separate kernel is selected
+    SPtr<Parameter> para; // shared simulation parameters, handed in through the constructor
+
+};
+
+#endif
diff --git a/src/gpu/VirtualFluids_GPU/Utilities/FindNeighbors.h b/src/gpu/VirtualFluids_GPU/Utilities/FindNeighbors.h
new file mode 100644
index 0000000000000000000000000000000000000000..2ecbed5fd6c4b84b92c05953a4c96ecdc3b988de
--- /dev/null
+++ b/src/gpu/VirtualFluids_GPU/Utilities/FindNeighbors.h
@@ -0,0 +1,33 @@
+#ifndef FIND_NEIGHBORS_H
+#define FIND_NEIGHBORS_H
+
+#include "Parameter/Parameter.h"
+#include "lbm/constants/D3Q27.h"
+
+using namespace vf::lbm::dir;
+
+// Only use for fluid nodes!
+inline uint getNeighborIndex(LBMSimulationParameter *parH, const uint position, const int direction)
+{
+    // Look up the step counts for this direction once instead of on every loop test.
+    const auto &steps = mapForPointerChasing.at(direction);
+    uint nodeIndex = position;
+
+    if (steps.counterInverse != 0) {
+        nodeIndex = parH->neighborInverse[nodeIndex];
+    }
+    // Follow the x-, y- and z-neighbor links the stored number of times each.
+    for (uint x = 0; x < steps.counterX; x++) {
+        nodeIndex = parH->neighborX[nodeIndex];
+    }
+    for (uint y = 0; y < steps.counterY; y++) {
+        nodeIndex = parH->neighborY[nodeIndex];
+    }
+    for (uint z = 0; z < steps.counterZ; z++) {
+        nodeIndex = parH->neighborZ[nodeIndex];
+    }
+
+    return nodeIndex;
+}
+
+#endif
diff --git a/src/lbm/MacroscopicQuantities.h b/src/lbm/MacroscopicQuantities.h
index c37791294ff5b4edad21795f4ce0a32a18c5d236..8789f65195ee38b1399a42a0c24511dfcea3d6d0 100644
--- a/src/lbm/MacroscopicQuantities.h
+++ b/src/lbm/MacroscopicQuantities.h
@@ -25,10 +25,10 @@ namespace lbm
 
 inline __host__ __device__ real getDensity(const real *const &f /*[27]*/)
 {
-    return ((f[dir::TNE] + f[dir::BSW]) + (f[dir::TSE] + f[dir::BNW])) + ((f[dir::BSE] + f[dir::TNW]) + (f[dir::TSW] + f[dir::BNE])) +
-           (((f[dir::NE] + f[dir::SW]) + (f[dir::SE] + f[dir::NW])) + ((f[dir::TE] + f[dir::BW]) + (f[dir::BE] + f[dir::TW])) +
-            ((f[dir::BN] + f[dir::TS]) + (f[dir::TN] + f[dir::BS]))) +
-           ((f[dir::E] + f[dir::W]) + (f[dir::N] + f[dir::S]) + (f[dir::T] + f[dir::B])) + f[dir::REST];
+    return ((f[dir::DIR_PPP] + f[dir::DIR_MMM]) + (f[dir::DIR_PMP] + f[dir::DIR_MPM])) + ((f[dir::DIR_PMM] + f[dir::DIR_MPP]) + (f[dir::DIR_MMP] + f[dir::DIR_PPM])) +
+           (((f[dir::DIR_PP0] + f[dir::DIR_MM0]) + (f[dir::DIR_PM0] + f[dir::DIR_MP0])) + ((f[dir::DIR_P0P] + f[dir::DIR_M0M]) + (f[dir::DIR_P0M] + f[dir::DIR_M0P])) +
+            ((f[dir::DIR_0PM] + f[dir::DIR_0MP]) + (f[dir::DIR_0PP] + f[dir::DIR_0MM]))) +
+           ((f[dir::DIR_P00] + f[dir::DIR_M00]) + (f[dir::DIR_0P0] + f[dir::DIR_0M0]) + (f[dir::DIR_00P] + f[dir::DIR_00M])) + f[dir::DIR_000];
 }
 
 /*
@@ -36,22 +36,22 @@ inline __host__ __device__ real getDensity(const real *const &f /*[27]*/)
 */
 inline __host__ __device__ real getIncompressibleVelocityX1(const real *const &f /*[27]*/)
 {
-    return ((((f[dir::TNE] - f[dir::BSW]) + (f[dir::TSE] - f[dir::BNW])) + ((f[dir::BSE] - f[dir::TNW]) + (f[dir::BNE] - f[dir::TSW]))) +
-            (((f[dir::BE] - f[dir::TW]) + (f[dir::TE] - f[dir::BW])) + ((f[dir::SE] - f[dir::NW]) + (f[dir::NE] - f[dir::SW]))) + (f[dir::E] - f[dir::W]));
+    return ((((f[dir::DIR_PPP] - f[dir::DIR_MMM]) + (f[dir::DIR_PMP] - f[dir::DIR_MPM])) + ((f[dir::DIR_PMM] - f[dir::DIR_MPP]) + (f[dir::DIR_PPM] - f[dir::DIR_MMP]))) +
+            (((f[dir::DIR_P0M] - f[dir::DIR_M0P]) + (f[dir::DIR_P0P] - f[dir::DIR_M0M])) + ((f[dir::DIR_PM0] - f[dir::DIR_MP0]) + (f[dir::DIR_PP0] - f[dir::DIR_MM0]))) + (f[dir::DIR_P00] - f[dir::DIR_M00]));
 }
 
 
 inline __host__ __device__ real getIncompressibleVelocityX2(const real *const &f /*[27]*/)
 {
-    return ((((f[dir::TNE] - f[dir::BSW]) + (f[dir::BNW] - f[dir::TSE])) + ((f[dir::TNW] - f[dir::BSE]) + (f[dir::BNE] - f[dir::TSW]))) +
-            (((f[dir::BN] - f[dir::TS]) + (f[dir::TN] - f[dir::BS])) + ((f[dir::NW] - f[dir::SE]) + (f[dir::NE] - f[dir::SW]))) + (f[dir::N] - f[dir::S]));
+    return ((((f[dir::DIR_PPP] - f[dir::DIR_MMM]) + (f[dir::DIR_MPM] - f[dir::DIR_PMP])) + ((f[dir::DIR_MPP] - f[dir::DIR_PMM]) + (f[dir::DIR_PPM] - f[dir::DIR_MMP]))) +
+            (((f[dir::DIR_0PM] - f[dir::DIR_0MP]) + (f[dir::DIR_0PP] - f[dir::DIR_0MM])) + ((f[dir::DIR_MP0] - f[dir::DIR_PM0]) + (f[dir::DIR_PP0] - f[dir::DIR_MM0]))) + (f[dir::DIR_0P0] - f[dir::DIR_0M0]));
 }
 
 
 inline __host__ __device__ real getIncompressibleVelocityX3(const real *const &f /*[27]*/)
 {
-    return ((((f[dir::TNE] - f[dir::BSW]) + (f[dir::TSE] - f[dir::BNW])) + ((f[dir::TNW] - f[dir::BSE]) + (f[dir::TSW] - f[dir::BNE]))) +
-            (((f[dir::TS] - f[dir::BN]) + (f[dir::TN] - f[dir::BS])) + ((f[dir::TW] - f[dir::BE]) + (f[dir::TE] - f[dir::BW]))) + (f[dir::T] - f[dir::B]));
+    return ((((f[dir::DIR_PPP] - f[dir::DIR_MMM]) + (f[dir::DIR_PMP] - f[dir::DIR_MPM])) + ((f[dir::DIR_MPP] - f[dir::DIR_PMM]) + (f[dir::DIR_MMP] - f[dir::DIR_PPM]))) +
+            (((f[dir::DIR_0MP] - f[dir::DIR_0PM]) + (f[dir::DIR_0PP] - f[dir::DIR_0MM])) + ((f[dir::DIR_M0P] - f[dir::DIR_P0M]) + (f[dir::DIR_P0P] - f[dir::DIR_M0M]))) + (f[dir::DIR_00P] - f[dir::DIR_00M]));
 }
 
 
@@ -81,12 +81,12 @@ inline __host__ __device__ real getCompressibleVelocityX3(const real *const &f27
 */
 inline __host__ __device__ real getPressure(const real *const &f27, const real& rho, const real& vx, const real& vy, const real& vz)
 {
-    return (f27[dir::E] + f27[dir::W] + f27[dir::N] + f27[dir::S] + f27[dir::T] + f27[dir::B] + 
-    constant::c2o1 * (f27[dir::NE] + f27[dir::SW] + f27[dir::SE] + f27[dir::NW] + f27[dir::TE] + 
-                      f27[dir::BW] + f27[dir::BE] + f27[dir::TW] + f27[dir::TN] + f27[dir::BS] + 
-                      f27[dir::BN] + f27[dir::TS]) + 
-    constant::c3o1 * (f27[dir::TNE] + f27[dir::TSW] + f27[dir::TSE] + f27[dir::TNW] + 
-                      f27[dir::BNE] + f27[dir::BSW] + f27[dir::BSE] + f27[dir::BNW]) -
+    return (f27[dir::DIR_P00] + f27[dir::DIR_M00] + f27[dir::DIR_0P0] + f27[dir::DIR_0M0] + f27[dir::DIR_00P] + f27[dir::DIR_00M] + 
+    constant::c2o1 * (f27[dir::DIR_PP0] + f27[dir::DIR_MM0] + f27[dir::DIR_PM0] + f27[dir::DIR_MP0] + f27[dir::DIR_P0P] + 
+                      f27[dir::DIR_M0M] + f27[dir::DIR_P0M] + f27[dir::DIR_M0P] + f27[dir::DIR_0PP] + f27[dir::DIR_0MM] + 
+                      f27[dir::DIR_0PM] + f27[dir::DIR_0MP]) + 
+    constant::c3o1 * (f27[dir::DIR_PPP] + f27[dir::DIR_MMP] + f27[dir::DIR_PMP] + f27[dir::DIR_MPP] + 
+                      f27[dir::DIR_PPM] + f27[dir::DIR_MMM] + f27[dir::DIR_PMM] + f27[dir::DIR_MPM]) -
     rho - (vx * vx + vy * vy + vz * vz) * (constant::c1o1 + rho)) * 
     constant::c1o2 + rho; // times zero for incompressible case                 
                           // Attention: op defined directly to op = 1 ; ^^^^(1.0/op-0.5)=0.5
diff --git a/src/lbm/MacroscopicQuantitiesTests.cpp b/src/lbm/MacroscopicQuantitiesTests.cpp
index 52f89943a3e6a99fcab13eba22de9959229a289d..382ec180e7a97749c4a005734a61f96ec7a146d5 100644
--- a/src/lbm/MacroscopicQuantitiesTests.cpp
+++ b/src/lbm/MacroscopicQuantitiesTests.cpp
@@ -34,7 +34,7 @@ TEST(MacroscopicQuantitiesTest, whenFsAreEqual_velocityInEachDirectionShouldBeZe
 
 TEST(MacroscopicQuantitiesTest, givenAllFsAreOne_when_Eis2_velocityInX1ShouldBeOne)
 {
-    f[dir::E] = 2.;
+    f[dir::DIR_P00] = 2.;
 
     const double velocityX1 = getIncompressibleVelocityX1(f);
     const double velocityX2 = getIncompressibleVelocityX2(f);
@@ -51,7 +51,7 @@ TEST(MacroscopicQuantitiesTest, givenAllFsAreOne_when_Eis2_velocityInX1ShouldBeO
 
 TEST(MacroscopicQuantitiesTest, givenAllFsAreOne_when_Nis2_velocityInX2ShouldBeOne)
 {
-    f[dir::N] = 2.;
+    f[dir::DIR_0P0] = 2.;
 
     const double velocity = getIncompressibleVelocityX2(f);
 
@@ -62,7 +62,7 @@ TEST(MacroscopicQuantitiesTest, givenAllFsAreOne_when_Nis2_velocityInX2ShouldBeO
 
 TEST(MacroscopicQuantitiesTest, givenAllFsAreOne_when_Tis2_velocityInX3ShouldBeOne)
 {
-    f[dir::T] = 2.;
+    f[dir::DIR_00P] = 2.;
 
     const double velocity = getIncompressibleVelocityX3(f);
 
diff --git a/src/lbm/constants/D3Q27.h b/src/lbm/constants/D3Q27.h
index d13da3d610ec69cb5eb5e553491ba047d0b09890..6a198e926477eff4534108686793400de3a7e042 100644
--- a/src/lbm/constants/D3Q27.h
+++ b/src/lbm/constants/D3Q27.h
@@ -1,67 +1,171 @@
 #ifndef LBM_D3Q27_H
 #define LBM_D3Q27_H
 
+#include <map>
+#include "basics/Core/DataTypes.h"
+
 namespace vf::lbm::dir
 {
 
 static constexpr int STARTDIR = 0;
 static constexpr int ENDDIR   = 26;
 
-static constexpr int E    = 0;
-static constexpr int W    = 1;
-static constexpr int N    = 2;
-static constexpr int S    = 3;
-static constexpr int T    = 4;
-static constexpr int B    = 5;
-static constexpr int NE   = 6;
-static constexpr int SW   = 7;
-static constexpr int SE   = 8;
-static constexpr int NW   = 9;
-static constexpr int TE   = 10;
-static constexpr int BW   = 11;
-static constexpr int BE   = 12;
-static constexpr int TW   = 13;
-static constexpr int TN   = 14;
-static constexpr int BS   = 15;
-static constexpr int BN   = 16;
-static constexpr int TS   = 17;
-static constexpr int TNE  = 18;
-static constexpr int TNW  = 19;
-static constexpr int TSE  = 20;
-static constexpr int TSW  = 21;
-static constexpr int BNE  = 22;
-static constexpr int BNW  = 23;
-static constexpr int BSE  = 24;
-static constexpr int BSW  = 25;
-static constexpr int REST = 26;
-
-static constexpr int PZZ = 0;
-static constexpr int MZZ = 1;
-static constexpr int ZPZ = 2;
-static constexpr int ZMZ = 3;
-static constexpr int ZZP = 4;
-static constexpr int ZZM = 5;
-static constexpr int PPZ = 6;
-static constexpr int MMZ = 7;
-static constexpr int PMZ = 8;
-static constexpr int MPZ = 9;
-static constexpr int PZP = 10;
-static constexpr int MZM = 11;
-static constexpr int PZM = 12;
-static constexpr int MZP = 13;
-static constexpr int ZPP = 14;
-static constexpr int ZMM = 15;
-static constexpr int ZPM = 16;
-static constexpr int ZMP = 17;
-static constexpr int PPP = 18;
-static constexpr int MPP = 19;
-static constexpr int PMP = 20;
-static constexpr int MMP = 21;
-static constexpr int PPM = 22;
-static constexpr int MPM = 23;
-static constexpr int PMM = 24;
-static constexpr int MMM = 25;
-static constexpr int ZZZ = 26;
+// used in the CPU and the GPU version; DIR_xyz has one character per axis (x, y, z): P = positive, M = negative, 0 = no offset
+static constexpr int DIR_000 = 0;    // REST
+static constexpr int DIR_P00 = 1;    // E
+static constexpr int DIR_M00 = 2;    // W
+static constexpr int DIR_0P0 = 3;    // N
+static constexpr int DIR_0M0 = 4;    // S
+static constexpr int DIR_00P = 5;    // T
+static constexpr int DIR_00M = 6;    // B
+
+static constexpr int DIR_PP0 = 7;    // NE
+static constexpr int DIR_MM0 = 8;    // SW
+static constexpr int DIR_PM0 = 9;    // SE
+static constexpr int DIR_MP0 = 10;   // NW
+static constexpr int DIR_P0P = 11;   // TE
+static constexpr int DIR_M0M = 12;   // BW
+static constexpr int DIR_P0M = 13;   // BE
+static constexpr int DIR_M0P = 14;   // TW
+static constexpr int DIR_0PP = 15;   // TN
+static constexpr int DIR_0MM = 16;   // BS
+static constexpr int DIR_0PM = 17;   // BN
+static constexpr int DIR_0MP = 18;   // TS
+
+static constexpr int DIR_PPP = 19;   // TNE
+static constexpr int DIR_MPP = 20;   // TNW
+static constexpr int DIR_PMP = 21;   // TSE
+static constexpr int DIR_MMP = 22;   // TSW
+static constexpr int DIR_PPM = 23;   // BNE
+static constexpr int DIR_MPM = 24;   // BNW
+static constexpr int DIR_PMM = 25;   // BSE
+static constexpr int DIR_MMM = 26;   // BSW
+
+struct countersForPointerChasing{
+    uint counterInverse;
+    uint counterX;
+    uint counterY;
+    uint counterZ;
+};
+
+const std::map<const int, const countersForPointerChasing> mapForPointerChasing = 
+{
+    {DIR_000, countersForPointerChasing{0, 0, 0, 0}},
+    {DIR_P00, countersForPointerChasing{0, 1, 0, 0}},
+    {DIR_M00, countersForPointerChasing{1, 0, 1, 1}},
+    {DIR_0P0, countersForPointerChasing{0, 0, 1, 0}},
+    {DIR_0M0, countersForPointerChasing{1, 1, 0, 1}},
+    {DIR_00P, countersForPointerChasing{0, 0, 0, 1}},
+    {DIR_00M, countersForPointerChasing{1, 1, 1, 0}},
+
+    {DIR_PP0, countersForPointerChasing{0, 1, 1, 0}},
+    {DIR_MM0, countersForPointerChasing{1, 0, 0, 1}},
+    {DIR_PM0, countersForPointerChasing{1, 2, 0, 1}},
+    {DIR_MP0, countersForPointerChasing{1, 0, 2, 1}},
+    {DIR_P0P, countersForPointerChasing{0, 1, 0, 1}},
+    {DIR_M0M, countersForPointerChasing{1, 0, 1, 0}},
+    {DIR_P0M, countersForPointerChasing{1, 2, 1, 0}},
+    {DIR_M0P, countersForPointerChasing{1, 0, 1, 2}},
+    {DIR_0PP, countersForPointerChasing{0, 0, 1, 1}},
+    {DIR_0MM, countersForPointerChasing{1, 1, 0, 0}},
+    {DIR_0PM, countersForPointerChasing{1, 1, 2, 0}},
+    {DIR_0MP, countersForPointerChasing{1, 1, 0, 2}},
+
+    {DIR_PPP, countersForPointerChasing{0, 1, 1, 1}},
+    {DIR_MPP, countersForPointerChasing{1, 0, 2, 2}},
+    {DIR_PMP, countersForPointerChasing{1, 2, 0, 2}},
+    {DIR_MMP, countersForPointerChasing{1, 0, 0, 2}},
+    {DIR_PPM, countersForPointerChasing{1, 2, 2, 0}},
+    {DIR_MPM, countersForPointerChasing{1, 0, 2, 0}},
+    {DIR_PMM, countersForPointerChasing{1, 2, 0, 0}},
+    {DIR_MMM, countersForPointerChasing{1, 0, 0, 0}}
+};
+
+
+
+// used in the CPU version (the INV_* and SGD_* constants below are currently commented out)
+// static constexpr int INV_P00 = DIR_M00;
+// static constexpr int INV_M00 = DIR_P00;
+// static constexpr int INV_0P0 = DIR_0M0;
+// static constexpr int INV_0M0 = DIR_0P0;
+// static constexpr int INV_00P = DIR_00M;
+// static constexpr int INV_00M = DIR_00P;
+// static constexpr int INV_PP0 = DIR_MM0;
+// static constexpr int INV_MM0 = DIR_PP0;
+// static constexpr int INV_PM0 = DIR_MP0;
+// static constexpr int INV_MP0 = DIR_PM0;
+// static constexpr int INV_P0P = DIR_M0M;
+// static constexpr int INV_M0M = DIR_P0P;
+// static constexpr int INV_P0M = DIR_M0P;
+// static constexpr int INV_M0P = DIR_P0M;
+// static constexpr int INV_0PP = DIR_0MM;
+// static constexpr int INV_0MM = DIR_0PP;
+// static constexpr int INV_0PM = DIR_0MP;
+// static constexpr int INV_0MP = DIR_0PM;
+// static constexpr int INV_PPP = DIR_MMM;
+// static constexpr int INV_MPP = DIR_PMM;
+// static constexpr int INV_PMP = DIR_MPM;
+// static constexpr int INV_MMP = DIR_PPM;
+// static constexpr int INV_PPM = DIR_MMP;
+// static constexpr int INV_MPM = DIR_PMP;
+// static constexpr int INV_PMM = DIR_MPP;
+// static constexpr int INV_MMM = DIR_PPP;
+
+// static constexpr int SGD_P00 = 0;
+// static constexpr int SGD_M00 = 1;
+// static constexpr int SGD_0P0 = 2;
+// static constexpr int SGD_0M0 = 3;
+// static constexpr int SGD_00P = 4;
+// static constexpr int SGD_00M = 5;
+// static constexpr int SGD_PP0 = 6;
+// static constexpr int SGD_MM0 = 7;
+// static constexpr int SGD_PM0 = 8;
+// static constexpr int SGD_MP0 = 9;
+// static constexpr int SGD_P0P = 10;
+// static constexpr int SGD_M0M = 11;
+// static constexpr int SGD_P0M = 12;
+// static constexpr int SGD_M0P = 13;
+// static constexpr int SGD_0PP = 14;
+// static constexpr int SGD_0MM = 15;
+// static constexpr int SGD_0PM = 16;
+// static constexpr int SGD_0MP = 17;
+// static constexpr int SGD_PPP = 18;
+// static constexpr int SGD_MPP = 19;
+// static constexpr int SGD_PMP = 20;
+// static constexpr int SGD_MMP = 21;
+// static constexpr int SGD_PPM = 22;
+// static constexpr int SGD_MPM = 23;
+// static constexpr int SGD_PMM = 24;
+// static constexpr int SGD_MMM = 25;
+
 
+// DEPRECATED: legacy aliases for the DIR_* constants above; prefer the DIR_* names in new code
+static constexpr int ZZZ = DIR_000;
+static constexpr int PZZ = DIR_P00;
+static constexpr int MZZ = DIR_M00;
+static constexpr int ZPZ = DIR_0P0;
+static constexpr int ZMZ = DIR_0M0;
+static constexpr int ZZP = DIR_00P;
+static constexpr int ZZM = DIR_00M;
+static constexpr int PPZ = DIR_PP0;
+static constexpr int MMZ = DIR_MM0;
+static constexpr int PMZ = DIR_PM0;
+static constexpr int MPZ = DIR_MP0;
+static constexpr int PZP = DIR_P0P;
+static constexpr int MZM = DIR_M0M;
+static constexpr int PZM = DIR_P0M;
+static constexpr int MZP = DIR_M0P;
+static constexpr int ZPP = DIR_0PP;
+static constexpr int ZMM = DIR_0MM;
+static constexpr int ZPM = DIR_0PM;
+static constexpr int ZMP = DIR_0MP;
+static constexpr int PPP = DIR_PPP;
+static constexpr int MPP = DIR_MPP;
+static constexpr int PMP = DIR_PMP;
+static constexpr int MMP = DIR_MMP;
+static constexpr int PPM = DIR_PPM;
+static constexpr int MPM = DIR_MPM;
+static constexpr int PMM = DIR_PMM;
+static constexpr int MMM = DIR_MMM;
 }
 #endif